1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCInstrDesc.h"
29 #include "llvm/MC/MCParser/MCAsmLexer.h"
30 #include "llvm/MC/MCParser/MCAsmParser.h"
31 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
32 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/AMDGPUMetadata.h"
36 #include "llvm/Support/AMDHSAKernelDescriptor.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/MachineValueType.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/TargetParser.h"
41 
42 using namespace llvm;
43 using namespace llvm::AMDGPU;
44 using namespace llvm::amdhsa;
45 
46 namespace {
47 
48 class AMDGPUAsmParser;
49 
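// Register file a parsed register belongs to. IS_SPECIAL covers registers
// such as VCC, EXEC and M0 that are not indexed like the GPR files.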
50 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
51 
52 //===----------------------------------------------------------------------===//
53 // Operand
54 //===----------------------------------------------------------------------===//
55 
56 class AMDGPUOperand : public MCParsedAsmOperand {
57   enum KindTy {
58     Token,
59     Immediate,
60     Register,
61     Expression
62   } Kind;
63 
64   SMLoc StartLoc, EndLoc;
65   const AMDGPUAsmParser *AsmParser;
66 
67 public:
68   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
69       : Kind(Kind_), AsmParser(AsmParser_) {}
70 
71   using Ptr = std::unique_ptr<AMDGPUOperand>;
72 
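  // Source operand modifiers: 'abs'/'neg' apply to floating-point operands and
  // 'sext' to integer operands. They are folded into a SISrcMods immediate
  // that is emitted alongside the operand itself.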
73   struct Modifiers {
74     bool Abs = false;
75     bool Neg = false;
76     bool Sext = false;
77 
78     bool hasFPModifiers() const { return Abs || Neg; }
79     bool hasIntModifiers() const { return Sext; }
80     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
81 
82     int64_t getFPModifiersOperand() const {
83       int64_t Operand = 0;
84       Operand |= Abs ? SISrcMods::ABS : 0u;
85       Operand |= Neg ? SISrcMods::NEG : 0u;
86       return Operand;
87     }
88 
89     int64_t getIntModifiersOperand() const {
90       int64_t Operand = 0;
91       Operand |= Sext ? SISrcMods::SEXT : 0u;
92       return Operand;
93     }
94 
95     int64_t getModifiersOperand() const {
96       assert(!(hasFPModifiers() && hasIntModifiers())
97            && "fp and int modifiers should not be used simultaneously");
98       if (hasFPModifiers()) {
99         return getFPModifiersOperand();
100       } else if (hasIntModifiers()) {
101         return getIntModifiersOperand();
102       } else {
103         return 0;
104       }
105     }
106 
107     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
108   };
109 
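  // Identifies which named (typically optional) operand an immediate
  // represents, e.g. 'gds', 'offset', DPP controls or SDWA selectors.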
110   enum ImmTy {
111     ImmTyNone,
112     ImmTyGDS,
113     ImmTyLDS,
114     ImmTyOffen,
115     ImmTyIdxen,
116     ImmTyAddr64,
117     ImmTyOffset,
118     ImmTyInstOffset,
119     ImmTyOffset0,
120     ImmTyOffset1,
121     ImmTyCPol,
122     ImmTySWZ,
123     ImmTyTFE,
124     ImmTyD16,
125     ImmTyClampSI,
126     ImmTyOModSI,
127     ImmTySdwaDstSel,
128     ImmTySdwaSrc0Sel,
129     ImmTySdwaSrc1Sel,
130     ImmTySdwaDstUnused,
131     ImmTyDMask,
132     ImmTyDim,
133     ImmTyUNorm,
134     ImmTyDA,
135     ImmTyR128A16,
136     ImmTyA16,
137     ImmTyLWE,
138     ImmTyExpTgt,
139     ImmTyExpCompr,
140     ImmTyExpVM,
141     ImmTyFORMAT,
142     ImmTyHwreg,
143     ImmTyOff,
144     ImmTySendMsg,
145     ImmTyInterpSlot,
146     ImmTyInterpAttr,
147     ImmTyAttrChan,
148     ImmTyOpSel,
149     ImmTyOpSelHi,
150     ImmTyNegLo,
151     ImmTyNegHi,
152     ImmTyDPP8,
153     ImmTyDppCtrl,
154     ImmTyDppRowMask,
155     ImmTyDppBankMask,
156     ImmTyDppBoundCtrl,
157     ImmTyDppFi,
158     ImmTySwizzle,
159     ImmTyGprIdxMode,
160     ImmTyHigh,
161     ImmTyBLGP,
162     ImmTyCBSZ,
163     ImmTyABID,
164     ImmTyEndpgm,
165     ImmTyWaitVDST,
166     ImmTyWaitEXP,
167   };
168 
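  // How an immediate is (or will be) encoded: not yet classified, as a
  // literal constant, or as an inline constant.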
169   enum ImmKindTy {
170     ImmKindTyNone,
171     ImmKindTyLiteral,
172     ImmKindTyConst,
173   };
174 
175 private:
176   struct TokOp {
177     const char *Data;
178     unsigned Length;
179   };
180 
181   struct ImmOp {
182     int64_t Val;
183     ImmTy Type;
184     bool IsFPImm;
185     mutable ImmKindTy Kind;
186     Modifiers Mods;
187   };
188 
189   struct RegOp {
190     unsigned RegNo;
191     Modifiers Mods;
192   };
193 
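  // Operand payload; the member that is active depends on Kind.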
194   union {
195     TokOp Tok;
196     ImmOp Imm;
197     RegOp Reg;
198     const MCExpr *Expr;
199   };
200 
201 public:
202   bool isToken() const override {
203     if (Kind == Token)
204       return true;
205 
206     // When parsing operands, we can't always tell if something was meant to be
207     // a token, like 'gds', or an expression that references a global variable.
208     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
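    // For example, a trailing 'gds' on a DS instruction may have been parsed
    // as a reference to a symbol named 'gds' rather than as the 'gds' token.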
210     return isSymbolRefExpr();
211   }
212 
213   bool isSymbolRefExpr() const {
214     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
215   }
216 
217   bool isImm() const override {
218     return Kind == Immediate;
219   }
220 
221   void setImmKindNone() const {
222     assert(isImm());
223     Imm.Kind = ImmKindTyNone;
224   }
225 
226   void setImmKindLiteral() const {
227     assert(isImm());
228     Imm.Kind = ImmKindTyLiteral;
229   }
230 
231   void setImmKindConst() const {
232     assert(isImm());
233     Imm.Kind = ImmKindTyConst;
234   }
235 
236   bool IsImmKindLiteral() const {
237     return isImm() && Imm.Kind == ImmKindTyLiteral;
238   }
239 
240   bool isImmKindConst() const {
241     return isImm() && Imm.Kind == ImmKindTyConst;
242   }
243 
244   bool isInlinableImm(MVT type) const;
245   bool isLiteralImm(MVT type) const;
246 
247   bool isRegKind() const {
248     return Kind == Register;
249   }
250 
251   bool isReg() const override {
252     return isRegKind() && !hasModifiers();
253   }
254 
255   bool isRegOrInline(unsigned RCID, MVT type) const {
256     return isRegClass(RCID) || isInlinableImm(type);
257   }
258 
259   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
260     return isRegOrInline(RCID, type) || isLiteralImm(type);
261   }
262 
263   bool isRegOrImmWithInt16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
265   }
266 
267   bool isRegOrImmWithInt32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
269   }
270 
271   bool isRegOrInlineImmWithInt16InputMods() const {
272     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
273   }
274 
275   bool isRegOrInlineImmWithInt32InputMods() const {
276     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
277   }
278 
279   bool isRegOrImmWithInt64InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
281   }
282 
283   bool isRegOrImmWithFP16InputMods() const {
284     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
285   }
286 
287   bool isRegOrImmWithFP32InputMods() const {
288     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289   }
290 
291   bool isRegOrImmWithFP64InputMods() const {
292     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293   }
294 
295   bool isRegOrInlineImmWithFP16InputMods() const {
296     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
297   }
298 
299   bool isRegOrInlineImmWithFP32InputMods() const {
300     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
301   }
302 
303 
304   bool isVReg() const {
305     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
306            isRegClass(AMDGPU::VReg_64RegClassID) ||
307            isRegClass(AMDGPU::VReg_96RegClassID) ||
308            isRegClass(AMDGPU::VReg_128RegClassID) ||
309            isRegClass(AMDGPU::VReg_160RegClassID) ||
310            isRegClass(AMDGPU::VReg_192RegClassID) ||
311            isRegClass(AMDGPU::VReg_256RegClassID) ||
312            isRegClass(AMDGPU::VReg_512RegClassID) ||
313            isRegClass(AMDGPU::VReg_1024RegClassID);
314   }
315 
316   bool isVReg32() const {
317     return isRegClass(AMDGPU::VGPR_32RegClassID);
318   }
319 
320   bool isVReg32OrOff() const {
321     return isOff() || isVReg32();
322   }
323 
324   bool isNull() const {
325     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
326   }
327 
328   bool isVRegWithInputMods() const;
329 
330   bool isSDWAOperand(MVT type) const;
331   bool isSDWAFP16Operand() const;
332   bool isSDWAFP32Operand() const;
333   bool isSDWAInt16Operand() const;
334   bool isSDWAInt32Operand() const;
335 
336   bool isImmTy(ImmTy ImmT) const {
337     return isImm() && Imm.Type == ImmT;
338   }
339 
340   bool isImmModifier() const {
341     return isImm() && Imm.Type != ImmTyNone;
342   }
343 
344   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
345   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
346   bool isDMask() const { return isImmTy(ImmTyDMask); }
347   bool isDim() const { return isImmTy(ImmTyDim); }
348   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
349   bool isDA() const { return isImmTy(ImmTyDA); }
350   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
351   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
352   bool isLWE() const { return isImmTy(ImmTyLWE); }
353   bool isOff() const { return isImmTy(ImmTyOff); }
354   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
355   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
356   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
357   bool isOffen() const { return isImmTy(ImmTyOffen); }
358   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
359   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
360   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
361   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
362   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
363 
364   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
365   bool isGDS() const { return isImmTy(ImmTyGDS); }
366   bool isLDS() const { return isImmTy(ImmTyLDS); }
367   bool isCPol() const { return isImmTy(ImmTyCPol); }
368   bool isSWZ() const { return isImmTy(ImmTySWZ); }
369   bool isTFE() const { return isImmTy(ImmTyTFE); }
370   bool isD16() const { return isImmTy(ImmTyD16); }
371   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
372   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
373   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
374   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
375   bool isFI() const { return isImmTy(ImmTyDppFi); }
376   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
377   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
378   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
379   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
380   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
381   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
382   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
383   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
384   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
385   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
386   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
387   bool isHigh() const { return isImmTy(ImmTyHigh); }
388 
389   bool isMod() const {
390     return isClampSI() || isOModSI();
391   }
392 
393   bool isRegOrImm() const {
394     return isReg() || isImm();
395   }
396 
397   bool isRegClass(unsigned RCID) const;
398 
399   bool isInlineValue() const;
400 
401   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
402     return isRegOrInline(RCID, type) && !hasModifiers();
403   }
404 
405   bool isSCSrcB16() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
407   }
408 
409   bool isSCSrcV2B16() const {
410     return isSCSrcB16();
411   }
412 
413   bool isSCSrcB32() const {
414     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
415   }
416 
417   bool isSCSrcB64() const {
418     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
419   }
420 
421   bool isBoolReg() const;
422 
423   bool isSCSrcF16() const {
424     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
425   }
426 
427   bool isSCSrcV2F16() const {
428     return isSCSrcF16();
429   }
430 
431   bool isSCSrcF32() const {
432     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
433   }
434 
435   bool isSCSrcF64() const {
436     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
437   }
438 
439   bool isSSrcB32() const {
440     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
441   }
442 
443   bool isSSrcB16() const {
444     return isSCSrcB16() || isLiteralImm(MVT::i16);
445   }
446 
447   bool isSSrcV2B16() const {
448     llvm_unreachable("cannot happen");
449     return isSSrcB16();
450   }
451 
452   bool isSSrcB64() const {
453     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
454     // See isVSrc64().
455     return isSCSrcB64() || isLiteralImm(MVT::i64);
456   }
457 
458   bool isSSrcF32() const {
459     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
460   }
461 
462   bool isSSrcF64() const {
463     return isSCSrcB64() || isLiteralImm(MVT::f64);
464   }
465 
466   bool isSSrcF16() const {
467     return isSCSrcB16() || isLiteralImm(MVT::f16);
468   }
469 
470   bool isSSrcV2F16() const {
471     llvm_unreachable("cannot happen");
472     return isSSrcF16();
473   }
474 
475   bool isSSrcV2FP32() const {
476     llvm_unreachable("cannot happen");
477     return isSSrcF32();
478   }
479 
480   bool isSCSrcV2FP32() const {
481     llvm_unreachable("cannot happen");
482     return isSCSrcF32();
483   }
484 
485   bool isSSrcV2INT32() const {
486     llvm_unreachable("cannot happen");
487     return isSSrcB32();
488   }
489 
490   bool isSCSrcV2INT32() const {
491     llvm_unreachable("cannot happen");
492     return isSCSrcB32();
493   }
494 
495   bool isSSrcOrLdsB32() const {
496     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
497            isLiteralImm(MVT::i32) || isExpr();
498   }
499 
500   bool isVCSrcB32() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
502   }
503 
504   bool isVCSrcB64() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
506   }
507 
508   bool isVCSrcB16() const {
509     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
510   }
511 
512   bool isVCSrcV2B16() const {
513     return isVCSrcB16();
514   }
515 
516   bool isVCSrcF32() const {
517     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
518   }
519 
520   bool isVCSrcF64() const {
521     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
522   }
523 
524   bool isVCSrcF16() const {
525     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
526   }
527 
528   bool isVCSrcV2F16() const {
529     return isVCSrcF16();
530   }
531 
532   bool isVSrcB32() const {
533     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
534   }
535 
536   bool isVSrcB64() const {
537     return isVCSrcF64() || isLiteralImm(MVT::i64);
538   }
539 
540   bool isVSrcB16() const {
541     return isVCSrcB16() || isLiteralImm(MVT::i16);
542   }
543 
544   bool isVSrcV2B16() const {
545     return isVSrcB16() || isLiteralImm(MVT::v2i16);
546   }
547 
548   bool isVCSrcV2FP32() const {
549     return isVCSrcF64();
550   }
551 
552   bool isVSrcV2FP32() const {
553     return isVSrcF64() || isLiteralImm(MVT::v2f32);
554   }
555 
556   bool isVCSrcV2INT32() const {
557     return isVCSrcB64();
558   }
559 
560   bool isVSrcV2INT32() const {
561     return isVSrcB64() || isLiteralImm(MVT::v2i32);
562   }
563 
564   bool isVSrcF32() const {
565     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
566   }
567 
568   bool isVSrcF64() const {
569     return isVCSrcF64() || isLiteralImm(MVT::f64);
570   }
571 
572   bool isVSrcF16() const {
573     return isVCSrcF16() || isLiteralImm(MVT::f16);
574   }
575 
576   bool isVSrcV2F16() const {
577     return isVSrcF16() || isLiteralImm(MVT::v2f16);
578   }
579 
580   bool isVISrcB32() const {
581     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
582   }
583 
584   bool isVISrcB16() const {
585     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
586   }
587 
588   bool isVISrcV2B16() const {
589     return isVISrcB16();
590   }
591 
592   bool isVISrcF32() const {
593     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
594   }
595 
596   bool isVISrcF16() const {
597     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
598   }
599 
600   bool isVISrcV2F16() const {
601     return isVISrcF16() || isVISrcB32();
602   }
603 
604   bool isVISrc_64B64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
606   }
607 
608   bool isVISrc_64F64() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
610   }
611 
612   bool isVISrc_64V2FP32() const {
613     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
614   }
615 
616   bool isVISrc_64V2INT32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_256B64() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
622   }
623 
624   bool isVISrc_256F64() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
626   }
627 
628   bool isVISrc_128B16() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
630   }
631 
632   bool isVISrc_128V2B16() const {
633     return isVISrc_128B16();
634   }
635 
636   bool isVISrc_128B32() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
638   }
639 
640   bool isVISrc_128F32() const {
641     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
642   }
643 
644   bool isVISrc_256V2FP32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_256V2INT32() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
650   }
651 
652   bool isVISrc_512B32() const {
653     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
654   }
655 
656   bool isVISrc_512B16() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
658   }
659 
660   bool isVISrc_512V2B16() const {
661     return isVISrc_512B16();
662   }
663 
664   bool isVISrc_512F32() const {
665     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
666   }
667 
668   bool isVISrc_512F16() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
670   }
671 
672   bool isVISrc_512V2F16() const {
673     return isVISrc_512F16() || isVISrc_512B32();
674   }
675 
676   bool isVISrc_1024B32() const {
677     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
678   }
679 
680   bool isVISrc_1024B16() const {
681     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
682   }
683 
684   bool isVISrc_1024V2B16() const {
685     return isVISrc_1024B16();
686   }
687 
688   bool isVISrc_1024F32() const {
689     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
690   }
691 
692   bool isVISrc_1024F16() const {
693     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
694   }
695 
696   bool isVISrc_1024V2F16() const {
697     return isVISrc_1024F16() || isVISrc_1024B32();
698   }
699 
700   bool isAISrcB32() const {
701     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
702   }
703 
704   bool isAISrcB16() const {
705     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
706   }
707 
708   bool isAISrcV2B16() const {
709     return isAISrcB16();
710   }
711 
712   bool isAISrcF32() const {
713     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
714   }
715 
716   bool isAISrcF16() const {
717     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
718   }
719 
720   bool isAISrcV2F16() const {
721     return isAISrcF16() || isAISrcB32();
722   }
723 
724   bool isAISrc_64B64() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
726   }
727 
728   bool isAISrc_64F64() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
730   }
731 
732   bool isAISrc_128B32() const {
733     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
734   }
735 
736   bool isAISrc_128B16() const {
737     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
738   }
739 
740   bool isAISrc_128V2B16() const {
741     return isAISrc_128B16();
742   }
743 
744   bool isAISrc_128F32() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
746   }
747 
748   bool isAISrc_128F16() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
750   }
751 
752   bool isAISrc_128V2F16() const {
753     return isAISrc_128F16() || isAISrc_128B32();
754   }
755 
756   bool isVISrc_128F16() const {
757     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
758   }
759 
760   bool isVISrc_128V2F16() const {
761     return isVISrc_128F16() || isVISrc_128B32();
762   }
763 
764   bool isAISrc_256B64() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
766   }
767 
768   bool isAISrc_256F64() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
770   }
771 
772   bool isAISrc_512B32() const {
773     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
774   }
775 
776   bool isAISrc_512B16() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
778   }
779 
780   bool isAISrc_512V2B16() const {
781     return isAISrc_512B16();
782   }
783 
784   bool isAISrc_512F32() const {
785     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
786   }
787 
788   bool isAISrc_512F16() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
790   }
791 
792   bool isAISrc_512V2F16() const {
793     return isAISrc_512F16() || isAISrc_512B32();
794   }
795 
796   bool isAISrc_1024B32() const {
797     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
798   }
799 
800   bool isAISrc_1024B16() const {
801     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
802   }
803 
804   bool isAISrc_1024V2B16() const {
805     return isAISrc_1024B16();
806   }
807 
808   bool isAISrc_1024F32() const {
809     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
810   }
811 
812   bool isAISrc_1024F16() const {
813     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
814   }
815 
816   bool isAISrc_1024V2F16() const {
817     return isAISrc_1024F16() || isAISrc_1024B32();
818   }
819 
820   bool isKImmFP32() const {
821     return isLiteralImm(MVT::f32);
822   }
823 
824   bool isKImmFP16() const {
825     return isLiteralImm(MVT::f16);
826   }
827 
828   bool isMem() const override {
829     return false;
830   }
831 
832   bool isExpr() const {
833     return Kind == Expression;
834   }
835 
836   bool isSoppBrTarget() const {
837     return isExpr() || isImm();
838   }
839 
840   bool isSWaitCnt() const;
841   bool isDepCtr() const;
842   bool isSDelayAlu() const;
843   bool isHwreg() const;
844   bool isSendMsg() const;
845   bool isSwizzle() const;
846   bool isSMRDOffset8() const;
847   bool isSMEMOffset() const;
848   bool isSMRDLiteralOffset() const;
849   bool isDPP8() const;
850   bool isDPPCtrl() const;
851   bool isBLGP() const;
852   bool isCBSZ() const;
853   bool isABID() const;
854   bool isGPRIdxMode() const;
855   bool isS16Imm() const;
856   bool isU16Imm() const;
857   bool isEndpgm() const;
858   bool isWaitVDST() const;
859   bool isWaitEXP() const;
860 
861   StringRef getExpressionAsToken() const {
862     assert(isExpr());
863     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
864     return S->getSymbol().getName();
865   }
866 
867   StringRef getToken() const {
868     assert(isToken());
869 
870     if (Kind == Expression)
871       return getExpressionAsToken();
872 
873     return StringRef(Tok.Data, Tok.Length);
874   }
875 
876   int64_t getImm() const {
877     assert(isImm());
878     return Imm.Val;
879   }
880 
881   void setImm(int64_t Val) {
882     assert(isImm());
883     Imm.Val = Val;
884   }
885 
886   ImmTy getImmTy() const {
887     assert(isImm());
888     return Imm.Type;
889   }
890 
891   unsigned getReg() const override {
892     assert(isRegKind());
893     return Reg.RegNo;
894   }
895 
896   SMLoc getStartLoc() const override {
897     return StartLoc;
898   }
899 
900   SMLoc getEndLoc() const override {
901     return EndLoc;
902   }
903 
904   SMRange getLocRange() const {
905     return SMRange(StartLoc, EndLoc);
906   }
907 
908   Modifiers getModifiers() const {
909     assert(isRegKind() || isImmTy(ImmTyNone));
910     return isRegKind() ? Reg.Mods : Imm.Mods;
911   }
912 
913   void setModifiers(Modifiers Mods) {
914     assert(isRegKind() || isImmTy(ImmTyNone));
915     if (isRegKind())
916       Reg.Mods = Mods;
917     else
918       Imm.Mods = Mods;
919   }
920 
921   bool hasModifiers() const {
922     return getModifiers().hasModifiers();
923   }
924 
925   bool hasFPModifiers() const {
926     return getModifiers().hasFPModifiers();
927   }
928 
929   bool hasIntModifiers() const {
930     return getModifiers().hasIntModifiers();
931   }
932 
933   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
934 
935   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
936 
937   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
938 
939   template <unsigned Bitwidth>
940   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
941 
942   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
943     addKImmFPOperands<16>(Inst, N);
944   }
945 
946   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
947     addKImmFPOperands<32>(Inst, N);
948   }
949 
950   void addRegOperands(MCInst &Inst, unsigned N) const;
951 
952   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
953     addRegOperands(Inst, N);
954   }
955 
956   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
957     if (isRegKind())
958       addRegOperands(Inst, N);
959     else if (isExpr())
960       Inst.addOperand(MCOperand::createExpr(Expr));
961     else
962       addImmOperands(Inst, N);
963   }
964 
965   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
966     Modifiers Mods = getModifiers();
967     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
968     if (isRegKind()) {
969       addRegOperands(Inst, N);
970     } else {
971       addImmOperands(Inst, N, false);
972     }
973   }
974 
975   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
976     assert(!hasIntModifiers());
977     addRegOrImmWithInputModsOperands(Inst, N);
978   }
979 
980   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
981     assert(!hasFPModifiers());
982     addRegOrImmWithInputModsOperands(Inst, N);
983   }
984 
985   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
986     Modifiers Mods = getModifiers();
987     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
988     assert(isRegKind());
989     addRegOperands(Inst, N);
990   }
991 
992   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
993     assert(!hasIntModifiers());
994     addRegWithInputModsOperands(Inst, N);
995   }
996 
997   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
998     assert(!hasFPModifiers());
999     addRegWithInputModsOperands(Inst, N);
1000   }
1001 
1002   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
1003     if (isImm())
1004       addImmOperands(Inst, N);
1005     else {
1006       assert(isExpr());
1007       Inst.addOperand(MCOperand::createExpr(Expr));
1008     }
1009   }
1010 
1011   static void printImmTy(raw_ostream& OS, ImmTy Type) {
1012     switch (Type) {
1013     case ImmTyNone: OS << "None"; break;
1014     case ImmTyGDS: OS << "GDS"; break;
1015     case ImmTyLDS: OS << "LDS"; break;
1016     case ImmTyOffen: OS << "Offen"; break;
1017     case ImmTyIdxen: OS << "Idxen"; break;
1018     case ImmTyAddr64: OS << "Addr64"; break;
1019     case ImmTyOffset: OS << "Offset"; break;
1020     case ImmTyInstOffset: OS << "InstOffset"; break;
1021     case ImmTyOffset0: OS << "Offset0"; break;
1022     case ImmTyOffset1: OS << "Offset1"; break;
1023     case ImmTyCPol: OS << "CPol"; break;
1024     case ImmTySWZ: OS << "SWZ"; break;
1025     case ImmTyTFE: OS << "TFE"; break;
1026     case ImmTyD16: OS << "D16"; break;
1027     case ImmTyFORMAT: OS << "FORMAT"; break;
1028     case ImmTyClampSI: OS << "ClampSI"; break;
1029     case ImmTyOModSI: OS << "OModSI"; break;
1030     case ImmTyDPP8: OS << "DPP8"; break;
1031     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1032     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1033     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1034     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1035     case ImmTyDppFi: OS << "FI"; break;
1036     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1037     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1038     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1039     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1040     case ImmTyDMask: OS << "DMask"; break;
1041     case ImmTyDim: OS << "Dim"; break;
1042     case ImmTyUNorm: OS << "UNorm"; break;
1043     case ImmTyDA: OS << "DA"; break;
1044     case ImmTyR128A16: OS << "R128A16"; break;
1045     case ImmTyA16: OS << "A16"; break;
1046     case ImmTyLWE: OS << "LWE"; break;
1047     case ImmTyOff: OS << "Off"; break;
1048     case ImmTyExpTgt: OS << "ExpTgt"; break;
1049     case ImmTyExpCompr: OS << "ExpCompr"; break;
1050     case ImmTyExpVM: OS << "ExpVM"; break;
1051     case ImmTyHwreg: OS << "Hwreg"; break;
1052     case ImmTySendMsg: OS << "SendMsg"; break;
1053     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1054     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1055     case ImmTyAttrChan: OS << "AttrChan"; break;
1056     case ImmTyOpSel: OS << "OpSel"; break;
1057     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1058     case ImmTyNegLo: OS << "NegLo"; break;
1059     case ImmTyNegHi: OS << "NegHi"; break;
1060     case ImmTySwizzle: OS << "Swizzle"; break;
1061     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1062     case ImmTyHigh: OS << "High"; break;
1063     case ImmTyBLGP: OS << "BLGP"; break;
1064     case ImmTyCBSZ: OS << "CBSZ"; break;
1065     case ImmTyABID: OS << "ABID"; break;
1066     case ImmTyEndpgm: OS << "Endpgm"; break;
1067     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1068     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1069     }
1070   }
1071 
1072   void print(raw_ostream &OS) const override {
1073     switch (Kind) {
1074     case Register:
1075       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1076       break;
1077     case Immediate:
1078       OS << '<' << getImm();
1079       if (getImmTy() != ImmTyNone) {
1080         OS << " type: "; printImmTy(OS, getImmTy());
1081       }
1082       OS << " mods: " << Imm.Mods << '>';
1083       break;
1084     case Token:
1085       OS << '\'' << getToken() << '\'';
1086       break;
1087     case Expression:
1088       OS << "<expr " << *Expr << '>';
1089       break;
1090     }
1091   }
1092 
1093   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1094                                       int64_t Val, SMLoc Loc,
1095                                       ImmTy Type = ImmTyNone,
1096                                       bool IsFPImm = false) {
1097     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1098     Op->Imm.Val = Val;
1099     Op->Imm.IsFPImm = IsFPImm;
1100     Op->Imm.Kind = ImmKindTyNone;
1101     Op->Imm.Type = Type;
1102     Op->Imm.Mods = Modifiers();
1103     Op->StartLoc = Loc;
1104     Op->EndLoc = Loc;
1105     return Op;
1106   }
1107 
1108   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1109                                         StringRef Str, SMLoc Loc,
1110                                         bool HasExplicitEncodingSize = true) {
1111     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1112     Res->Tok.Data = Str.data();
1113     Res->Tok.Length = Str.size();
1114     Res->StartLoc = Loc;
1115     Res->EndLoc = Loc;
1116     return Res;
1117   }
1118 
1119   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1120                                       unsigned RegNo, SMLoc S,
1121                                       SMLoc E) {
1122     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1123     Op->Reg.RegNo = RegNo;
1124     Op->Reg.Mods = Modifiers();
1125     Op->StartLoc = S;
1126     Op->EndLoc = E;
1127     return Op;
1128   }
1129 
1130   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1131                                        const class MCExpr *Expr, SMLoc S) {
1132     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1133     Op->Expr = Expr;
1134     Op->StartLoc = S;
1135     Op->EndLoc = S;
1136     return Op;
1137   }
1138 };
1139 
1140 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1142   return OS;
1143 }
1144 
1145 //===----------------------------------------------------------------------===//
1146 // AsmParser
1147 //===----------------------------------------------------------------------===//
1148 
// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
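//
// For example (illustrative), after parsing
//   .amdgpu_hsa_kernel foo
//   foo:
//     v_mov_b32 v3, 0
//     s_mov_b32 s5, 0
// the symbols .kernel.vgpr_count and .kernel.sgpr_count hold 4 and 6
// (highest register index used, plus one).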
1152 class KernelScopeInfo {
1153   int SgprIndexUnusedMin = -1;
1154   int VgprIndexUnusedMin = -1;
1155   int AgprIndexUnusedMin = -1;
1156   MCContext *Ctx = nullptr;
1157   MCSubtargetInfo const *MSTI = nullptr;
1158 
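  // Record that SGPR index 'i' is used and keep the .kernel.sgpr_count symbol
  // in sync with the highest index seen so far.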
1159   void usesSgprAt(int i) {
1160     if (i >= SgprIndexUnusedMin) {
1161       SgprIndexUnusedMin = ++i;
1162       if (Ctx) {
1163         MCSymbol* const Sym =
1164           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1165         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1166       }
1167     }
1168   }
1169 
1170   void usesVgprAt(int i) {
1171     if (i >= VgprIndexUnusedMin) {
1172       VgprIndexUnusedMin = ++i;
1173       if (Ctx) {
1174         MCSymbol* const Sym =
1175           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1176         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1177                                          VgprIndexUnusedMin);
1178         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1179       }
1180     }
1181   }
1182 
1183   void usesAgprAt(int i) {
    // If the subtarget has no MAI instructions, any AGPR use will be rejected
    // in AMDGPUAsmParser::MatchAndEmitInstruction, so there is no need to
    // track it here.
1185     if (!hasMAIInsts(*MSTI))
1186       return;
1187 
1188     if (i >= AgprIndexUnusedMin) {
1189       AgprIndexUnusedMin = ++i;
1190       if (Ctx) {
1191         MCSymbol* const Sym =
1192           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1193         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1194 
1195         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1196         MCSymbol* const vSym =
1197           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1198         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1199                                          VgprIndexUnusedMin);
1200         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1201       }
1202     }
1203   }
1204 
1205 public:
1206   KernelScopeInfo() = default;
1207 
1208   void initialize(MCContext &Context) {
1209     Ctx = &Context;
1210     MSTI = Ctx->getSubtargetInfo();
1211 
1212     usesSgprAt(SgprIndexUnusedMin = -1);
1213     usesVgprAt(VgprIndexUnusedMin = -1);
1214     if (hasMAIInsts(*MSTI)) {
1215       usesAgprAt(AgprIndexUnusedMin = -1);
1216     }
1217   }
1218 
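  // Record a register use (dword index plus width in bits) so the per-kernel
  // count symbols stay up to date.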
1219   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1220                     unsigned RegWidth) {
1221     switch (RegKind) {
1222     case IS_SGPR:
1223       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1224       break;
1225     case IS_AGPR:
1226       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1227       break;
1228     case IS_VGPR:
1229       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1230       break;
1231     default:
1232       break;
1233     }
1234   }
1235 };
1236 
1237 class AMDGPUAsmParser : public MCTargetAsmParser {
1238   MCAsmParser &Parser;
1239 
1240   // Number of extra operands parsed after the first optional operand.
1241   // This may be necessary to skip hardcoded mandatory operands.
1242   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1243 
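  // Encoding variant forced by a mnemonic suffix: ForcedEncodingSize is 32 or
  // 64 for _e32/_e64 (0 when unspecified); ForcedDPP and ForcedSDWA are set
  // for _dpp and _sdwa suffixes.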
1244   unsigned ForcedEncodingSize = 0;
1245   bool ForcedDPP = false;
1246   bool ForcedSDWA = false;
1247   KernelScopeInfo KernelScope;
1248   unsigned CPolSeen;
1249 
1250   /// @name Auto-generated Match Functions
1251   /// {
1252 
1253 #define GET_ASSEMBLER_HEADER
1254 #include "AMDGPUGenAsmMatcher.inc"
1255 
1256   /// }
1257 
1258 private:
1259   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1260   bool OutOfRangeError(SMRange Range);
1261   /// Calculate VGPR/SGPR blocks required for given target, reserved
1262   /// registers, and user-specified NextFreeXGPR values.
1263   ///
1264   /// \param Features [in] Target features, used for bug corrections.
1265   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1266   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1267   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1268   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1269   /// descriptor field, if valid.
1270   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1271   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1272   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1273   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1274   /// \param VGPRBlocks [out] Result VGPR block count.
1275   /// \param SGPRBlocks [out] Result SGPR block count.
1276   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1277                           bool FlatScrUsed, bool XNACKUsed,
1278                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1279                           SMRange VGPRRange, unsigned NextFreeSGPR,
1280                           SMRange SGPRRange, unsigned &VGPRBlocks,
1281                           unsigned &SGPRBlocks);
1282   bool ParseDirectiveAMDGCNTarget();
1283   bool ParseDirectiveAMDHSAKernel();
1284   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1285   bool ParseDirectiveHSACodeObjectVersion();
1286   bool ParseDirectiveHSACodeObjectISA();
1287   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1288   bool ParseDirectiveAMDKernelCodeT();
1289   // TODO: Possibly make subtargetHasRegister const.
1290   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1291   bool ParseDirectiveAMDGPUHsaKernel();
1292 
1293   bool ParseDirectiveISAVersion();
1294   bool ParseDirectiveHSAMetadata();
1295   bool ParseDirectivePALMetadataBegin();
1296   bool ParseDirectivePALMetadata();
1297   bool ParseDirectiveAMDGPULDS();
1298 
1299   /// Common code to parse out a block of text (typically YAML) between start and
1300   /// end directives.
1301   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1302                            const char *AssemblerDirectiveEnd,
1303                            std::string &CollectString);
1304 
1305   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1306                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1307   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1308                            unsigned &RegNum, unsigned &RegWidth,
1309                            bool RestoreOnFailure = false);
1310   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1311                            unsigned &RegNum, unsigned &RegWidth,
1312                            SmallVectorImpl<AsmToken> &Tokens);
1313   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1314                            unsigned &RegWidth,
1315                            SmallVectorImpl<AsmToken> &Tokens);
1316   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1317                            unsigned &RegWidth,
1318                            SmallVectorImpl<AsmToken> &Tokens);
1319   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1320                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1321   bool ParseRegRange(unsigned& Num, unsigned& Width);
1322   unsigned getRegularReg(RegisterKind RegKind,
1323                          unsigned RegNum,
1324                          unsigned RegWidth,
1325                          SMLoc Loc);
1326 
1327   bool isRegister();
1328   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1329   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1330   void initializeGprCountSymbol(RegisterKind RegKind);
1331   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1332                              unsigned RegWidth);
1333   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1334                     bool IsAtomic, bool IsLds = false);
1335   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1336                  bool IsGdsHardcoded);
1337 
1338 public:
1339   enum AMDGPUMatchResultTy {
1340     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1341   };
1342   enum OperandMode {
1343     OperandMode_Default,
1344     OperandMode_NSA,
1345   };
1346 
1347   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1348 
1349   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1350                const MCInstrInfo &MII,
1351                const MCTargetOptions &Options)
1352       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1353     MCAsmParserExtension::Initialize(Parser);
1354 
1355     if (getFeatureBits().none()) {
1356       // Set default features.
1357       copySTI().ToggleFeature("southern-islands");
1358     }
1359 
1360     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1361 
1362     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1367       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1368       MCContext &Ctx = getContext();
1369       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1370         MCSymbol *Sym =
1371             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1372         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1373         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1374         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1375         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1376         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1377       } else {
1378         MCSymbol *Sym =
1379             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1380         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1381         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1382         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1383         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1384         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1385       }
1386       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1387         initializeGprCountSymbol(IS_VGPR);
1388         initializeGprCountSymbol(IS_SGPR);
1389       } else
1390         KernelScope.initialize(getContext());
1391     }
1392   }
1393 
1394   bool hasMIMG_R128() const {
1395     return AMDGPU::hasMIMG_R128(getSTI());
1396   }
1397 
1398   bool hasPackedD16() const {
1399     return AMDGPU::hasPackedD16(getSTI());
1400   }
1401 
1402   bool hasGFX10A16() const {
1403     return AMDGPU::hasGFX10A16(getSTI());
1404   }
1405 
1406   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1407 
1408   bool isSI() const {
1409     return AMDGPU::isSI(getSTI());
1410   }
1411 
1412   bool isCI() const {
1413     return AMDGPU::isCI(getSTI());
1414   }
1415 
1416   bool isVI() const {
1417     return AMDGPU::isVI(getSTI());
1418   }
1419 
1420   bool isGFX9() const {
1421     return AMDGPU::isGFX9(getSTI());
1422   }
1423 
  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1425   bool isGFX90A() const {
1426     return AMDGPU::isGFX90A(getSTI());
1427   }
1428 
1429   bool isGFX940() const {
1430     return AMDGPU::isGFX940(getSTI());
1431   }
1432 
1433   bool isGFX9Plus() const {
1434     return AMDGPU::isGFX9Plus(getSTI());
1435   }
1436 
1437   bool isGFX10() const {
1438     return AMDGPU::isGFX10(getSTI());
1439   }
1440 
1441   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1442 
1443   bool isGFX11() const {
1444     return AMDGPU::isGFX11(getSTI());
1445   }
1446 
1447   bool isGFX11Plus() const {
1448     return AMDGPU::isGFX11Plus(getSTI());
1449   }
1450 
1451   bool isGFX10_BEncoding() const {
1452     return AMDGPU::isGFX10_BEncoding(getSTI());
1453   }
1454 
1455   bool hasInv2PiInlineImm() const {
1456     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1457   }
1458 
1459   bool hasFlatOffsets() const {
1460     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1461   }
1462 
1463   bool hasArchitectedFlatScratch() const {
1464     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1465   }
1466 
1467   bool hasSGPR102_SGPR103() const {
1468     return !isVI() && !isGFX9();
1469   }
1470 
1471   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1472 
1473   bool hasIntClamp() const {
1474     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1475   }
1476 
1477   AMDGPUTargetStreamer &getTargetStreamer() {
1478     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1479     return static_cast<AMDGPUTargetStreamer &>(TS);
1480   }
1481 
1482   const MCRegisterInfo *getMRI() const {
1483     // We need this const_cast because for some reason getContext() is not const
1484     // in MCAsmParser.
1485     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1486   }
1487 
1488   const MCInstrInfo *getMII() const {
1489     return &MII;
1490   }
1491 
1492   const FeatureBitset &getFeatureBits() const {
1493     return getSTI().getFeatureBits();
1494   }
1495 
1496   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1497   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1498   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1499 
1500   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1501   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1502   bool isForcedDPP() const { return ForcedDPP; }
1503   bool isForcedSDWA() const { return ForcedSDWA; }
1504   ArrayRef<unsigned> getMatchedVariants() const;
1505   StringRef getMatchedVariantName() const;
1506 
1507   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1508   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1509                      bool RestoreOnFailure);
1510   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1511   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1512                                         SMLoc &EndLoc) override;
1513   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1514   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1515                                       unsigned Kind) override;
1516   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1517                                OperandVector &Operands, MCStreamer &Out,
1518                                uint64_t &ErrorInfo,
1519                                bool MatchingInlineAsm) override;
1520   bool ParseDirective(AsmToken DirectiveID) override;
1521   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1522                                     OperandMode Mode = OperandMode_Default);
1523   StringRef parseMnemonicSuffix(StringRef Name);
1524   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1525                         SMLoc NameLoc, OperandVector &Operands) override;
1526   //bool ProcessInstruction(MCInst &Inst);
1527 
1528   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1529 
1530   OperandMatchResultTy
1531   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1532                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1533                      bool (*ConvertResult)(int64_t &) = nullptr);
1534 
1535   OperandMatchResultTy
1536   parseOperandArrayWithPrefix(const char *Prefix,
1537                               OperandVector &Operands,
1538                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1539                               bool (*ConvertResult)(int64_t&) = nullptr);
1540 
1541   OperandMatchResultTy
1542   parseNamedBit(StringRef Name, OperandVector &Operands,
1543                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1544   OperandMatchResultTy parseCPol(OperandVector &Operands);
1545   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1546                                              StringRef &Value,
1547                                              SMLoc &StringLoc);
1548 
1549   bool isModifier();
1550   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1551   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1552   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1553   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1554   bool parseSP3NegModifier();
1555   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1556   OperandMatchResultTy parseReg(OperandVector &Operands);
1557   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1558   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1559   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1560   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1561   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1562   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1563   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1564   OperandMatchResultTy parseUfmt(int64_t &Format);
1565   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1566   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1567   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1568   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1569   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1570   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1571   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1572 
1573   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1574   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1575   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1576   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1577 
1578   bool parseCnt(int64_t &IntVal);
1579   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1580 
1581   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1582   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1583   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1584 
1585   bool parseDelay(int64_t &Delay);
1586   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1587 
1588   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1589 
1590 private:
1591   struct OperandInfoTy {
1592     SMLoc Loc;
1593     int64_t Id;
1594     bool IsSymbolic = false;
1595     bool IsDefined = false;
1596 
1597     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1598   };
1599 
1600   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1601   bool validateSendMsg(const OperandInfoTy &Msg,
1602                        const OperandInfoTy &Op,
1603                        const OperandInfoTy &Stream);
1604 
1605   bool parseHwregBody(OperandInfoTy &HwReg,
1606                       OperandInfoTy &Offset,
1607                       OperandInfoTy &Width);
1608   bool validateHwreg(const OperandInfoTy &HwReg,
1609                      const OperandInfoTy &Offset,
1610                      const OperandInfoTy &Width);
1611 
1612   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1613   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1614   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1615 
1616   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1617                       const OperandVector &Operands) const;
1618   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1619   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1620   SMLoc getLitLoc(const OperandVector &Operands) const;
1621   SMLoc getConstLoc(const OperandVector &Operands) const;
1622 
1623   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1624   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1625   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1626   bool validateSOPLiteral(const MCInst &Inst) const;
1627   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1628   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1629   bool validateIntClampSupported(const MCInst &Inst);
1630   bool validateMIMGAtomicDMask(const MCInst &Inst);
1631   bool validateMIMGGatherDMask(const MCInst &Inst);
1632   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1633   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1634   bool validateMIMGAddrSize(const MCInst &Inst);
1635   bool validateMIMGD16(const MCInst &Inst);
1636   bool validateMIMGDim(const MCInst &Inst);
1637   bool validateMIMGMSAA(const MCInst &Inst);
1638   bool validateOpSel(const MCInst &Inst);
1639   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1640   bool validateVccOperand(unsigned Reg) const;
1641   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1642   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1643   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1644   bool validateAGPRLdSt(const MCInst &Inst) const;
1645   bool validateVGPRAlign(const MCInst &Inst) const;
1646   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1647   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1648   bool validateDivScale(const MCInst &Inst);
1649   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1650                              const SMLoc &IDLoc);
1651   bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1652                           const SMLoc &IDLoc);
1653   bool validateExeczVcczOperands(const OperandVector &Operands);
1654   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1655   unsigned getConstantBusLimit(unsigned Opcode) const;
1656   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1657   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1658   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1659 
1660   bool isSupportedMnemo(StringRef Mnemo,
1661                         const FeatureBitset &FBS);
1662   bool isSupportedMnemo(StringRef Mnemo,
1663                         const FeatureBitset &FBS,
1664                         ArrayRef<unsigned> Variants);
1665   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1666 
1667   bool isId(const StringRef Id) const;
1668   bool isId(const AsmToken &Token, const StringRef Id) const;
1669   bool isToken(const AsmToken::TokenKind Kind) const;
1670   bool trySkipId(const StringRef Id);
1671   bool trySkipId(const StringRef Pref, const StringRef Id);
1672   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1673   bool trySkipToken(const AsmToken::TokenKind Kind);
1674   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1675   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1676   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1677 
1678   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1679   AsmToken::TokenKind getTokenKind() const;
1680   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1681   bool parseExpr(OperandVector &Operands);
1682   StringRef getTokenStr() const;
1683   AsmToken peekToken(bool ShouldSkipSpace = true);
1684   AsmToken getToken() const;
1685   SMLoc getLoc() const;
1686   void lex();
1687 
1688 public:
1689   void onBeginOfFile() override;
1690 
1691   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1692   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1693 
1694   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1695   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1696   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1697   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1698   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1699   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1700 
1701   bool parseSwizzleOperand(int64_t &Op,
1702                            const unsigned MinVal,
1703                            const unsigned MaxVal,
1704                            const StringRef ErrMsg,
1705                            SMLoc &Loc);
1706   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1707                             const unsigned MinVal,
1708                             const unsigned MaxVal,
1709                             const StringRef ErrMsg);
1710   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1711   bool parseSwizzleOffset(int64_t &Imm);
1712   bool parseSwizzleMacro(int64_t &Imm);
1713   bool parseSwizzleQuadPerm(int64_t &Imm);
1714   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1715   bool parseSwizzleBroadcast(int64_t &Imm);
1716   bool parseSwizzleSwap(int64_t &Imm);
1717   bool parseSwizzleReverse(int64_t &Imm);
1718 
1719   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1720   int64_t parseGPRIdxMacro();
1721 
1722   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1723   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1724   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1725   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1726 
1727   AMDGPUOperand::Ptr defaultCPol() const;
1728 
1729   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1730   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1731   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1732   AMDGPUOperand::Ptr defaultFlatOffset() const;
1733 
1734   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1735 
1736   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1737                OptionalImmIndexMap &OptionalIdx);
1738   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1739   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1740   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1741   void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1742   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1743                     OptionalImmIndexMap &OptionalIdx);
1744   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1745                 OptionalImmIndexMap &OptionalIdx);
1746 
1747   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1748   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1749 
1750   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1751                bool IsAtomic = false);
1752   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1753   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1754 
1755   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1756 
1757   bool parseDimId(unsigned &Encoding);
1758   OperandMatchResultTy parseDim(OperandVector &Operands);
1759   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1760   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1761   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1762   int64_t parseDPPCtrlSel(StringRef Ctrl);
1763   int64_t parseDPPCtrlPerm();
1764   AMDGPUOperand::Ptr defaultRowMask() const;
1765   AMDGPUOperand::Ptr defaultBankMask() const;
1766   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1767   AMDGPUOperand::Ptr defaultFI() const;
1768   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1769   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1770     cvtDPP(Inst, Operands, true);
1771   }
1772   void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands,
1773                        bool IsDPP8 = false);
1774   void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
1775     cvtVOPCNoDstDPP(Inst, Operands, true);
1776   }
1777   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1778                   bool IsDPP8 = false);
1779   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1780     cvtVOP3DPP(Inst, Operands, true);
1781   }
1782   void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands,
1783                          bool IsDPP8 = false);
1784   void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
1785     cvtVOPC64NoDstDPP(Inst, Operands, true);
1786   }
1787 
1788   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1789                                     AMDGPUOperand::ImmTy Type);
1790   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1791   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1792   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1793   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1794   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1795   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1796   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1797                uint64_t BasicInstType,
1798                bool SkipDstVcc = false,
1799                bool SkipSrcVcc = false);
1800 
1801   AMDGPUOperand::Ptr defaultBLGP() const;
1802   AMDGPUOperand::Ptr defaultCBSZ() const;
1803   AMDGPUOperand::Ptr defaultABID() const;
1804 
1805   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1806   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1807 
1808   AMDGPUOperand::Ptr defaultWaitVDST() const;
1809   AMDGPUOperand::Ptr defaultWaitEXP() const;
1810   OperandMatchResultTy parseVOPD(OperandVector &Operands);
1811 };
1812 
1813 struct OptionalOperand {
1814   const char *Name;
1815   AMDGPUOperand::ImmTy Type;
1816   bool IsBit;
1817   bool (*ConvertResult)(int64_t&);
1818 };
1819 
1820 } // end anonymous namespace
1821 
// May be called with an integer type of equivalent bit width.
1823 static const fltSemantics *getFltSemantics(unsigned Size) {
1824   switch (Size) {
1825   case 4:
1826     return &APFloat::IEEEsingle();
1827   case 8:
1828     return &APFloat::IEEEdouble();
1829   case 2:
1830     return &APFloat::IEEEhalf();
1831   default:
1832     llvm_unreachable("unsupported fp type");
1833   }
1834 }
1835 
1836 static const fltSemantics *getFltSemantics(MVT VT) {
1837   return getFltSemantics(VT.getSizeInBits() / 8);
1838 }
1839 
1840 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1841   switch (OperandType) {
1842   case AMDGPU::OPERAND_REG_IMM_INT32:
1843   case AMDGPU::OPERAND_REG_IMM_FP32:
1844   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1845   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1846   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1847   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1848   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1849   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1850   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1851   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1852   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1853   case AMDGPU::OPERAND_KIMM32:
1854     return &APFloat::IEEEsingle();
1855   case AMDGPU::OPERAND_REG_IMM_INT64:
1856   case AMDGPU::OPERAND_REG_IMM_FP64:
1857   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1858   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1859   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1860     return &APFloat::IEEEdouble();
1861   case AMDGPU::OPERAND_REG_IMM_INT16:
1862   case AMDGPU::OPERAND_REG_IMM_FP16:
1863   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1864   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1865   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1866   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1867   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1868   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1869   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1870   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1871   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1872   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1873   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1874   case AMDGPU::OPERAND_KIMM16:
1875     return &APFloat::IEEEhalf();
1876   default:
1877     llvm_unreachable("unsupported fp type");
1878   }
1879 }
1880 
1881 //===----------------------------------------------------------------------===//
1882 // Operand
1883 //===----------------------------------------------------------------------===//
1884 
1885 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1886   bool Lost;
1887 
  // Convert the literal to the operand's floating-point type.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
1893   if (Status != APFloat::opOK &&
1894       Lost &&
1895       ((Status & APFloat::opOverflow)  != 0 ||
1896        (Status & APFloat::opUnderflow) != 0)) {
1897     return false;
1898   }
1899 
1900   return true;
1901 }
1902 
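// Check whether truncating Val to Size bits is lossless when the value is
// interpreted as either an unsigned or a signed (sign-extended) integer.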
1903 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1904   return isUIntN(Size, Val) || isIntN(Size, Val);
1905 }
1906 
1907 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1908   if (VT.getScalarType() == MVT::i16) {
1909     // FP immediate values are broken.
1910     return isInlinableIntLiteral(Val);
1911   }
1912 
1913   // f16/v2f16 operands work correctly for all values.
1914   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1915 }
1916 
1917 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1918 
1919   // This is a hack to enable named inline values like
1920   // shared_base with both 32-bit and 64-bit operands.
1921   // Note that these values are defined as
1922   // 32-bit operands only.
1923   if (isInlineValue()) {
1924     return true;
1925   }
1926 
1927   if (!isImmTy(ImmTyNone)) {
1928     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1929     return false;
1930   }
  // TODO: We should avoid using host floats here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on MIPS hosts.
1934 
1935   APInt Literal(64, Imm.Val);
1936 
1937   if (Imm.IsFPImm) { // We got fp literal token
1938     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1939       return AMDGPU::isInlinableLiteral64(Imm.Val,
1940                                           AsmParser->hasInv2PiInlineImm());
1941     }
1942 
1943     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1944     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1945       return false;
1946 
1947     if (type.getScalarSizeInBits() == 16) {
1948       return isInlineableLiteralOp16(
1949         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1950         type, AsmParser->hasInv2PiInlineImm());
1951     }
1952 
1953     // Check if single precision literal is inlinable
1954     return AMDGPU::isInlinableLiteral32(
1955       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1956       AsmParser->hasInv2PiInlineImm());
1957   }
1958 
1959   // We got int literal token.
1960   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1961     return AMDGPU::isInlinableLiteral64(Imm.Val,
1962                                         AsmParser->hasInv2PiInlineImm());
1963   }
1964 
1965   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1966     return false;
1967   }
1968 
1969   if (type.getScalarSizeInBits() == 16) {
1970     return isInlineableLiteralOp16(
1971       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1972       type, AsmParser->hasInv2PiInlineImm());
1973   }
1974 
1975   return AMDGPU::isInlinableLiteral32(
1976     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1977     AsmParser->hasInv2PiInlineImm());
1978 }
1979 
1980 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1981   // Check that this immediate can be added as literal
1982   if (!isImmTy(ImmTyNone)) {
1983     return false;
1984   }
1985 
1986   if (!Imm.IsFPImm) {
1987     // We got int literal token.
1988 
1989     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3 because of integer truncation.
      // To avoid ambiguity, disable these cases.
1993       return false;
1994     }
1995 
1996     unsigned Size = type.getSizeInBits();
1997     if (Size == 64)
1998       Size = 32;
1999 
2000     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2001     // types.
2002     return isSafeTruncation(Imm.Val, Size);
2003   }
2004 
2005   // We got fp literal token
2006   if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal will be set to zero, but we still accept
    // such literals.
2008     return true;
2009   }
2010 
2011   if (type == MVT::i64) { // Expected 64-bit int operand
2012     // We don't allow fp literals in 64-bit integer instructions. It is
2013     // unclear how we should encode them.
2014     return false;
2015   }
2016 
2017   // We allow fp literals with f16x2 operands assuming that the specified
2018   // literal goes into the lower half and the upper half is zero. We also
2019   // require that the literal may be losslessly converted to f16.
2020   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2021                      (type == MVT::v2i16)? MVT::i16 :
2022                      (type == MVT::v2f32)? MVT::f32 : type;
2023 
2024   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2025   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2026 }
2027 
2028 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2029   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2030 }
2031 
2032 bool AMDGPUOperand::isVRegWithInputMods() const {
2033   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2034          // GFX90A allows DPP on 64-bit operands.
2035          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2036           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2037 }
2038 
2039 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2040   if (AsmParser->isVI())
2041     return isVReg32();
2042   else if (AsmParser->isGFX9Plus())
2043     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2044   else
2045     return false;
2046 }
2047 
2048 bool AMDGPUOperand::isSDWAFP16Operand() const {
2049   return isSDWAOperand(MVT::f16);
2050 }
2051 
2052 bool AMDGPUOperand::isSDWAFP32Operand() const {
2053   return isSDWAOperand(MVT::f32);
2054 }
2055 
2056 bool AMDGPUOperand::isSDWAInt16Operand() const {
2057   return isSDWAOperand(MVT::i16);
2058 }
2059 
2060 bool AMDGPUOperand::isSDWAInt32Operand() const {
2061   return isSDWAOperand(MVT::i32);
2062 }
2063 
2064 bool AMDGPUOperand::isBoolReg() const {
2065   auto FB = AsmParser->getFeatureBits();
2066   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2067                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2068 }
2069 
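// Apply the 'abs' and 'neg' FP input modifiers to the raw bits of a literal
// of the given byte Size: 'abs' clears the sign bit, 'neg' flips it.
// For a 4-byte operand, for example, the sign bit is bit 31.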
2070 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2071 {
2072   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2073   assert(Size == 2 || Size == 4 || Size == 8);
2074 
2075   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2076 
2077   if (Imm.Mods.Abs) {
2078     Val &= ~FpSignMask;
2079   }
2080   if (Imm.Mods.Neg) {
2081     Val ^= FpSignMask;
2082   }
2083 
2084   return Val;
2085 }
2086 
2087 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2088   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2089                              Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers && isImmTy(ImmTyNone) &&
                         Imm.Mods.hasFPModifiers());
2093   } else {
2094     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2095     Inst.addOperand(MCOperand::createImm(Imm.Val));
2096     setImmKindNone();
2097   }
2098 }
2099 
2100 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2101   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2102   auto OpNum = Inst.getNumOperands();
2103   // Check that this operand accepts literals
2104   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2105 
2106   if (ApplyModifiers) {
2107     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2108     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2109     Val = applyInputFPModifiers(Val, Size);
2110   }
2111 
2112   APInt Literal(64, Val);
2113   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2114 
2115   if (Imm.IsFPImm) { // We got fp literal token
2116     switch (OpTy) {
2117     case AMDGPU::OPERAND_REG_IMM_INT64:
2118     case AMDGPU::OPERAND_REG_IMM_FP64:
2119     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2120     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2121     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2122       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2123                                        AsmParser->hasInv2PiInlineImm())) {
2124         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2125         setImmKindConst();
2126         return;
2127       }
2128 
2129       // Non-inlineable
2130       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check whether the low 32 bits are zero.
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
              "Can't encode literal as exact 64-bit floating-point operand. "
              "Low 32-bits will be set to zero");
2136         }
2137 
2138         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2139         setImmKindLiteral();
2140         return;
2141       }
2142 
2143       // We don't allow fp literals in 64-bit integer instructions. It is
2144       // unclear how we should encode them. This case should be checked earlier
2145       // in predicate methods (isLiteralImm())
2146       llvm_unreachable("fp literal in 64-bit integer instruction.");
2147 
2148     case AMDGPU::OPERAND_REG_IMM_INT32:
2149     case AMDGPU::OPERAND_REG_IMM_FP32:
2150     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2151     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2152     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2153     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2154     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2155     case AMDGPU::OPERAND_REG_IMM_INT16:
2156     case AMDGPU::OPERAND_REG_IMM_FP16:
2157     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2158     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2159     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2160     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2161     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2162     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2163     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2164     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2165     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2166     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2167     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2168     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2169     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2170     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2171     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2172     case AMDGPU::OPERAND_KIMM32:
2173     case AMDGPU::OPERAND_KIMM16: {
2174       bool lost;
2175       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's floating-point type.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should
      // have been checked earlier in isLiteralImm().
2181 
2182       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2183       Inst.addOperand(MCOperand::createImm(ImmVal));
2184       setImmKindLiteral();
2185       return;
2186     }
2187     default:
2188       llvm_unreachable("invalid operand size");
2189     }
2190 
2191     return;
2192   }
2193 
2194   // We got int literal token.
2195   // Only sign extend inline immediates.
2196   switch (OpTy) {
2197   case AMDGPU::OPERAND_REG_IMM_INT32:
2198   case AMDGPU::OPERAND_REG_IMM_FP32:
2199   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2200   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2201   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2202   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2203   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2204   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2205   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2206   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2207   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2208   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2209   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2210     if (isSafeTruncation(Val, 32) &&
2211         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2212                                      AsmParser->hasInv2PiInlineImm())) {
2213       Inst.addOperand(MCOperand::createImm(Val));
2214       setImmKindConst();
2215       return;
2216     }
2217 
2218     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2219     setImmKindLiteral();
2220     return;
2221 
2222   case AMDGPU::OPERAND_REG_IMM_INT64:
2223   case AMDGPU::OPERAND_REG_IMM_FP64:
2224   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2225   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2226   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2227     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2228       Inst.addOperand(MCOperand::createImm(Val));
2229       setImmKindConst();
2230       return;
2231     }
2232 
2233     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2234     setImmKindLiteral();
2235     return;
2236 
2237   case AMDGPU::OPERAND_REG_IMM_INT16:
2238   case AMDGPU::OPERAND_REG_IMM_FP16:
2239   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2240   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2241   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2242   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2243   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2244     if (isSafeTruncation(Val, 16) &&
2245         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2246                                      AsmParser->hasInv2PiInlineImm())) {
2247       Inst.addOperand(MCOperand::createImm(Val));
2248       setImmKindConst();
2249       return;
2250     }
2251 
2252     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2253     setImmKindLiteral();
2254     return;
2255 
2256   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2257   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2258   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2259   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2260     assert(isSafeTruncation(Val, 16));
2261     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2262                                         AsmParser->hasInv2PiInlineImm()));
2263 
2264     Inst.addOperand(MCOperand::createImm(Val));
2265     return;
2266   }
2267   case AMDGPU::OPERAND_KIMM32:
2268     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2269     setImmKindNone();
2270     return;
2271   case AMDGPU::OPERAND_KIMM16:
2272     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2273     setImmKindNone();
2274     return;
2275   default:
2276     llvm_unreachable("invalid operand size");
2277   }
2278 }
2279 
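// Add a Bitwidth-sized FP immediate operand. Integer tokens are truncated to
// Bitwidth bits; FP tokens are first converted to the IEEE format of the
// corresponding size and then encoded by their bit pattern.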
2280 template <unsigned Bitwidth>
2281 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2282   APInt Literal(64, Imm.Val);
2283   setImmKindNone();
2284 
2285   if (!Imm.IsFPImm) {
2286     // We got int literal token.
2287     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2288     return;
2289   }
2290 
2291   bool Lost;
2292   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2293   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2294                     APFloat::rmNearestTiesToEven, &Lost);
2295   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2296 }
2297 
2298 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2299   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2300 }
2301 
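// Check whether the register is one of the named inline constants such as
// the shared/private aperture registers, pops_exiting_wave_id, vccz, execz,
// scc or null.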
2302 static bool isInlineValue(unsigned Reg) {
2303   switch (Reg) {
2304   case AMDGPU::SRC_SHARED_BASE:
2305   case AMDGPU::SRC_SHARED_LIMIT:
2306   case AMDGPU::SRC_PRIVATE_BASE:
2307   case AMDGPU::SRC_PRIVATE_LIMIT:
2308   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2309     return true;
2310   case AMDGPU::SRC_VCCZ:
2311   case AMDGPU::SRC_EXECZ:
2312   case AMDGPU::SRC_SCC:
2313     return true;
2314   case AMDGPU::SGPR_NULL:
2315     return true;
2316   default:
2317     return false;
2318   }
2319 }
2320 
2321 bool AMDGPUOperand::isInlineValue() const {
2322   return isRegKind() && ::isInlineValue(getReg());
2323 }
2324 
2325 //===----------------------------------------------------------------------===//
2326 // AsmParser
2327 //===----------------------------------------------------------------------===//
2328 
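// Map a register kind and a width in bits to the corresponding MC register
// class ID, or return -1 if no class of that width exists for this kind.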
2329 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2330   if (Is == IS_VGPR) {
2331     switch (RegWidth) {
2332       default: return -1;
2333       case 32:
2334         return AMDGPU::VGPR_32RegClassID;
2335       case 64:
2336         return AMDGPU::VReg_64RegClassID;
2337       case 96:
2338         return AMDGPU::VReg_96RegClassID;
2339       case 128:
2340         return AMDGPU::VReg_128RegClassID;
2341       case 160:
2342         return AMDGPU::VReg_160RegClassID;
2343       case 192:
2344         return AMDGPU::VReg_192RegClassID;
2345       case 224:
2346         return AMDGPU::VReg_224RegClassID;
2347       case 256:
2348         return AMDGPU::VReg_256RegClassID;
2349       case 512:
2350         return AMDGPU::VReg_512RegClassID;
2351       case 1024:
2352         return AMDGPU::VReg_1024RegClassID;
2353     }
2354   } else if (Is == IS_TTMP) {
2355     switch (RegWidth) {
2356       default: return -1;
2357       case 32:
2358         return AMDGPU::TTMP_32RegClassID;
2359       case 64:
2360         return AMDGPU::TTMP_64RegClassID;
2361       case 128:
2362         return AMDGPU::TTMP_128RegClassID;
2363       case 256:
2364         return AMDGPU::TTMP_256RegClassID;
2365       case 512:
2366         return AMDGPU::TTMP_512RegClassID;
2367     }
2368   } else if (Is == IS_SGPR) {
2369     switch (RegWidth) {
2370       default: return -1;
2371       case 32:
2372         return AMDGPU::SGPR_32RegClassID;
2373       case 64:
2374         return AMDGPU::SGPR_64RegClassID;
2375       case 96:
2376         return AMDGPU::SGPR_96RegClassID;
2377       case 128:
2378         return AMDGPU::SGPR_128RegClassID;
2379       case 160:
2380         return AMDGPU::SGPR_160RegClassID;
2381       case 192:
2382         return AMDGPU::SGPR_192RegClassID;
2383       case 224:
2384         return AMDGPU::SGPR_224RegClassID;
2385       case 256:
2386         return AMDGPU::SGPR_256RegClassID;
2387       case 512:
2388         return AMDGPU::SGPR_512RegClassID;
2389     }
2390   } else if (Is == IS_AGPR) {
2391     switch (RegWidth) {
2392       default: return -1;
2393       case 32:
2394         return AMDGPU::AGPR_32RegClassID;
2395       case 64:
2396         return AMDGPU::AReg_64RegClassID;
2397       case 96:
2398         return AMDGPU::AReg_96RegClassID;
2399       case 128:
2400         return AMDGPU::AReg_128RegClassID;
2401       case 160:
2402         return AMDGPU::AReg_160RegClassID;
2403       case 192:
2404         return AMDGPU::AReg_192RegClassID;
2405       case 224:
2406         return AMDGPU::AReg_224RegClassID;
2407       case 256:
2408         return AMDGPU::AReg_256RegClassID;
2409       case 512:
2410         return AMDGPU::AReg_512RegClassID;
2411       case 1024:
2412         return AMDGPU::AReg_1024RegClassID;
2413     }
2414   }
2415   return -1;
2416 }
2417 
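// Resolve a special (non-indexed) register name such as "exec", "vcc" or
// "m0" to its register number; unknown names map to AMDGPU::NoRegister.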
2418 static unsigned getSpecialRegForName(StringRef RegName) {
2419   return StringSwitch<unsigned>(RegName)
2420     .Case("exec", AMDGPU::EXEC)
2421     .Case("vcc", AMDGPU::VCC)
2422     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2423     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2424     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2425     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2426     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2427     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2428     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2429     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2430     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2431     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2432     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2433     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2434     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2435     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2436     .Case("m0", AMDGPU::M0)
2437     .Case("vccz", AMDGPU::SRC_VCCZ)
2438     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2439     .Case("execz", AMDGPU::SRC_EXECZ)
2440     .Case("src_execz", AMDGPU::SRC_EXECZ)
2441     .Case("scc", AMDGPU::SRC_SCC)
2442     .Case("src_scc", AMDGPU::SRC_SCC)
2443     .Case("tba", AMDGPU::TBA)
2444     .Case("tma", AMDGPU::TMA)
2445     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2446     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2447     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2448     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2449     .Case("vcc_lo", AMDGPU::VCC_LO)
2450     .Case("vcc_hi", AMDGPU::VCC_HI)
2451     .Case("exec_lo", AMDGPU::EXEC_LO)
2452     .Case("exec_hi", AMDGPU::EXEC_HI)
2453     .Case("tma_lo", AMDGPU::TMA_LO)
2454     .Case("tma_hi", AMDGPU::TMA_HI)
2455     .Case("tba_lo", AMDGPU::TBA_LO)
2456     .Case("tba_hi", AMDGPU::TBA_HI)
2457     .Case("pc", AMDGPU::PC_REG)
2458     .Case("null", AMDGPU::SGPR_NULL)
2459     .Default(AMDGPU::NoRegister);
2460 }
2461 
2462 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2463                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2464   auto R = parseRegister();
2465   if (!R) return true;
2466   assert(R->isReg());
2467   RegNo = R->getReg();
2468   StartLoc = R->getStartLoc();
2469   EndLoc = R->getEndLoc();
2470   return false;
2471 }
2472 
2473 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2474                                     SMLoc &EndLoc) {
2475   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2476 }
2477 
2478 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2479                                                        SMLoc &StartLoc,
2480                                                        SMLoc &EndLoc) {
2481   bool Result =
2482       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2483   bool PendingErrors = getParser().hasPendingError();
2484   getParser().clearPendingErrors();
2485   if (PendingErrors)
2486     return MatchOperand_ParseFail;
2487   if (Result)
2488     return MatchOperand_NoMatch;
2489   return MatchOperand_Success;
2490 }
2491 
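// Fold the next 32-bit register Reg1 of a register list such as
// [s0,s1,s2,s3] into the register being accumulated in Reg/RegWidth.
// Special register pairs (e.g. exec_lo, exec_hi) merge into their 64-bit
// aliases; regular registers must have consecutive indices.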
2492 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2493                                             RegisterKind RegKind, unsigned Reg1,
2494                                             SMLoc Loc) {
2495   switch (RegKind) {
2496   case IS_SPECIAL:
2497     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2498       Reg = AMDGPU::EXEC;
2499       RegWidth = 64;
2500       return true;
2501     }
2502     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2503       Reg = AMDGPU::FLAT_SCR;
2504       RegWidth = 64;
2505       return true;
2506     }
2507     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2508       Reg = AMDGPU::XNACK_MASK;
2509       RegWidth = 64;
2510       return true;
2511     }
2512     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2513       Reg = AMDGPU::VCC;
2514       RegWidth = 64;
2515       return true;
2516     }
2517     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2518       Reg = AMDGPU::TBA;
2519       RegWidth = 64;
2520       return true;
2521     }
2522     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2523       Reg = AMDGPU::TMA;
2524       RegWidth = 64;
2525       return true;
2526     }
2527     Error(Loc, "register does not fit in the list");
2528     return false;
2529   case IS_VGPR:
2530   case IS_SGPR:
2531   case IS_AGPR:
2532   case IS_TTMP:
2533     if (Reg1 != Reg + RegWidth / 32) {
2534       Error(Loc, "registers in a list must have consecutive indices");
2535       return false;
2536     }
2537     RegWidth += 32;
2538     return true;
2539   default:
2540     llvm_unreachable("unexpected register kind");
2541   }
2542 }
2543 
2544 struct RegInfo {
2545   StringLiteral Name;
2546   RegisterKind Kind;
2547 };
2548 
2549 static constexpr RegInfo RegularRegisters[] = {
2550   {{"v"},    IS_VGPR},
2551   {{"s"},    IS_SGPR},
2552   {{"ttmp"}, IS_TTMP},
2553   {{"acc"},  IS_AGPR},
2554   {{"a"},    IS_AGPR},
2555 };
2556 
2557 static bool isRegularReg(RegisterKind Kind) {
2558   return Kind == IS_VGPR ||
2559          Kind == IS_SGPR ||
2560          Kind == IS_TTMP ||
2561          Kind == IS_AGPR;
2562 }
2563 
2564 static const RegInfo* getRegularRegInfo(StringRef Str) {
2565   for (const RegInfo &Reg : RegularRegisters)
2566     if (Str.startswith(Reg.Name))
2567       return &Reg;
2568   return nullptr;
2569 }
2570 
2571 static bool getRegNum(StringRef Str, unsigned& Num) {
2572   return !Str.getAsInteger(10, Num);
2573 }
2574 
2575 bool
2576 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2577                             const AsmToken &NextToken) const {
2578 
2579   // A list of consecutive registers: [s0,s1,s2,s3]
2580   if (Token.is(AsmToken::LBrac))
2581     return true;
2582 
2583   if (!Token.is(AsmToken::Identifier))
2584     return false;
2585 
2586   // A single register like s0 or a range of registers like s[0:1]
2587 
2588   StringRef Str = Token.getString();
2589   const RegInfo *Reg = getRegularRegInfo(Str);
2590   if (Reg) {
2591     StringRef RegName = Reg->Name;
2592     StringRef RegSuffix = Str.substr(RegName.size());
2593     if (!RegSuffix.empty()) {
2594       unsigned Num;
2595       // A single register with an index: rXX
2596       if (getRegNum(RegSuffix, Num))
2597         return true;
2598     } else {
2599       // A range of registers: r[XX:YY].
2600       if (NextToken.is(AsmToken::LBrac))
2601         return true;
2602     }
2603   }
2604 
2605   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2606 }
2607 
2608 bool
2609 AMDGPUAsmParser::isRegister()
2610 {
2611   return isRegister(getToken(), peekToken());
2612 }
2613 
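// Translate a regular register specification (kind, first 32-bit index and
// width in bits) into an MC register, diagnosing misaligned indices and
// unsupported widths. For example, a 128-bit SGPR range must start at an
// index that is a multiple of 4.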
2614 unsigned
2615 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2616                                unsigned RegNum,
2617                                unsigned RegWidth,
2618                                SMLoc Loc) {
2619 
2620   assert(isRegularReg(RegKind));
2621 
2622   unsigned AlignSize = 1;
2623   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2624     // SGPR and TTMP registers must be aligned.
2625     // Max required alignment is 4 dwords.
2626     AlignSize = std::min(RegWidth / 32, 4u);
2627   }
2628 
2629   if (RegNum % AlignSize != 0) {
2630     Error(Loc, "invalid register alignment");
2631     return AMDGPU::NoRegister;
2632   }
2633 
2634   unsigned RegIdx = RegNum / AlignSize;
2635   int RCID = getRegClass(RegKind, RegWidth);
2636   if (RCID == -1) {
2637     Error(Loc, "invalid or unsupported register size");
2638     return AMDGPU::NoRegister;
2639   }
2640 
2641   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2642   const MCRegisterClass RC = TRI->getRegClass(RCID);
2643   if (RegIdx >= RC.getNumRegs()) {
2644     Error(Loc, "register index is out of range");
2645     return AMDGPU::NoRegister;
2646   }
2647 
2648   return RC.getRegister(RegIdx);
2649 }
2650 
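// Parse a bracketed register index or range such as "[2]" or "[2:5]",
// returning the first index in Num and the total width in bits in RegWidth.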
2651 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2652   int64_t RegLo, RegHi;
2653   if (!skipToken(AsmToken::LBrac, "missing register index"))
2654     return false;
2655 
2656   SMLoc FirstIdxLoc = getLoc();
2657   SMLoc SecondIdxLoc;
2658 
2659   if (!parseExpr(RegLo))
2660     return false;
2661 
2662   if (trySkipToken(AsmToken::Colon)) {
2663     SecondIdxLoc = getLoc();
2664     if (!parseExpr(RegHi))
2665       return false;
2666   } else {
2667     RegHi = RegLo;
2668   }
2669 
2670   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2671     return false;
2672 
2673   if (!isUInt<32>(RegLo)) {
2674     Error(FirstIdxLoc, "invalid register index");
2675     return false;
2676   }
2677 
2678   if (!isUInt<32>(RegHi)) {
2679     Error(SecondIdxLoc, "invalid register index");
2680     return false;
2681   }
2682 
2683   if (RegLo > RegHi) {
2684     Error(FirstIdxLoc, "first register index should not exceed second index");
2685     return false;
2686   }
2687 
2688   Num = static_cast<unsigned>(RegLo);
2689   RegWidth = 32 * ((RegHi - RegLo) + 1);
2690   return true;
2691 }
2692 
2693 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2694                                           unsigned &RegNum, unsigned &RegWidth,
2695                                           SmallVectorImpl<AsmToken> &Tokens) {
2696   assert(isToken(AsmToken::Identifier));
2697   unsigned Reg = getSpecialRegForName(getTokenStr());
2698   if (Reg) {
2699     RegNum = 0;
2700     RegWidth = 32;
2701     RegKind = IS_SPECIAL;
2702     Tokens.push_back(getToken());
2703     lex(); // skip register name
2704   }
2705   return Reg;
2706 }
2707 
2708 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2709                                           unsigned &RegNum, unsigned &RegWidth,
2710                                           SmallVectorImpl<AsmToken> &Tokens) {
2711   assert(isToken(AsmToken::Identifier));
2712   StringRef RegName = getTokenStr();
2713   auto Loc = getLoc();
2714 
2715   const RegInfo *RI = getRegularRegInfo(RegName);
2716   if (!RI) {
2717     Error(Loc, "invalid register name");
2718     return AMDGPU::NoRegister;
2719   }
2720 
2721   Tokens.push_back(getToken());
2722   lex(); // skip register name
2723 
2724   RegKind = RI->Kind;
2725   StringRef RegSuffix = RegName.substr(RI->Name.size());
2726   if (!RegSuffix.empty()) {
2727     // Single 32-bit register: vXX.
2728     if (!getRegNum(RegSuffix, RegNum)) {
2729       Error(Loc, "invalid register index");
2730       return AMDGPU::NoRegister;
2731     }
2732     RegWidth = 32;
2733   } else {
2734     // Range of registers: v[XX:YY]. ":YY" is optional.
2735     if (!ParseRegRange(RegNum, RegWidth))
2736       return AMDGPU::NoRegister;
2737   }
2738 
2739   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2740 }
2741 
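// Parse a list of consecutive 32-bit registers of the same kind, e.g.
// [v0,v1,v2,v3], and combine them into a single wider register.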
2742 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2743                                        unsigned &RegWidth,
2744                                        SmallVectorImpl<AsmToken> &Tokens) {
2745   unsigned Reg = AMDGPU::NoRegister;
2746   auto ListLoc = getLoc();
2747 
2748   if (!skipToken(AsmToken::LBrac,
2749                  "expected a register or a list of registers")) {
2750     return AMDGPU::NoRegister;
2751   }
2752 
2753   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2754 
2755   auto Loc = getLoc();
2756   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2757     return AMDGPU::NoRegister;
2758   if (RegWidth != 32) {
2759     Error(Loc, "expected a single 32-bit register");
2760     return AMDGPU::NoRegister;
2761   }
2762 
2763   for (; trySkipToken(AsmToken::Comma); ) {
2764     RegisterKind NextRegKind;
2765     unsigned NextReg, NextRegNum, NextRegWidth;
2766     Loc = getLoc();
2767 
2768     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2769                              NextRegNum, NextRegWidth,
2770                              Tokens)) {
2771       return AMDGPU::NoRegister;
2772     }
2773     if (NextRegWidth != 32) {
2774       Error(Loc, "expected a single 32-bit register");
2775       return AMDGPU::NoRegister;
2776     }
2777     if (NextRegKind != RegKind) {
2778       Error(Loc, "registers in a list must be of the same kind");
2779       return AMDGPU::NoRegister;
2780     }
2781     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2782       return AMDGPU::NoRegister;
2783   }
2784 
2785   if (!skipToken(AsmToken::RBrac,
2786                  "expected a comma or a closing square bracket")) {
2787     return AMDGPU::NoRegister;
2788   }
2789 
2790   if (isRegularReg(RegKind))
2791     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2792 
2793   return Reg;
2794 }
2795 
2796 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2797                                           unsigned &RegNum, unsigned &RegWidth,
2798                                           SmallVectorImpl<AsmToken> &Tokens) {
2799   auto Loc = getLoc();
2800   Reg = AMDGPU::NoRegister;
2801 
2802   if (isToken(AsmToken::Identifier)) {
2803     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2804     if (Reg == AMDGPU::NoRegister)
2805       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2806   } else {
2807     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2808   }
2809 
2810   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2811   if (Reg == AMDGPU::NoRegister) {
2812     assert(Parser.hasPendingError());
2813     return false;
2814   }
2815 
2816   if (!subtargetHasRegister(*TRI, Reg)) {
2817     if (Reg == AMDGPU::SGPR_NULL) {
2818       Error(Loc, "'null' operand is not supported on this GPU");
2819     } else {
2820       Error(Loc, "register not available on this GPU");
2821     }
2822     return false;
2823   }
2824 
2825   return true;
2826 }
2827 
2828 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2829                                           unsigned &RegNum, unsigned &RegWidth,
2830                                           bool RestoreOnFailure /*=false*/) {
2831   Reg = AMDGPU::NoRegister;
2832 
2833   SmallVector<AsmToken, 1> Tokens;
2834   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2835     if (RestoreOnFailure) {
2836       while (!Tokens.empty()) {
2837         getLexer().UnLex(Tokens.pop_back_val());
2838       }
2839     }
2840     return true;
2841   }
2842   return false;
2843 }
2844 
2845 Optional<StringRef>
2846 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2847   switch (RegKind) {
2848   case IS_VGPR:
2849     return StringRef(".amdgcn.next_free_vgpr");
2850   case IS_SGPR:
2851     return StringRef(".amdgcn.next_free_sgpr");
2852   default:
2853     return None;
2854   }
2855 }
2856 
2857 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2858   auto SymbolName = getGprCountSymbolName(RegKind);
2859   assert(SymbolName && "initializing invalid register kind");
2860   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2861   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2862 }
2863 
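// Record a use of registers [DwordRegIndex, DwordRegIndex + RegWidth/32) by
// raising the .amdgcn.next_free_{v,s}gpr symbol if the new maximum exceeds
// its current value. Returns false only if an error has been emitted.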
2864 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2865                                             unsigned DwordRegIndex,
2866                                             unsigned RegWidth) {
2867   // Symbols are only defined for GCN targets
2868   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2869     return true;
2870 
2871   auto SymbolName = getGprCountSymbolName(RegKind);
2872   if (!SymbolName)
2873     return true;
2874   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2875 
2876   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2877   int64_t OldCount;
2878 
2879   if (!Sym->isVariable())
2880     return !Error(getLoc(),
2881                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2882   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2883     return !Error(
2884         getLoc(),
2885         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2886 
2887   if (OldCount <= NewMax)
2888     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2889 
2890   return true;
2891 }
2892 
2893 std::unique_ptr<AMDGPUOperand>
2894 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2895   const auto &Tok = getToken();
2896   SMLoc StartLoc = Tok.getLoc();
2897   SMLoc EndLoc = Tok.getEndLoc();
2898   RegisterKind RegKind;
2899   unsigned Reg, RegNum, RegWidth;
2900 
2901   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2902     return nullptr;
2903   }
2904   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2905     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2906       return nullptr;
2907   } else
2908     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2909   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2910 }
2911 
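// Parse a plain immediate operand: either a floating-point literal with an
// optional leading '-', or an integer/MC expression. Absolute expressions
// are folded into an immediate operand; other expressions are kept as
// expression operands.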
2912 OperandMatchResultTy
2913 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2914   // TODO: add syntactic sugar for 1/(2*PI)
2915 
2916   if (isRegister())
2917     return MatchOperand_NoMatch;
2918   assert(!isModifier());
2919 
2920   const auto& Tok = getToken();
2921   const auto& NextTok = peekToken();
2922   bool IsReal = Tok.is(AsmToken::Real);
2923   SMLoc S = getLoc();
2924   bool Negate = false;
2925 
2926   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2927     lex();
2928     IsReal = true;
2929     Negate = true;
2930   }
2931 
2932   if (IsReal) {
    // Floating-point expressions are not supported.
    // Only floating-point literals with an optional sign are allowed here.
2936 
2937     StringRef Num = getTokenStr();
2938     lex();
2939 
2940     APFloat RealVal(APFloat::IEEEdouble());
2941     auto roundMode = APFloat::rmNearestTiesToEven;
2942     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2943       return MatchOperand_ParseFail;
2944     }
2945     if (Negate)
2946       RealVal.changeSign();
2947 
2948     Operands.push_back(
2949       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2950                                AMDGPUOperand::ImmTyNone, true));
2951 
2952     return MatchOperand_Success;
2953 
2954   } else {
2955     int64_t IntVal;
2956     const MCExpr *Expr;
2957     SMLoc S = getLoc();
2958 
2959     if (HasSP3AbsModifier) {
2960       // This is a workaround for handling expressions
2961       // as arguments of SP3 'abs' modifier, for example:
2962       //     |1.0|
2963       //     |-1|
2964       //     |1+x|
2965       // This syntax is not compatible with syntax of standard
2966       // MC expressions (due to the trailing '|').
2967       SMLoc EndLoc;
2968       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2969         return MatchOperand_ParseFail;
2970     } else {
2971       if (Parser.parseExpression(Expr))
2972         return MatchOperand_ParseFail;
2973     }
2974 
2975     if (Expr->evaluateAsAbsolute(IntVal)) {
2976       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2977     } else {
2978       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2979     }
2980 
2981     return MatchOperand_Success;
2982   }
2983 
2984   return MatchOperand_NoMatch;
2985 }
2986 
2987 OperandMatchResultTy
2988 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2989   if (!isRegister())
2990     return MatchOperand_NoMatch;
2991 
2992   if (auto R = parseRegister()) {
2993     assert(R->isReg());
2994     Operands.push_back(std::move(R));
2995     return MatchOperand_Success;
2996   }
2997   return MatchOperand_ParseFail;
2998 }
2999 
3000 OperandMatchResultTy
3001 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
3002   auto res = parseReg(Operands);
3003   if (res != MatchOperand_NoMatch) {
3004     return res;
3005   } else if (isModifier()) {
3006     return MatchOperand_NoMatch;
3007   } else {
3008     return parseImm(Operands, HasSP3AbsMod);
3009   }
3010 }
3011 
3012 bool
3013 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3014   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3015     const auto &str = Token.getString();
3016     return str == "abs" || str == "neg" || str == "sext";
3017   }
3018   return false;
3019 }
3020 
3021 bool
3022 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3023   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3024 }
3025 
3026 bool
3027 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3028   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3029 }
3030 
3031 bool
3032 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3033   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3034 }
3035 
// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
// avoid parsing these modifiers as expressions. Currently
3039 // recognized sequences are:
3040 //   |...|
3041 //   abs(...)
3042 //   neg(...)
3043 //   sext(...)
3044 //   -reg
3045 //   -|...|
3046 //   -abs(...)
3047 //   name:...
3048 // Note that simple opcode modifiers like 'gds' may be parsed as
3049 // expressions; this is a special case. See getExpressionAsToken.
3050 //
3051 bool
3052 AMDGPUAsmParser::isModifier() {
3053 
3054   AsmToken Tok = getToken();
3055   AsmToken NextToken[2];
3056   peekTokens(NextToken);
3057 
3058   return isOperandModifier(Tok, NextToken[0]) ||
3059          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3060          isOpcodeModifierWithVal(Tok, NextToken[0]);
3061 }
3062 
3063 // Check if the current token is an SP3 'neg' modifier.
3064 // Currently this modifier is allowed in the following context:
3065 //
3066 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3067 // 2. Before an 'abs' modifier: -abs(...)
3068 // 3. Before an SP3 'abs' modifier: -|...|
3069 //
3070 // In all other cases "-" is handled as a part
3071 // of an expression that follows the sign.
3072 //
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
3084 //
3085 bool
3086 AMDGPUAsmParser::parseSP3NegModifier() {
3087 
3088   AsmToken NextToken[2];
3089   peekTokens(NextToken);
3090 
3091   if (isToken(AsmToken::Minus) &&
3092       (isRegister(NextToken[0], NextToken[1]) ||
3093        NextToken[0].is(AsmToken::Pipe) ||
3094        isId(NextToken[0], "abs"))) {
3095     lex();
3096     return true;
3097   }
3098 
3099   return false;
3100 }
3101 
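// Parse an operand optionally wrapped in FP input modifiers, accepting both
// the named forms neg(...) and abs(...) and the SP3 forms -... and |...|.
// Mixing a named modifier with its SP3 counterpart is rejected.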
3102 OperandMatchResultTy
3103 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3104                                               bool AllowImm) {
3105   bool Neg, SP3Neg;
3106   bool Abs, SP3Abs;
3107   SMLoc Loc;
3108 
3109   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3110   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3111     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3112     return MatchOperand_ParseFail;
3113   }
3114 
3115   SP3Neg = parseSP3NegModifier();
3116 
3117   Loc = getLoc();
3118   Neg = trySkipId("neg");
3119   if (Neg && SP3Neg) {
3120     Error(Loc, "expected register or immediate");
3121     return MatchOperand_ParseFail;
3122   }
3123   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3124     return MatchOperand_ParseFail;
3125 
3126   Abs = trySkipId("abs");
3127   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3128     return MatchOperand_ParseFail;
3129 
3130   Loc = getLoc();
3131   SP3Abs = trySkipToken(AsmToken::Pipe);
3132   if (Abs && SP3Abs) {
3133     Error(Loc, "expected register or immediate");
3134     return MatchOperand_ParseFail;
3135   }
3136 
3137   OperandMatchResultTy Res;
3138   if (AllowImm) {
3139     Res = parseRegOrImm(Operands, SP3Abs);
3140   } else {
3141     Res = parseReg(Operands);
3142   }
3143   if (Res != MatchOperand_Success) {
3144     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3145   }
3146 
3147   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3148     return MatchOperand_ParseFail;
3149   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3150     return MatchOperand_ParseFail;
3151   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3152     return MatchOperand_ParseFail;
3153 
3154   AMDGPUOperand::Modifiers Mods;
3155   Mods.Abs = Abs || SP3Abs;
3156   Mods.Neg = Neg || SP3Neg;
3157 
3158   if (Mods.hasFPModifiers()) {
3159     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3160     if (Op.isExpr()) {
3161       Error(Op.getStartLoc(), "expected an absolute expression");
3162       return MatchOperand_ParseFail;
3163     }
3164     Op.setModifiers(Mods);
3165   }
3166   return MatchOperand_Success;
3167 }
3168 
3169 OperandMatchResultTy
3170 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3171                                                bool AllowImm) {
3172   bool Sext = trySkipId("sext");
3173   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3174     return MatchOperand_ParseFail;
3175 
3176   OperandMatchResultTy Res;
3177   if (AllowImm) {
3178     Res = parseRegOrImm(Operands);
3179   } else {
3180     Res = parseReg(Operands);
3181   }
3182   if (Res != MatchOperand_Success) {
3183     return Sext? MatchOperand_ParseFail : Res;
3184   }
3185 
3186   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3187     return MatchOperand_ParseFail;
3188 
3189   AMDGPUOperand::Modifiers Mods;
3190   Mods.Sext = Sext;
3191 
3192   if (Mods.hasIntModifiers()) {
3193     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3194     if (Op.isExpr()) {
3195       Error(Op.getStartLoc(), "expected an absolute expression");
3196       return MatchOperand_ParseFail;
3197     }
3198     Op.setModifiers(Mods);
3199   }
3200 
3201   return MatchOperand_Success;
3202 }
3203 
3204 OperandMatchResultTy
3205 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3206   return parseRegOrImmWithFPInputMods(Operands, false);
3207 }
3208 
3209 OperandMatchResultTy
3210 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3211   return parseRegOrImmWithIntInputMods(Operands, false);
3212 }
3213 
3214 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3215   auto Loc = getLoc();
3216   if (trySkipId("off")) {
3217     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3218                                                 AMDGPUOperand::ImmTyOff, false));
3219     return MatchOperand_Success;
3220   }
3221 
3222   if (!isRegister())
3223     return MatchOperand_NoMatch;
3224 
3225   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3226   if (Reg) {
3227     Operands.push_back(std::move(Reg));
3228     return MatchOperand_Success;
3229   }
3230 
  return MatchOperand_ParseFail;
}
3234 
3235 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3236   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3237 
3238   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3239       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3240       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3241       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3242     return Match_InvalidOperand;
3243 
3244   if ((TSFlags & SIInstrFlags::VOP3) &&
3245       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3246       getForcedEncodingSize() != 64)
3247     return Match_PreferE32;
3248 
3249   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3250       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD.
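    // e.g. (illustrative) "v_mac_f32_sdwa v1, v2, v3 dst_sel:WORD_1 ..." would
    // be rejected by the check below.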
3252     auto OpNum =
3253         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3254     const auto &Op = Inst.getOperand(OpNum);
3255     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3256       return Match_InvalidOperand;
3257     }
3258   }
3259 
3260   return Match_Success;
3261 }
3262 
3263 static ArrayRef<unsigned> getAllVariants() {
3264   static const unsigned Variants[] = {
3265     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3266     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3267     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3268   };
3269 
3270   return makeArrayRef(Variants);
3271 }
3272 
// Which asm variants we should check.
3274 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3275   if (isForcedDPP() && isForcedVOP3()) {
3276     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3277     return makeArrayRef(Variants);
3278   }
3279   if (getForcedEncodingSize() == 32) {
3280     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3281     return makeArrayRef(Variants);
3282   }
3283 
3284   if (isForcedVOP3()) {
3285     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3286     return makeArrayRef(Variants);
3287   }
3288 
3289   if (isForcedSDWA()) {
3290     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3291                                         AMDGPUAsmVariants::SDWA9};
3292     return makeArrayRef(Variants);
3293   }
3294 
3295   if (isForcedDPP()) {
3296     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3297     return makeArrayRef(Variants);
3298   }
3299 
3300   return getAllVariants();
3301 }
3302 
3303 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3304   if (isForcedDPP() && isForcedVOP3())
3305     return "e64_dpp";
3306 
3307   if (getForcedEncodingSize() == 32)
3308     return "e32";
3309 
3310   if (isForcedVOP3())
3311     return "e64";
3312 
3313   if (isForcedSDWA())
3314     return "sdwa";
3315 
3316   if (isForcedDPP())
3317     return "dpp";
3318 
3319   return "";
3320 }
3321 
3322 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3323   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3324   const unsigned Num = Desc.getNumImplicitUses();
3325   for (unsigned i = 0; i < Num; ++i) {
3326     unsigned Reg = Desc.ImplicitUses[i];
3327     switch (Reg) {
3328     case AMDGPU::FLAT_SCR:
3329     case AMDGPU::VCC:
3330     case AMDGPU::VCC_LO:
3331     case AMDGPU::VCC_HI:
3332     case AMDGPU::M0:
3333       return Reg;
3334     default:
3335       break;
3336     }
3337   }
3338   return AMDGPU::NoRegister;
3339 }
3340 
3341 // NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
3343 // Note that there are no cases when a GFX7 opcode violates
3344 // constant bus limitations due to the use of an f16 constant.
3345 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3346                                        unsigned OpIdx) const {
3347   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3348 
3349   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3350     return false;
3351   }
3352 
3353   const MCOperand &MO = Inst.getOperand(OpIdx);
3354 
3355   int64_t Val = MO.getImm();
3356   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3357 
3358   switch (OpSize) { // expected operand size
3359   case 8:
3360     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3361   case 4:
3362     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3363   case 2: {
3364     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3365     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3366         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3367         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3368       return AMDGPU::isInlinableIntLiteral(Val);
3369 
3370     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3371         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3372         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3373       return AMDGPU::isInlinableIntLiteralV216(Val);
3374 
3375     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3376         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3377         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3378       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3379 
3380     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3381   }
3382   default:
3383     llvm_unreachable("invalid operand size");
3384   }
3385 }
3386 
3387 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3388   if (!isGFX10Plus())
3389     return 1;
3390 
3391   switch (Opcode) {
3392   // 64-bit shift instructions can use only one scalar value input
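  // e.g. (illustrative) "v_lshlrev_b64 v[0:1], s4, v[2:3]" stays within the
  // limit of 1 returned here, while a second scalar source would not.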
3393   case AMDGPU::V_LSHLREV_B64_e64:
3394   case AMDGPU::V_LSHLREV_B64_gfx10:
3395   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3396   case AMDGPU::V_LSHRREV_B64_e64:
3397   case AMDGPU::V_LSHRREV_B64_gfx10:
3398   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3399   case AMDGPU::V_ASHRREV_I64_e64:
3400   case AMDGPU::V_ASHRREV_I64_gfx10:
3401   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3402   case AMDGPU::V_LSHL_B64_e64:
3403   case AMDGPU::V_LSHR_B64_e64:
3404   case AMDGPU::V_ASHR_I64_e64:
3405     return 1;
3406   default:
3407     return 2;
3408   }
3409 }
3410 
3411 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3412   const MCOperand &MO = Inst.getOperand(OpIdx);
3413   if (MO.isImm()) {
3414     return !isInlineConstant(Inst, OpIdx);
3415   } else if (MO.isReg()) {
3416     auto Reg = MO.getReg();
3417     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3418     auto PReg = mc2PseudoReg(Reg);
3419     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3420   } else {
3421     return true;
3422   }
3423 }
3424 
3425 bool
3426 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3427                                                 const OperandVector &Operands) {
3428   const unsigned Opcode = Inst.getOpcode();
3429   const MCInstrDesc &Desc = MII.get(Opcode);
3430   unsigned LastSGPR = AMDGPU::NoRegister;
3431   unsigned ConstantBusUseCount = 0;
3432   unsigned NumLiterals = 0;
3433   unsigned LiteralSize;
3434 
3435   if (Desc.TSFlags &
3436       (SIInstrFlags::VOPC |
3437        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3438        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3439        SIInstrFlags::SDWA)) {
3440     // Check special imm operands (used by madmk, etc)
3441     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3442       ++NumLiterals;
3443       LiteralSize = 4;
3444     }
3445 
3446     SmallDenseSet<unsigned> SGPRsUsed;
3447     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3448     if (SGPRUsed != AMDGPU::NoRegister) {
3449       SGPRsUsed.insert(SGPRUsed);
3450       ++ConstantBusUseCount;
3451     }
3452 
3453     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3454     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3455     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3456 
3457     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3458 
3459     for (int OpIdx : OpIndices) {
3460       if (OpIdx == -1) break;
3461 
3462       const MCOperand &MO = Inst.getOperand(OpIdx);
3463       if (usesConstantBus(Inst, OpIdx)) {
3464         if (MO.isReg()) {
3465           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
3467           //   s0, s[0:1]
3468           //   flat_scratch_lo, flat_scratch
3469           //   flat_scratch_lo, flat_scratch_hi
3470           // are theoretically valid but they are disabled anyway.
3471           // Note that this code mimics SIInstrInfo::verifyInstruction
3472           if (SGPRsUsed.insert(LastSGPR).second) {
3473             ++ConstantBusUseCount;
3474           }
3475         } else { // Expression or a literal
3476 
3477           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3478             continue; // special operand like VINTERP attr_chan
3479 
3480           // An instruction may use only one literal.
          // This has been validated in a previous step.
3482           // See validateVOPLiteral.
3483           // This literal may be used as more than one operand.
3484           // If all these operands are of the same size,
3485           // this literal counts as one scalar value.
3486           // Otherwise it counts as 2 scalar values.
3487           // See "GFX10 Shader Programming", section 3.6.2.3.
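          // For instance (illustrative), one literal reused by both a 32-bit
          // and a 64-bit source operand has mismatched sizes and is counted
          // as 2 scalar values by the code below.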
3488 
3489           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3490           if (Size < 4) Size = 4;
3491 
3492           if (NumLiterals == 0) {
3493             NumLiterals = 1;
3494             LiteralSize = Size;
3495           } else if (LiteralSize != Size) {
3496             NumLiterals = 2;
3497           }
3498         }
3499       }
3500     }
3501   }
3502   ConstantBusUseCount += NumLiterals;
3503 
3504   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3505     return true;
3506 
3507   SMLoc LitLoc = getLitLoc(Operands);
3508   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3509   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3510   Error(Loc, "invalid operand (violates constant bus restrictions)");
3511   return false;
3512 }
3513 
3514 bool
3515 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3516                                                  const OperandVector &Operands) {
3517   const unsigned Opcode = Inst.getOpcode();
3518   const MCInstrDesc &Desc = MII.get(Opcode);
3519 
3520   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3521   if (DstIdx == -1 ||
3522       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3523     return true;
3524   }
3525 
3526   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3527 
3528   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3529   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3530   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3531 
3532   assert(DstIdx != -1);
3533   const MCOperand &Dst = Inst.getOperand(DstIdx);
3534   assert(Dst.isReg());
3535 
3536   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3537 
3538   for (int SrcIdx : SrcIndices) {
3539     if (SrcIdx == -1) break;
3540     const MCOperand &Src = Inst.getOperand(SrcIdx);
3541     if (Src.isReg()) {
3542       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3543         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3544         Error(getRegLoc(SrcReg, Operands),
3545           "destination must be different than all sources");
3546         return false;
3547       }
3548     }
3549   }
3550 
3551   return true;
3552 }
3553 
3554 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3555 
3556   const unsigned Opc = Inst.getOpcode();
3557   const MCInstrDesc &Desc = MII.get(Opc);
3558 
3559   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3560     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3561     assert(ClampIdx != -1);
3562     return Inst.getOperand(ClampIdx).getImm() == 0;
3563   }
3564 
3565   return true;
3566 }
3567 
3568 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3569 
3570   const unsigned Opc = Inst.getOpcode();
3571   const MCInstrDesc &Desc = MII.get(Opc);
3572 
3573   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3574     return None;
3575 
3576   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3577   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3578   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3579 
3580   assert(VDataIdx != -1);
3581 
3582   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3583     return None;
3584 
3585   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3586   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3587   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3588   if (DMask == 0)
3589     DMask = 1;
3590 
3591   bool isPackedD16 = false;
3592   unsigned DataSize =
3593     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3594   if (hasPackedD16()) {
3595     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3596     isPackedD16 = D16Idx >= 0;
3597     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3598       DataSize = (DataSize + 1) / 2;
3599   }
3600 
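  // Illustrative example: dmask = 0b0111 selects 3 components, so vdata is
  // expected to be 3 dwords, 4 with tfe, or 2 when packed d16 halves it.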
3601   if ((VDataSize / 4) == DataSize + TFESize)
3602     return None;
3603 
3604   return StringRef(isPackedD16
3605                        ? "image data size does not match dmask, d16 and tfe"
3606                        : "image data size does not match dmask and tfe");
3607 }
3608 
3609 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3610   const unsigned Opc = Inst.getOpcode();
3611   const MCInstrDesc &Desc = MII.get(Opc);
3612 
3613   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3614     return true;
3615 
3616   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3617 
3618   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3619       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3620   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3621   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3622   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3623   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3624 
3625   assert(VAddr0Idx != -1);
3626   assert(SrsrcIdx != -1);
3627   assert(SrsrcIdx > VAddr0Idx);
3628 
3629   if (DimIdx == -1)
3630     return true; // intersect_ray
3631 
3632   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3633   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3634   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3635   unsigned ActualAddrSize =
3636       IsNSA ? SrsrcIdx - VAddr0Idx
3637             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3638   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3639 
3640   unsigned ExpectedAddrSize =
3641       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3642 
3643   if (!IsNSA) {
3644     if (ExpectedAddrSize > 8)
3645       ExpectedAddrSize = 16;
3646 
3647     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3648     // This provides backward compatibility for assembly created
3649     // before 160b/192b/224b types were directly supported.
3650     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3651       return true;
3652   }
3653 
3654   return ActualAddrSize == ExpectedAddrSize;
3655 }
3656 
3657 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3658 
3659   const unsigned Opc = Inst.getOpcode();
3660   const MCInstrDesc &Desc = MII.get(Opc);
3661 
3662   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3663     return true;
3664   if (!Desc.mayLoad() || !Desc.mayStore())
3665     return true; // Not atomic
3666 
3667   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3668   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3669 
3670   // This is an incomplete check because image_atomic_cmpswap
3671   // may only use 0x3 and 0xf while other atomic operations
3672   // may use 0x1 and 0x3. However these limitations are
3673   // verified when we check that dmask matches dst size.
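  // e.g. (illustrative) image_atomic_cmpswap with dmask:0x3 or dmask:0xf
  // passes here, while something like dmask:0x5 is rejected.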
3674   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3675 }
3676 
3677 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3678 
3679   const unsigned Opc = Inst.getOpcode();
3680   const MCInstrDesc &Desc = MII.get(Opc);
3681 
3682   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3683     return true;
3684 
3685   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3686   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3687 
3688   // GATHER4 instructions use dmask in a different fashion compared to
3689   // other MIMG instructions. The only useful DMASK values are
3690   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3691   // (red,red,red,red) etc.) The ISA document doesn't mention
3692   // this.
3693   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3694 }
3695 
3696 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3697   const unsigned Opc = Inst.getOpcode();
3698   const MCInstrDesc &Desc = MII.get(Opc);
3699 
3700   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3701     return true;
3702 
3703   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3704   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3705       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3706 
3707   if (!BaseOpcode->MSAA)
3708     return true;
3709 
3710   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3711   assert(DimIdx != -1);
3712 
3713   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3714   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3715 
3716   return DimInfo->MSAA;
3717 }
3718 
3719 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3720 {
3721   switch (Opcode) {
3722   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3723   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3724   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3725     return true;
3726   default:
3727     return false;
3728   }
3729 }
3730 
// movrels* opcodes should only allow VGPRs as src0.
3732 // This is specified in .td description for vop1/vop3,
3733 // but sdwa is handled differently. See isSDWAOperand.
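// e.g. (illustrative) a VGPR src0 such as "v_movrels_b32_sdwa v0, v1 ..."
// passes, while an SGPR or constant src0 is diagnosed below.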
3734 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3735                                       const OperandVector &Operands) {
3736 
3737   const unsigned Opc = Inst.getOpcode();
3738   const MCInstrDesc &Desc = MII.get(Opc);
3739 
3740   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3741     return true;
3742 
3743   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3744   assert(Src0Idx != -1);
3745 
3746   SMLoc ErrLoc;
3747   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3748   if (Src0.isReg()) {
3749     auto Reg = mc2PseudoReg(Src0.getReg());
3750     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3751     if (!isSGPR(Reg, TRI))
3752       return true;
3753     ErrLoc = getRegLoc(Reg, Operands);
3754   } else {
3755     ErrLoc = getConstLoc(Operands);
3756   }
3757 
3758   Error(ErrLoc, "source operand must be a VGPR");
3759   return false;
3760 }
3761 
3762 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3763                                           const OperandVector &Operands) {
3764 
3765   const unsigned Opc = Inst.getOpcode();
3766 
3767   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3768     return true;
3769 
3770   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3771   assert(Src0Idx != -1);
3772 
3773   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3774   if (!Src0.isReg())
3775     return true;
3776 
3777   auto Reg = mc2PseudoReg(Src0.getReg());
3778   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3779   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3780     Error(getRegLoc(Reg, Operands),
3781           "source operand must be either a VGPR or an inline constant");
3782     return false;
3783   }
3784 
3785   return true;
3786 }
3787 
3788 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3789                                    const OperandVector &Operands) {
3790   const unsigned Opc = Inst.getOpcode();
3791   const MCInstrDesc &Desc = MII.get(Opc);
3792 
3793   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3794     return true;
3795 
3796   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3797   if (Src2Idx == -1)
3798     return true;
3799 
3800   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3801   if (!Src2.isReg())
3802     return true;
3803 
3804   MCRegister Src2Reg = Src2.getReg();
3805   MCRegister DstReg = Inst.getOperand(0).getReg();
3806   if (Src2Reg == DstReg)
3807     return true;
3808 
3809   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3810   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3811     return true;
3812 
3813   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3814     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3815           "source 2 operand must not partially overlap with dst");
3816     return false;
3817   }
3818 
3819   return true;
3820 }
3821 
3822 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3823   switch (Inst.getOpcode()) {
3824   default:
3825     return true;
3826   case V_DIV_SCALE_F32_gfx6_gfx7:
3827   case V_DIV_SCALE_F32_vi:
3828   case V_DIV_SCALE_F32_gfx10:
3829   case V_DIV_SCALE_F64_gfx6_gfx7:
3830   case V_DIV_SCALE_F64_vi:
3831   case V_DIV_SCALE_F64_gfx10:
3832     break;
3833   }
3834 
3835   // TODO: Check that src0 = src1 or src2.
3836 
3837   for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
3839                     AMDGPU::OpName::src2_modifiers}) {
3840     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3841             .getImm() &
3842         SISrcMods::ABS) {
3843       return false;
3844     }
3845   }
3846 
3847   return true;
3848 }
3849 
3850 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3851 
3852   const unsigned Opc = Inst.getOpcode();
3853   const MCInstrDesc &Desc = MII.get(Opc);
3854 
3855   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3856     return true;
3857 
3858   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3859   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3860     if (isCI() || isSI())
3861       return false;
3862   }
3863 
3864   return true;
3865 }
3866 
3867 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3868   const unsigned Opc = Inst.getOpcode();
3869   const MCInstrDesc &Desc = MII.get(Opc);
3870 
3871   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3872     return true;
3873 
3874   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3875   if (DimIdx < 0)
3876     return true;
3877 
3878   long Imm = Inst.getOperand(DimIdx).getImm();
3879   if (Imm < 0 || Imm >= 8)
3880     return false;
3881 
3882   return true;
3883 }
3884 
3885 static bool IsRevOpcode(const unsigned Opcode)
3886 {
3887   switch (Opcode) {
3888   case AMDGPU::V_SUBREV_F32_e32:
3889   case AMDGPU::V_SUBREV_F32_e64:
3890   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3891   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3892   case AMDGPU::V_SUBREV_F32_e32_vi:
3893   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3894   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3895   case AMDGPU::V_SUBREV_F32_e64_vi:
3896 
3897   case AMDGPU::V_SUBREV_CO_U32_e32:
3898   case AMDGPU::V_SUBREV_CO_U32_e64:
3899   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3900   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3901 
3902   case AMDGPU::V_SUBBREV_U32_e32:
3903   case AMDGPU::V_SUBBREV_U32_e64:
3904   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3905   case AMDGPU::V_SUBBREV_U32_e32_vi:
3906   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3907   case AMDGPU::V_SUBBREV_U32_e64_vi:
3908 
3909   case AMDGPU::V_SUBREV_U32_e32:
3910   case AMDGPU::V_SUBREV_U32_e64:
3911   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3912   case AMDGPU::V_SUBREV_U32_e32_vi:
3913   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3914   case AMDGPU::V_SUBREV_U32_e64_vi:
3915 
3916   case AMDGPU::V_SUBREV_F16_e32:
3917   case AMDGPU::V_SUBREV_F16_e64:
3918   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3919   case AMDGPU::V_SUBREV_F16_e32_vi:
3920   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3921   case AMDGPU::V_SUBREV_F16_e64_vi:
3922 
3923   case AMDGPU::V_SUBREV_U16_e32:
3924   case AMDGPU::V_SUBREV_U16_e64:
3925   case AMDGPU::V_SUBREV_U16_e32_vi:
3926   case AMDGPU::V_SUBREV_U16_e64_vi:
3927 
3928   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3929   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3930   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3931 
3932   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3933   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3934 
3935   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3936   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3937 
3938   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3939   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3940 
3941   case AMDGPU::V_LSHRREV_B32_e32:
3942   case AMDGPU::V_LSHRREV_B32_e64:
3943   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3944   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3945   case AMDGPU::V_LSHRREV_B32_e32_vi:
3946   case AMDGPU::V_LSHRREV_B32_e64_vi:
3947   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3948   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3949 
3950   case AMDGPU::V_ASHRREV_I32_e32:
3951   case AMDGPU::V_ASHRREV_I32_e64:
3952   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3953   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3954   case AMDGPU::V_ASHRREV_I32_e32_vi:
3955   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3956   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3957   case AMDGPU::V_ASHRREV_I32_e64_vi:
3958 
3959   case AMDGPU::V_LSHLREV_B32_e32:
3960   case AMDGPU::V_LSHLREV_B32_e64:
3961   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3962   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3963   case AMDGPU::V_LSHLREV_B32_e32_vi:
3964   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3965   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3966   case AMDGPU::V_LSHLREV_B32_e64_vi:
3967 
3968   case AMDGPU::V_LSHLREV_B16_e32:
3969   case AMDGPU::V_LSHLREV_B16_e64:
3970   case AMDGPU::V_LSHLREV_B16_e32_vi:
3971   case AMDGPU::V_LSHLREV_B16_e64_vi:
3972   case AMDGPU::V_LSHLREV_B16_gfx10:
3973 
3974   case AMDGPU::V_LSHRREV_B16_e32:
3975   case AMDGPU::V_LSHRREV_B16_e64:
3976   case AMDGPU::V_LSHRREV_B16_e32_vi:
3977   case AMDGPU::V_LSHRREV_B16_e64_vi:
3978   case AMDGPU::V_LSHRREV_B16_gfx10:
3979 
3980   case AMDGPU::V_ASHRREV_I16_e32:
3981   case AMDGPU::V_ASHRREV_I16_e64:
3982   case AMDGPU::V_ASHRREV_I16_e32_vi:
3983   case AMDGPU::V_ASHRREV_I16_e64_vi:
3984   case AMDGPU::V_ASHRREV_I16_gfx10:
3985 
3986   case AMDGPU::V_LSHLREV_B64_e64:
3987   case AMDGPU::V_LSHLREV_B64_gfx10:
3988   case AMDGPU::V_LSHLREV_B64_vi:
3989 
3990   case AMDGPU::V_LSHRREV_B64_e64:
3991   case AMDGPU::V_LSHRREV_B64_gfx10:
3992   case AMDGPU::V_LSHRREV_B64_vi:
3993 
3994   case AMDGPU::V_ASHRREV_I64_e64:
3995   case AMDGPU::V_ASHRREV_I64_gfx10:
3996   case AMDGPU::V_ASHRREV_I64_vi:
3997 
3998   case AMDGPU::V_PK_LSHLREV_B16:
3999   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4000   case AMDGPU::V_PK_LSHLREV_B16_vi:
4001 
4002   case AMDGPU::V_PK_LSHRREV_B16:
4003   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4004   case AMDGPU::V_PK_LSHRREV_B16_vi:
4005   case AMDGPU::V_PK_ASHRREV_I16:
4006   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4007   case AMDGPU::V_PK_ASHRREV_I16_vi:
4008     return true;
4009   default:
4010     return false;
4011   }
4012 }
4013 
4014 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4015 
4016   using namespace SIInstrFlags;
4017   const unsigned Opcode = Inst.getOpcode();
4018   const MCInstrDesc &Desc = MII.get(Opcode);
4019 
4020   // lds_direct register is defined so that it can be used
4021   // with 9-bit operands only. Ignore encodings which do not accept these.
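  // e.g. (illustrative) "v_mov_b32 v0, lds_direct" is allowed on targets that
  // support it, but lds_direct used as src1/src2 or with SDWA is rejected.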
4022   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4023   if ((Desc.TSFlags & Enc) == 0)
4024     return None;
4025 
4026   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4027     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4028     if (SrcIdx == -1)
4029       break;
4030     const auto &Src = Inst.getOperand(SrcIdx);
4031     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4032 
4033       if (isGFX90A() || isGFX11Plus())
4034         return StringRef("lds_direct is not supported on this GPU");
4035 
4036       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4037         return StringRef("lds_direct cannot be used with this instruction");
4038 
4039       if (SrcName != OpName::src0)
4040         return StringRef("lds_direct may be used as src0 only");
4041     }
4042   }
4043 
4044   return None;
4045 }
4046 
4047 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4048   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4049     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4050     if (Op.isFlatOffset())
4051       return Op.getStartLoc();
4052   }
4053   return getLoc();
4054 }
4055 
4056 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4057                                          const OperandVector &Operands) {
4058   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4059   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4060     return true;
4061 
4062   auto Opcode = Inst.getOpcode();
4063   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4064   assert(OpNum != -1);
4065 
4066   const auto &Op = Inst.getOperand(OpNum);
4067   if (!hasFlatOffsets() && Op.getImm() != 0) {
4068     Error(getFlatOffsetLoc(Operands),
4069           "flat offset modifier is not supported on this GPU");
4070     return false;
4071   }
4072 
4073   // For FLAT segment the offset must be positive;
4074   // MSB is ignored and forced to zero.
4075   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4076     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4077     if (!isIntN(OffsetSize, Op.getImm())) {
4078       Error(getFlatOffsetLoc(Operands),
4079             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4080       return false;
4081     }
4082   } else {
4083     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4084     if (!isUIntN(OffsetSize, Op.getImm())) {
4085       Error(getFlatOffsetLoc(Operands),
4086             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4087       return false;
4088     }
4089   }
4090 
4091   return true;
4092 }
4093 
4094 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4095   // Start with second operand because SMEM Offset cannot be dst or src0.
4096   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4097     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4098     if (Op.isSMEMOffset())
4099       return Op.getStartLoc();
4100   }
4101   return getLoc();
4102 }
4103 
4104 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4105                                          const OperandVector &Operands) {
4106   if (isCI() || isSI())
4107     return true;
4108 
4109   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4110   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4111     return true;
4112 
4113   auto Opcode = Inst.getOpcode();
4114   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4115   if (OpNum == -1)
4116     return true;
4117 
4118   const auto &Op = Inst.getOperand(OpNum);
4119   if (!Op.isImm())
4120     return true;
4121 
4122   uint64_t Offset = Op.getImm();
4123   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4124   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4125       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4126     return true;
4127 
4128   Error(getSMEMOffsetLoc(Operands),
4129         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4130                                "expected a 21-bit signed offset");
4131 
4132   return false;
4133 }
4134 
4135 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4136   unsigned Opcode = Inst.getOpcode();
4137   const MCInstrDesc &Desc = MII.get(Opcode);
4138   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4139     return true;
4140 
4141   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4142   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4143 
4144   const int OpIndices[] = { Src0Idx, Src1Idx };
4145 
4146   unsigned NumExprs = 0;
4147   unsigned NumLiterals = 0;
4148   uint32_t LiteralValue;
4149 
4150   for (int OpIdx : OpIndices) {
4151     if (OpIdx == -1) break;
4152 
4153     const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (like the one used by s_set_gpr_idx_on).
4155     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4156       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4157         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4158         if (NumLiterals == 0 || LiteralValue != Value) {
4159           LiteralValue = Value;
4160           ++NumLiterals;
4161         }
4162       } else if (MO.isExpr()) {
4163         ++NumExprs;
4164       }
4165     }
4166   }
4167 
4168   return NumLiterals + NumExprs <= 1;
4169 }
4170 
4171 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4172   const unsigned Opc = Inst.getOpcode();
4173   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4174       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4175     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4176     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4177 
4178     if (OpSel & ~3)
4179       return false;
4180   }
4181 
4182   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4183     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4184     if (OpSelIdx != -1) {
4185       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4186         return false;
4187     }
4188     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4189     if (OpSelHiIdx != -1) {
4190       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4191         return false;
4192     }
4193   }
4194 
4195   return true;
4196 }
4197 
4198 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4199                                   const OperandVector &Operands) {
4200   const unsigned Opc = Inst.getOpcode();
4201   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4202   if (DppCtrlIdx < 0)
4203     return true;
4204   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4205 
4206   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4207     // DPP64 is supported for row_newbcast only.
4208     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4209     if (Src0Idx >= 0 &&
4210         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4211       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4212       Error(S, "64 bit dpp only supports row_newbcast");
4213       return false;
4214     }
4215   }
4216 
4217   return true;
4218 }
4219 
4220 // Check if VCC register matches wavefront size
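// e.g. (illustrative) with wave32, an operand parsed as the 64-bit "vcc" pair
// fails this check; "vcc_lo" matches instead.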
4221 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4222   auto FB = getFeatureBits();
4223   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4224     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4225 }
4226 
// Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
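// For example (illustrative), an instruction that reuses one and the same
// 32-bit literal value for two operands passes, while two different literal
// values trigger the "only one literal operand" error below.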
4228 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4229                                          const OperandVector &Operands) {
4230   unsigned Opcode = Inst.getOpcode();
4231   const MCInstrDesc &Desc = MII.get(Opcode);
4232   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4233   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4234       ImmIdx == -1)
4235     return true;
4236 
4237   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4238   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4239   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4240 
4241   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4242 
4243   unsigned NumExprs = 0;
4244   unsigned NumLiterals = 0;
4245   uint32_t LiteralValue;
4246 
4247   for (int OpIdx : OpIndices) {
4248     if (OpIdx == -1)
4249       continue;
4250 
4251     const MCOperand &MO = Inst.getOperand(OpIdx);
4252     if (!MO.isImm() && !MO.isExpr())
4253       continue;
4254     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4255       continue;
4256 
4257     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4258         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4259       Error(getConstLoc(Operands),
4260             "inline constants are not allowed for this operand");
4261       return false;
4262     }
4263 
4264     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4265       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4266       if (NumLiterals == 0 || LiteralValue != Value) {
4267         LiteralValue = Value;
4268         ++NumLiterals;
4269       }
4270     } else if (MO.isExpr()) {
4271       ++NumExprs;
4272     }
4273   }
4274   NumLiterals += NumExprs;
4275 
4276   if (!NumLiterals)
4277     return true;
4278 
4279   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4280     Error(getLitLoc(Operands), "literal operands are not supported");
4281     return false;
4282   }
4283 
4284   if (NumLiterals > 1) {
4285     Error(getLitLoc(Operands), "only one literal operand is allowed");
4286     return false;
4287   }
4288 
4289   return true;
4290 }
4291 
4292 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4293 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4294                          const MCRegisterInfo *MRI) {
4295   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4296   if (OpIdx < 0)
4297     return -1;
4298 
4299   const MCOperand &Op = Inst.getOperand(OpIdx);
4300   if (!Op.isReg())
4301     return -1;
4302 
4303   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4304   auto Reg = Sub ? Sub : Op.getReg();
4305   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4306   return AGPR32.contains(Reg) ? 1 : 0;
4307 }
4308 
4309 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4310   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4311   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4312                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4313                   SIInstrFlags::DS)) == 0)
4314     return true;
4315 
4316   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4317                                                       : AMDGPU::OpName::vdata;
4318 
4319   const MCRegisterInfo *MRI = getMRI();
4320   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4321   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4322 
4323   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4324     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4325     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4326       return false;
4327   }
4328 
4329   auto FB = getFeatureBits();
4330   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4331     if (DataAreg < 0 || DstAreg < 0)
4332       return true;
4333     return DstAreg == DataAreg;
4334   }
4335 
4336   return DstAreg < 1 && DataAreg < 1;
4337 }
4338 
4339 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4340   auto FB = getFeatureBits();
4341   if (!FB[AMDGPU::FeatureGFX90AInsts])
4342     return true;
4343 
4344   const MCRegisterInfo *MRI = getMRI();
4345   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4346   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4347   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4348     const MCOperand &Op = Inst.getOperand(I);
4349     if (!Op.isReg())
4350       continue;
4351 
4352     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4353     if (!Sub)
4354       continue;
4355 
4356     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4357       return false;
4358     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4359       return false;
4360   }
4361 
4362   return true;
4363 }
4364 
4365 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4366   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4367     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4368     if (Op.isBLGP())
4369       return Op.getStartLoc();
4370   }
4371   return SMLoc();
4372 }
4373 
4374 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4375                                    const OperandVector &Operands) {
4376   unsigned Opc = Inst.getOpcode();
4377   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4378   if (BlgpIdx == -1)
4379     return true;
4380   SMLoc BLGPLoc = getBLGPLoc(Operands);
4381   if (!BLGPLoc.isValid())
4382     return true;
4383   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4384   auto FB = getFeatureBits();
4385   bool UsesNeg = false;
4386   if (FB[AMDGPU::FeatureGFX940Insts]) {
4387     switch (Opc) {
4388     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4389     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4390     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4391     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4392       UsesNeg = true;
4393     }
4394   }
4395 
4396   if (IsNeg == UsesNeg)
4397     return true;
4398 
4399   Error(BLGPLoc,
4400         UsesNeg ? "invalid modifier: blgp is not supported"
4401                 : "invalid modifier: neg is not supported");
4402 
4403   return false;
4404 }
4405 
4406 // gfx90a has an undocumented limitation:
4407 // DS_GWS opcodes must use even aligned registers.
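// e.g. (illustrative) data0 in v0 or v2 passes the parity check below, while
// v1 or v3 is diagnosed as "vgpr must be even aligned".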
4408 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4409                                   const OperandVector &Operands) {
4410   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4411     return true;
4412 
4413   int Opc = Inst.getOpcode();
4414   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4415       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4416     return true;
4417 
4418   const MCRegisterInfo *MRI = getMRI();
4419   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4420   int Data0Pos =
4421       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4422   assert(Data0Pos != -1);
4423   auto Reg = Inst.getOperand(Data0Pos).getReg();
4424   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4425   if (RegIdx & 1) {
4426     SMLoc RegLoc = getRegLoc(Reg, Operands);
4427     Error(RegLoc, "vgpr must be even aligned");
4428     return false;
4429   }
4430 
4431   return true;
4432 }
4433 
4434 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4435                                             const OperandVector &Operands,
4436                                             const SMLoc &IDLoc) {
4437   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4438                                            AMDGPU::OpName::cpol);
4439   if (CPolPos == -1)
4440     return true;
4441 
4442   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4443 
4444   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4445   if (TSFlags & SIInstrFlags::SMRD) {
4446     if (CPol && (isSI() || isCI())) {
4447       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4448       Error(S, "cache policy is not supported for SMRD instructions");
4449       return false;
4450     }
4451     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4452       Error(IDLoc, "invalid cache policy for SMEM instruction");
4453       return false;
4454     }
4455   }
4456 
4457   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4458     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4459     StringRef CStr(S.getPointer());
4460     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4461     Error(S, "scc is not supported on this GPU");
4462     return false;
4463   }
4464 
4465   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4466     return true;
4467 
4468   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4469     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4470       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4471                               : "instruction must use glc");
4472       return false;
4473     }
4474   } else {
4475     if (CPol & CPol::GLC) {
4476       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4477       StringRef CStr(S.getPointer());
4478       S = SMLoc::getFromPointer(
4479           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4480       Error(S, isGFX940() ? "instruction must not use sc0"
4481                           : "instruction must not use glc");
4482       return false;
4483     }
4484   }
4485 
4486   return true;
4487 }
4488 
4489 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4490                                          const OperandVector &Operands,
4491                                          const SMLoc &IDLoc) {
4492   if (isGFX940())
4493     return true;
4494 
4495   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4496   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4497       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4498     return true;
4499   // This is FLAT LDS DMA.
4500 
4501   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4502   StringRef CStr(S.getPointer());
4503   if (!CStr.startswith("lds")) {
    // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
    // The LDS version should have an 'lds' modifier, but it follows optional
    // operands, so its absence is ignored by the matcher.
4507     Error(IDLoc, "invalid operands for instruction");
4508     return false;
4509   }
4510 
4511   return true;
4512 }
4513 
4514 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4515   if (!isGFX11Plus())
4516     return true;
4517   for (auto &Operand : Operands) {
4518     if (!Operand->isReg())
4519       continue;
4520     unsigned Reg = Operand->getReg();
4521     if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4522       Error(getRegLoc(Reg, Operands),
4523             "execz and vccz are not supported on this GPU");
4524       return false;
4525     }
4526   }
4527   return true;
4528 }
4529 
4530 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4531                                           const SMLoc &IDLoc,
4532                                           const OperandVector &Operands) {
4533   if (auto ErrMsg = validateLdsDirect(Inst)) {
4534     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4535     return false;
4536   }
4537   if (!validateSOPLiteral(Inst)) {
4538     Error(getLitLoc(Operands),
4539       "only one literal operand is allowed");
4540     return false;
4541   }
4542   if (!validateVOPLiteral(Inst, Operands)) {
4543     return false;
4544   }
4545   if (!validateConstantBusLimitations(Inst, Operands)) {
4546     return false;
4547   }
4548   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4549     return false;
4550   }
4551   if (!validateIntClampSupported(Inst)) {
4552     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4553       "integer clamping is not supported on this GPU");
4554     return false;
4555   }
4556   if (!validateOpSel(Inst)) {
4557     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4558       "invalid op_sel operand");
4559     return false;
4560   }
4561   if (!validateDPP(Inst, Operands)) {
4562     return false;
4563   }
4564   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4565   if (!validateMIMGD16(Inst)) {
4566     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4567       "d16 modifier is not supported on this GPU");
4568     return false;
4569   }
4570   if (!validateMIMGDim(Inst)) {
4571     Error(IDLoc, "dim modifier is required on this GPU");
4572     return false;
4573   }
4574   if (!validateMIMGMSAA(Inst)) {
4575     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4576           "invalid dim; must be MSAA type");
4577     return false;
4578   }
4579   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4580     Error(IDLoc, *ErrMsg);
4581     return false;
4582   }
4583   if (!validateMIMGAddrSize(Inst)) {
4584     Error(IDLoc,
4585       "image address size does not match dim and a16");
4586     return false;
4587   }
4588   if (!validateMIMGAtomicDMask(Inst)) {
4589     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4590       "invalid atomic image dmask");
4591     return false;
4592   }
4593   if (!validateMIMGGatherDMask(Inst)) {
4594     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4595       "invalid image_gather dmask: only one bit must be set");
4596     return false;
4597   }
4598   if (!validateMovrels(Inst, Operands)) {
4599     return false;
4600   }
4601   if (!validateFlatOffset(Inst, Operands)) {
4602     return false;
4603   }
4604   if (!validateSMEMOffset(Inst, Operands)) {
4605     return false;
4606   }
4607   if (!validateMAIAccWrite(Inst, Operands)) {
4608     return false;
4609   }
4610   if (!validateMFMA(Inst, Operands)) {
4611     return false;
4612   }
4613   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4614     return false;
4615   }
4616 
4617   if (!validateAGPRLdSt(Inst)) {
4618     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4619     ? "invalid register class: data and dst should be all VGPR or AGPR"
4620     : "invalid register class: agpr loads and stores not supported on this GPU"
4621     );
4622     return false;
4623   }
4624   if (!validateVGPRAlign(Inst)) {
4625     Error(IDLoc,
4626       "invalid register class: vgpr tuples must be 64 bit aligned");
4627     return false;
4628   }
4629   if (!validateGWS(Inst, Operands)) {
4630     return false;
4631   }
4632 
4633   if (!validateBLGP(Inst, Operands)) {
4634     return false;
4635   }
4636 
4637   if (!validateDivScale(Inst)) {
4638     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4639     return false;
4640   }
4641   if (!validateExeczVcczOperands(Operands)) {
4642     return false;
4643   }
4644 
4645   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4646     return false;
4647   }
4648 
4649   return true;
4650 }
4651 
4652 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4653                                             const FeatureBitset &FBS,
4654                                             unsigned VariantID = 0);
4655 
4656 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4657                                 const FeatureBitset &AvailableFeatures,
4658                                 unsigned VariantID);
4659 
4660 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4661                                        const FeatureBitset &FBS) {
4662   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4663 }
4664 
4665 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4666                                        const FeatureBitset &FBS,
4667                                        ArrayRef<unsigned> Variants) {
4668   for (auto Variant : Variants) {
4669     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4670       return true;
4671   }
4672 
4673   return false;
4674 }
4675 
4676 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4677                                                   const SMLoc &IDLoc) {
4678   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4679 
4680   // Check if requested instruction variant is supported.
4681   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4682     return false;
4683 
4684   // This instruction is not supported.
4685   // Clear any other pending errors because they are no longer relevant.
4686   getParser().clearPendingErrors();
4687 
4688   // Requested instruction variant is not supported.
4689   // Check if any other variants are supported.
4690   StringRef VariantName = getMatchedVariantName();
4691   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4692     return Error(IDLoc,
4693                  Twine(VariantName,
4694                        " variant of this instruction is not supported"));
4695   }
4696 
4697   // Finally check if this instruction is supported on any other GPU.
4698   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4699     return Error(IDLoc, "instruction not supported on this GPU");
4700   }
4701 
4702   // Instruction not supported on any GPU. Probably a typo.
4703   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4704   return Error(IDLoc, "invalid instruction" + Suggestion);
4705 }
4706 
4707 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4708                                               OperandVector &Operands,
4709                                               MCStreamer &Out,
4710                                               uint64_t &ErrorInfo,
4711                                               bool MatchingInlineAsm) {
4712   MCInst Inst;
4713   unsigned Result = Match_Success;
4714   for (auto Variant : getMatchedVariants()) {
4715     uint64_t EI;
4716     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4717                                   Variant);
    // Match statuses are ordered from least to most specific, and the most
    // specific status seen so far is kept as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4721     if ((R == Match_Success) ||
4722         (R == Match_PreferE32) ||
4723         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4724         (R == Match_InvalidOperand && Result != Match_MissingFeature
4725                                    && Result != Match_PreferE32) ||
4726         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4727                                    && Result != Match_MissingFeature
4728                                    && Result != Match_PreferE32)) {
4729       Result = R;
4730       ErrorInfo = EI;
4731     }
4732     if (R == Match_Success)
4733       break;
4734   }
4735 
4736   if (Result == Match_Success) {
4737     if (!validateInstruction(Inst, IDLoc, Operands)) {
4738       return true;
4739     }
4740     Inst.setLoc(IDLoc);
4741     Out.emitInstruction(Inst, getSTI());
4742     return false;
4743   }
4744 
4745   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4746   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4747     return true;
4748   }
4749 
4750   switch (Result) {
4751   default: break;
4752   case Match_MissingFeature:
4753     // It has been verified that the specified instruction
4754     // mnemonic is valid. A match was found but it requires
4755     // features which are not supported on this GPU.
4756     return Error(IDLoc, "operands are not valid for this GPU or mode");
4757 
4758   case Match_InvalidOperand: {
4759     SMLoc ErrorLoc = IDLoc;
4760     if (ErrorInfo != ~0ULL) {
4761       if (ErrorInfo >= Operands.size()) {
4762         return Error(IDLoc, "too few operands for instruction");
4763       }
4764       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4765       if (ErrorLoc == SMLoc())
4766         ErrorLoc = IDLoc;
4767     }
4768     return Error(ErrorLoc, "invalid operand for instruction");
4769   }
4770 
4771   case Match_PreferE32:
4772     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4773                         "should be encoded as e32");
4774   case Match_MnemonicFail:
4775     llvm_unreachable("Invalid instructions should have been handled already");
4776   }
4777   llvm_unreachable("Implement any new match types added!");
4778 }
4779 
4780 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4781   int64_t Tmp = -1;
4782   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4783     return true;
4784   }
4785   if (getParser().parseAbsoluteExpression(Tmp)) {
4786     return true;
4787   }
4788   Ret = static_cast<uint32_t>(Tmp);
4789   return false;
4790 }
4791 
4792 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4793                                                uint32_t &Minor) {
4794   if (ParseAsAbsoluteExpression(Major))
4795     return TokError("invalid major version");
4796 
4797   if (!trySkipToken(AsmToken::Comma))
4798     return TokError("minor version number required, comma expected");
4799 
4800   if (ParseAsAbsoluteExpression(Minor))
4801     return TokError("invalid minor version");
4802 
4803   return false;
4804 }
4805 
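/// Parse the .amdgcn_target directive. It takes a quoted target id string that
/// must match the target id the streamer was configured with, for example
/// (target id illustrative):
///   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"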
4806 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4807   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4808     return TokError("directive only supported for amdgcn architecture");
4809 
4810   std::string TargetIDDirective;
4811   SMLoc TargetStart = getTok().getLoc();
4812   if (getParser().parseEscapedString(TargetIDDirective))
4813     return true;
4814 
4815   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4816   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4817     return getParser().Error(TargetRange.Start,
4818         (Twine(".amdgcn_target directive's target id ") +
4819          Twine(TargetIDDirective) +
4820          Twine(" does not match the specified target id ") +
4821          Twine(getTargetStreamer().getTargetID()->toString())).str());
4822 
4823   return false;
4824 }
4825 
4826 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4827   return Error(Range.Start, "value out of range", Range);
4828 }
4829 
4830 bool AMDGPUAsmParser::calculateGPRBlocks(
4831     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4832     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4833     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4834     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4835   // TODO(scott.linder): These calculations are duplicated from
4836   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4837   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4838 
4839   unsigned NumVGPRs = NextFreeVGPR;
4840   unsigned NumSGPRs = NextFreeSGPR;
4841 
4842   if (Version.Major >= 10)
4843     NumSGPRs = 0;
4844   else {
4845     unsigned MaxAddressableNumSGPRs =
4846         IsaInfo::getAddressableNumSGPRs(&getSTI());
4847 
4848     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4849         NumSGPRs > MaxAddressableNumSGPRs)
4850       return OutOfRangeError(SGPRRange);
4851 
4852     NumSGPRs +=
4853         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4854 
4855     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4856         NumSGPRs > MaxAddressableNumSGPRs)
4857       return OutOfRangeError(SGPRRange);
4858 
4859     if (Features.test(FeatureSGPRInitBug))
4860       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4861   }
4862 
4863   VGPRBlocks =
4864       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4865   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4866 
4867   return false;
4868 }
4869 
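/// Parse a .amdhsa_kernel block. A minimal sketch of the expected input
/// (kernel name and values illustrative):
///   .amdhsa_kernel my_kernel
///     .amdhsa_next_free_vgpr 8
///     .amdhsa_next_free_sgpr 16
///   .end_amdhsa_kernel
/// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are required (and
/// .amdhsa_accum_offset on gfx90a); each .amdhsa_ directive may appear at
/// most once, and unknown directives are rejected.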
4870 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4871   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4872     return TokError("directive only supported for amdgcn architecture");
4873 
4874   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4875     return TokError("directive only supported for amdhsa OS");
4876 
4877   StringRef KernelName;
4878   if (getParser().parseIdentifier(KernelName))
4879     return true;
4880 
4881   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4882 
4883   StringSet<> Seen;
4884 
4885   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4886 
4887   SMRange VGPRRange;
4888   uint64_t NextFreeVGPR = 0;
4889   uint64_t AccumOffset = 0;
4890   uint64_t SharedVGPRCount = 0;
4891   SMRange SGPRRange;
4892   uint64_t NextFreeSGPR = 0;
4893 
4894   // Count the number of user SGPRs implied from the enabled feature bits.
4895   unsigned ImpliedUserSGPRCount = 0;
4896 
4897   // Track if the asm explicitly contains the directive for the user SGPR
4898   // count.
4899   Optional<unsigned> ExplicitUserSGPRCount;
4900   bool ReserveVCC = true;
4901   bool ReserveFlatScr = true;
4902   Optional<bool> EnableWavefrontSize32;
4903 
4904   while (true) {
4905     while (trySkipToken(AsmToken::EndOfStatement));
4906 
4907     StringRef ID;
4908     SMRange IDRange = getTok().getLocRange();
4909     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4910       return true;
4911 
4912     if (ID == ".end_amdhsa_kernel")
4913       break;
4914 
4915     if (!Seen.insert(ID).second)
4916       return TokError(".amdhsa_ directives cannot be repeated");
4917 
4918     SMLoc ValStart = getLoc();
4919     int64_t IVal;
4920     if (getParser().parseAbsoluteExpression(IVal))
4921       return true;
4922     SMLoc ValEnd = getLoc();
4923     SMRange ValRange = SMRange(ValStart, ValEnd);
4924 
4925     if (IVal < 0)
4926       return OutOfRangeError(ValRange);
4927 
4928     uint64_t Val = IVal;
4929 
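// PARSE_BITS_ENTRY range-checks VALUE against the bit width of ENTRY and, if
// it fits, packs it into the corresponding bitfield of FIELD; otherwise the
// value is reported as out of range at RANGE.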
4930 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4931   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4932     return OutOfRangeError(RANGE);                                             \
4933   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4934 
4935     if (ID == ".amdhsa_group_segment_fixed_size") {
4936       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4937         return OutOfRangeError(ValRange);
4938       KD.group_segment_fixed_size = Val;
4939     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4940       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4941         return OutOfRangeError(ValRange);
4942       KD.private_segment_fixed_size = Val;
4943     } else if (ID == ".amdhsa_kernarg_size") {
4944       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4945         return OutOfRangeError(ValRange);
4946       KD.kernarg_size = Val;
4947     } else if (ID == ".amdhsa_user_sgpr_count") {
4948       ExplicitUserSGPRCount = Val;
4949     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4950       if (hasArchitectedFlatScratch())
4951         return Error(IDRange.Start,
4952                      "directive is not supported with architected flat scratch",
4953                      IDRange);
4954       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4955                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4956                        Val, ValRange);
4957       if (Val)
4958         ImpliedUserSGPRCount += 4;
4959     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4960       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4961                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4962                        ValRange);
4963       if (Val)
4964         ImpliedUserSGPRCount += 2;
4965     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4966       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4967                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4968                        ValRange);
4969       if (Val)
4970         ImpliedUserSGPRCount += 2;
4971     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4972       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4973                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4974                        Val, ValRange);
4975       if (Val)
4976         ImpliedUserSGPRCount += 2;
4977     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4978       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4979                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4980                        ValRange);
4981       if (Val)
4982         ImpliedUserSGPRCount += 2;
4983     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4984       if (hasArchitectedFlatScratch())
4985         return Error(IDRange.Start,
4986                      "directive is not supported with architected flat scratch",
4987                      IDRange);
4988       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4989                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4990                        ValRange);
4991       if (Val)
4992         ImpliedUserSGPRCount += 2;
4993     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4994       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4995                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4996                        Val, ValRange);
4997       if (Val)
4998         ImpliedUserSGPRCount += 1;
4999     } else if (ID == ".amdhsa_wavefront_size32") {
5000       if (IVersion.Major < 10)
5001         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5002       EnableWavefrontSize32 = Val;
5003       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5004                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5005                        Val, ValRange);
5006     } else if (ID == ".amdhsa_uses_dynamic_stack") {
5007       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5008                        KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5009     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5010       if (hasArchitectedFlatScratch())
5011         return Error(IDRange.Start,
5012                      "directive is not supported with architected flat scratch",
5013                      IDRange);
5014       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5015                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5016     } else if (ID == ".amdhsa_enable_private_segment") {
5017       if (!hasArchitectedFlatScratch())
5018         return Error(
5019             IDRange.Start,
5020             "directive is not supported without architected flat scratch",
5021             IDRange);
5022       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5023                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5024     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5025       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5026                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5027                        ValRange);
5028     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5029       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5030                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5031                        ValRange);
5032     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5033       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5034                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5035                        ValRange);
5036     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5037       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5038                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5039                        ValRange);
5040     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5041       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5042                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5043                        ValRange);
5044     } else if (ID == ".amdhsa_next_free_vgpr") {
5045       VGPRRange = ValRange;
5046       NextFreeVGPR = Val;
5047     } else if (ID == ".amdhsa_next_free_sgpr") {
5048       SGPRRange = ValRange;
5049       NextFreeSGPR = Val;
5050     } else if (ID == ".amdhsa_accum_offset") {
5051       if (!isGFX90A())
5052         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5053       AccumOffset = Val;
5054     } else if (ID == ".amdhsa_reserve_vcc") {
5055       if (!isUInt<1>(Val))
5056         return OutOfRangeError(ValRange);
5057       ReserveVCC = Val;
5058     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5059       if (IVersion.Major < 7)
5060         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5061       if (hasArchitectedFlatScratch())
5062         return Error(IDRange.Start,
5063                      "directive is not supported with architected flat scratch",
5064                      IDRange);
5065       if (!isUInt<1>(Val))
5066         return OutOfRangeError(ValRange);
5067       ReserveFlatScr = Val;
5068     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5069       if (IVersion.Major < 8)
5070         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5071       if (!isUInt<1>(Val))
5072         return OutOfRangeError(ValRange);
5073       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(IDRange.Start,
                                 ".amdhsa_reserve_xnack_mask does not match "
                                 "target id",
                                 IDRange);
5076     } else if (ID == ".amdhsa_float_round_mode_32") {
5077       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5078                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5079     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5080       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5081                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5082     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5083       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5084                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5085     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5086       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5087                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5088                        ValRange);
5089     } else if (ID == ".amdhsa_dx10_clamp") {
5090       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5091                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5092     } else if (ID == ".amdhsa_ieee_mode") {
5093       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5094                        Val, ValRange);
5095     } else if (ID == ".amdhsa_fp16_overflow") {
5096       if (IVersion.Major < 9)
5097         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5098       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5099                        ValRange);
5100     } else if (ID == ".amdhsa_tg_split") {
5101       if (!isGFX90A())
5102         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5103       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5104                        ValRange);
5105     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5106       if (IVersion.Major < 10)
5107         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5108       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5109                        ValRange);
5110     } else if (ID == ".amdhsa_memory_ordered") {
5111       if (IVersion.Major < 10)
5112         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5113       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5114                        ValRange);
5115     } else if (ID == ".amdhsa_forward_progress") {
5116       if (IVersion.Major < 10)
5117         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5118       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5119                        ValRange);
5120     } else if (ID == ".amdhsa_shared_vgpr_count") {
5121       if (IVersion.Major < 10)
5122         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5123       SharedVGPRCount = Val;
5124       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5125                        COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5126                        ValRange);
5127     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5128       PARSE_BITS_ENTRY(
5129           KD.compute_pgm_rsrc2,
5130           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5131           ValRange);
5132     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5133       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5134                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5135                        Val, ValRange);
5136     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5137       PARSE_BITS_ENTRY(
5138           KD.compute_pgm_rsrc2,
5139           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5140           ValRange);
5141     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5142       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5143                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5144                        Val, ValRange);
5145     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5146       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5147                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5148                        Val, ValRange);
5149     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5150       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5151                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5152                        Val, ValRange);
5153     } else if (ID == ".amdhsa_exception_int_div_zero") {
5154       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5155                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5156                        Val, ValRange);
5157     } else {
5158       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5159     }
5160 
5161 #undef PARSE_BITS_ENTRY
5162   }
5163 
5164   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5165     return TokError(".amdhsa_next_free_vgpr directive is required");
5166 
5167   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5168     return TokError(".amdhsa_next_free_sgpr directive is required");
5169 
5170   unsigned VGPRBlocks;
5171   unsigned SGPRBlocks;
5172   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5173                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5174                          EnableWavefrontSize32, NextFreeVGPR,
5175                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5176                          SGPRBlocks))
5177     return true;
5178 
5179   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5180           VGPRBlocks))
5181     return OutOfRangeError(VGPRRange);
5182   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5183                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5184 
5185   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5186           SGPRBlocks))
5187     return OutOfRangeError(SGPRRange);
5188   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5189                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5190                   SGPRBlocks);
5191 
5192   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5193     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5194                     "enabled user SGPRs");
5195 
5196   unsigned UserSGPRCount =
5197       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5198 
5199   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5200     return TokError("too many user SGPRs enabled");
5201   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5202                   UserSGPRCount);
5203 
5204   if (isGFX90A()) {
5205     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5206       return TokError(".amdhsa_accum_offset directive is required");
5207     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5208       return TokError("accum_offset should be in range [4..256] in "
5209                       "increments of 4");
5210     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5211       return TokError("accum_offset exceeds total VGPR allocation");
5212     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5213                     (AccumOffset / 4 - 1));
5214   }
5215 
5216   if (IVersion.Major == 10) {
    // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
    if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5219       return TokError("shared_vgpr_count directive not valid on "
5220                       "wavefront size 32");
5221     }
5222     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5223       return TokError("shared_vgpr_count*2 + "
5224                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5225                       "exceed 63\n");
5226     }
5227   }
5228 
5229   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5230       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5231       ReserveFlatScr);
5232   return false;
5233 }
5234 
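/// Parse the .hsa_code_object_version directive, e.g. (values illustrative):
///   .hsa_code_object_version 2,1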
5235 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5236   uint32_t Major;
5237   uint32_t Minor;
5238 
5239   if (ParseDirectiveMajorMinor(Major, Minor))
5240     return true;
5241 
5242   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5243   return false;
5244 }
5245 
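/// Parse the .hsa_code_object_isa directive. With no arguments the ISA version
/// of the targeted GPU is used; otherwise it expects major, minor, stepping,
/// vendor and arch, e.g. (values illustrative):
///   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"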
5246 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5247   uint32_t Major;
5248   uint32_t Minor;
5249   uint32_t Stepping;
5250   StringRef VendorName;
5251   StringRef ArchName;
5252 
5253   // If this directive has no arguments, then use the ISA version for the
5254   // targeted GPU.
5255   if (isToken(AsmToken::EndOfStatement)) {
5256     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5257     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5258                                                         ISA.Stepping,
5259                                                         "AMD", "AMDGPU");
5260     return false;
5261   }
5262 
5263   if (ParseDirectiveMajorMinor(Major, Minor))
5264     return true;
5265 
5266   if (!trySkipToken(AsmToken::Comma))
5267     return TokError("stepping version number required, comma expected");
5268 
5269   if (ParseAsAbsoluteExpression(Stepping))
5270     return TokError("invalid stepping version");
5271 
5272   if (!trySkipToken(AsmToken::Comma))
5273     return TokError("vendor name required, comma expected");
5274 
5275   if (!parseString(VendorName, "invalid vendor name"))
5276     return true;
5277 
5278   if (!trySkipToken(AsmToken::Comma))
5279     return TokError("arch name required, comma expected");
5280 
5281   if (!parseString(ArchName, "invalid arch name"))
5282     return true;
5283 
5284   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5285                                                       VendorName, ArchName);
5286   return false;
5287 }
5288 
5289 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5290                                                amd_kernel_code_t &Header) {
5291   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5292   // assembly for backwards compatibility.
5293   if (ID == "max_scratch_backing_memory_byte_size") {
5294     Parser.eatToEndOfStatement();
5295     return false;
5296   }
5297 
5298   SmallString<40> ErrStr;
5299   raw_svector_ostream Err(ErrStr);
5300   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5301     return TokError(Err.str());
5302   }
5303   Lex();
5304 
5305   if (ID == "enable_wavefront_size32") {
5306     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5307       if (!isGFX10Plus())
5308         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5309       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5310         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5311     } else {
5312       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5313         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5314     }
5315   }
5316 
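  // wavefront_size is encoded as a power of two: 5 selects wave32 and 6
  // selects wave64, hence the feature checks below.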
5317   if (ID == "wavefront_size") {
5318     if (Header.wavefront_size == 5) {
5319       if (!isGFX10Plus())
5320         return TokError("wavefront_size=5 is only allowed on GFX10+");
5321       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5322         return TokError("wavefront_size=5 requires +WavefrontSize32");
5323     } else if (Header.wavefront_size == 6) {
5324       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5325         return TokError("wavefront_size=6 requires +WavefrontSize64");
5326     }
5327   }
5328 
5329   if (ID == "enable_wgp_mode") {
5330     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5331         !isGFX10Plus())
5332       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5333   }
5334 
5335   if (ID == "enable_mem_ordered") {
5336     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5337         !isGFX10Plus())
5338       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5339   }
5340 
5341   if (ID == "enable_fwd_progress") {
5342     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5343         !isGFX10Plus())
5344       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5345   }
5346 
5347   return false;
5348 }
5349 
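/// Parse the body of a .amd_kernel_code_t block: one field assignment per
/// statement until .end_amd_kernel_code_t, with each field handled by
/// ParseAMDKernelCodeTValue above.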
5350 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5351   amd_kernel_code_t Header;
5352   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5353 
5354   while (true) {
5355     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5356     // will set the current token to EndOfStatement.
5357     while(trySkipToken(AsmToken::EndOfStatement));
5358 
5359     StringRef ID;
5360     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5361       return true;
5362 
5363     if (ID == ".end_amd_kernel_code_t")
5364       break;
5365 
5366     if (ParseAMDKernelCodeTValue(ID, Header))
5367       return true;
5368   }
5369 
5370   getTargetStreamer().EmitAMDKernelCodeT(Header);
5371 
5372   return false;
5373 }
5374 
5375 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5376   StringRef KernelName;
5377   if (!parseId(KernelName, "expected symbol name"))
5378     return true;
5379 
5380   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5381                                            ELF::STT_AMDGPU_HSA_KERNEL);
5382 
5383   KernelScope.initialize(getContext());
5384   return false;
5385 }
5386 
5387 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5388   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5389     return Error(getLoc(),
5390                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5391                  "architectures");
5392   }
5393 
5394   auto TargetIDDirective = getLexer().getTok().getStringContents();
5395   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5396     return Error(getParser().getTok().getLoc(), "target id must match options");
5397 
5398   getTargetStreamer().EmitISAVersion();
5399   Lex();
5400 
5401   return false;
5402 }
5403 
5404 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5405   const char *AssemblerDirectiveBegin;
5406   const char *AssemblerDirectiveEnd;
5407   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5408       isHsaAbiVersion3AndAbove(&getSTI())
5409           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5410                             HSAMD::V3::AssemblerDirectiveEnd)
5411           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5412                             HSAMD::AssemblerDirectiveEnd);
5413 
5414   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5415     return Error(getLoc(),
5416                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5417                  "not available on non-amdhsa OSes")).str());
5418   }
5419 
5420   std::string HSAMetadataString;
5421   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5422                           HSAMetadataString))
5423     return true;
5424 
5425   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5426     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5427       return Error(getLoc(), "invalid HSA metadata");
5428   } else {
5429     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5430       return Error(getLoc(), "invalid HSA metadata");
5431   }
5432 
5433   return false;
5434 }
5435 
5436 /// Common code to parse out a block of text (typically YAML) between start and
5437 /// end directives.
5438 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5439                                           const char *AssemblerDirectiveEnd,
5440                                           std::string &CollectString) {
5441 
5442   raw_string_ostream CollectStream(CollectString);
5443 
5444   getLexer().setSkipSpace(false);
5445 
5446   bool FoundEnd = false;
5447   while (!isToken(AsmToken::Eof)) {
5448     while (isToken(AsmToken::Space)) {
5449       CollectStream << getTokenStr();
5450       Lex();
5451     }
5452 
5453     if (trySkipId(AssemblerDirectiveEnd)) {
5454       FoundEnd = true;
5455       break;
5456     }
5457 
5458     CollectStream << Parser.parseStringToEndOfStatement()
5459                   << getContext().getAsmInfo()->getSeparatorString();
5460 
5461     Parser.eatToEndOfStatement();
5462   }
5463 
5464   getLexer().setSkipSpace(true);
5465 
5466   if (isToken(AsmToken::Eof) && !FoundEnd) {
5467     return TokError(Twine("expected directive ") +
5468                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5469   }
5470 
5471   CollectStream.flush();
5472   return false;
5473 }
5474 
5475 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5476 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5477   std::string String;
5478   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5479                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5480     return true;
5481 
5482   auto PALMetadata = getTargetStreamer().getPALMetadata();
5483   if (!PALMetadata->setFromString(String))
5484     return Error(getLoc(), "invalid PAL metadata");
5485   return false;
5486 }
5487 
5488 /// Parse the assembler directive for old linear-format PAL metadata.
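/// The directive expects an even-length, comma-separated list of key/value
/// pairs; each pair is recorded via PALMetadata->setRegister(Key, Value).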
5489 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5490   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5491     return Error(getLoc(),
5492                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5493                  "not available on non-amdpal OSes")).str());
5494   }
5495 
5496   auto PALMetadata = getTargetStreamer().getPALMetadata();
5497   PALMetadata->setLegacy();
5498   for (;;) {
5499     uint32_t Key, Value;
5500     if (ParseAsAbsoluteExpression(Key)) {
5501       return TokError(Twine("invalid value in ") +
5502                       Twine(PALMD::AssemblerDirective));
5503     }
5504     if (!trySkipToken(AsmToken::Comma)) {
5505       return TokError(Twine("expected an even number of values in ") +
5506                       Twine(PALMD::AssemblerDirective));
5507     }
5508     if (ParseAsAbsoluteExpression(Value)) {
5509       return TokError(Twine("invalid value in ") +
5510                       Twine(PALMD::AssemblerDirective));
5511     }
5512     PALMetadata->setRegister(Key, Value);
5513     if (!trySkipToken(AsmToken::Comma))
5514       break;
5515   }
5516   return false;
5517 }
5518 
5519 /// ParseDirectiveAMDGPULDS
5520 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
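/// For example (symbol name and values illustrative):
///   .amdgpu_lds lds_buffer, 4096, 16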
5521 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5522   if (getParser().checkForValidSection())
5523     return true;
5524 
5525   StringRef Name;
5526   SMLoc NameLoc = getLoc();
5527   if (getParser().parseIdentifier(Name))
5528     return TokError("expected identifier in directive");
5529 
5530   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5531   if (parseToken(AsmToken::Comma, "expected ','"))
5532     return true;
5533 
5534   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5535 
5536   int64_t Size;
5537   SMLoc SizeLoc = getLoc();
5538   if (getParser().parseAbsoluteExpression(Size))
5539     return true;
5540   if (Size < 0)
5541     return Error(SizeLoc, "size must be non-negative");
5542   if (Size > LocalMemorySize)
5543     return Error(SizeLoc, "size is too large");
5544 
5545   int64_t Alignment = 4;
5546   if (trySkipToken(AsmToken::Comma)) {
5547     SMLoc AlignLoc = getLoc();
5548     if (getParser().parseAbsoluteExpression(Alignment))
5549       return true;
5550     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5551       return Error(AlignLoc, "alignment must be a power of two");
5552 
5553     // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
5555     // to make sure the alignment fits nicely into a 32-bit integer.
5556     if (Alignment >= 1u << 31)
5557       return Error(AlignLoc, "alignment is too large");
5558   }
5559 
5560   if (parseEOL())
5561     return true;
5562 
5563   Symbol->redefineIfPossible();
5564   if (!Symbol->isUndefined())
5565     return Error(NameLoc, "invalid symbol redefinition");
5566 
5567   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5568   return false;
5569 }
5570 
5571 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5572   StringRef IDVal = DirectiveID.getString();
5573 
5574   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5575     if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();
5577 
5578     // TODO: Restructure/combine with PAL metadata directive.
5579     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5580       return ParseDirectiveHSAMetadata();
5581   } else {
5582     if (IDVal == ".hsa_code_object_version")
5583       return ParseDirectiveHSACodeObjectVersion();
5584 
5585     if (IDVal == ".hsa_code_object_isa")
5586       return ParseDirectiveHSACodeObjectISA();
5587 
5588     if (IDVal == ".amd_kernel_code_t")
5589       return ParseDirectiveAMDKernelCodeT();
5590 
5591     if (IDVal == ".amdgpu_hsa_kernel")
5592       return ParseDirectiveAMDGPUHsaKernel();
5593 
5594     if (IDVal == ".amd_amdgpu_isa")
5595       return ParseDirectiveISAVersion();
5596 
5597     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5598       return ParseDirectiveHSAMetadata();
5599   }
5600 
5601   if (IDVal == ".amdgcn_target")
5602     return ParseDirectiveAMDGCNTarget();
5603 
5604   if (IDVal == ".amdgpu_lds")
5605     return ParseDirectiveAMDGPULDS();
5606 
5607   if (IDVal == PALMD::AssemblerDirectiveBegin)
5608     return ParseDirectivePALMetadataBegin();
5609 
5610   if (IDVal == PALMD::AssemblerDirective)
5611     return ParseDirectivePALMetadata();
5612 
5613   return true;
5614 }
5615 
5616 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5617                                            unsigned RegNo) {
5618 
5619   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5620     return isGFX9Plus();
5621 
5622   // GFX10+ has 2 more SGPRs 104 and 105.
5623   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5624     return hasSGPR104_SGPR105();
5625 
5626   switch (RegNo) {
5627   case AMDGPU::SRC_SHARED_BASE:
5628   case AMDGPU::SRC_SHARED_LIMIT:
5629   case AMDGPU::SRC_PRIVATE_BASE:
5630   case AMDGPU::SRC_PRIVATE_LIMIT:
5631     return isGFX9Plus();
5632   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5633     return isGFX9Plus() && !isGFX11Plus();
5634   case AMDGPU::TBA:
5635   case AMDGPU::TBA_LO:
5636   case AMDGPU::TBA_HI:
5637   case AMDGPU::TMA:
5638   case AMDGPU::TMA_LO:
5639   case AMDGPU::TMA_HI:
5640     return !isGFX9Plus();
5641   case AMDGPU::XNACK_MASK:
5642   case AMDGPU::XNACK_MASK_LO:
5643   case AMDGPU::XNACK_MASK_HI:
5644     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5645   case AMDGPU::SGPR_NULL:
5646     return isGFX10Plus();
5647   default:
5648     break;
5649   }
5650 
5651   if (isCI())
5652     return true;
5653 
5654   if (isSI() || isGFX10Plus()) {
5655     // No flat_scr on SI.
5656     // On GFX10Plus flat scratch is not a valid register operand and can only be
5657     // accessed with s_setreg/s_getreg.
5658     switch (RegNo) {
5659     case AMDGPU::FLAT_SCR:
5660     case AMDGPU::FLAT_SCR_LO:
5661     case AMDGPU::FLAT_SCR_HI:
5662       return false;
5663     default:
5664       return true;
5665     }
5666   }
5667 
5668   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5669   // SI/CI have.
5670   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5671     return hasSGPR102_SGPR103();
5672 
5673   return true;
5674 }
5675 
5676 OperandMatchResultTy
5677 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5678                               OperandMode Mode) {
5679   OperandMatchResultTy ResTy = parseVOPD(Operands);
5680   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5681       isToken(AsmToken::EndOfStatement))
5682     return ResTy;
5683 
5684   // Try to parse with a custom parser
5685   ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5686 
  // If we successfully parsed the operand or if there was an error parsing,
5688   // we are done.
5689   //
5690   // If we are parsing after we reach EndOfStatement then this means we
5691   // are appending default values to the Operands list.  This is only done
  // by custom parsers, so we shouldn't continue on to the generic parsing.
5693   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5694       isToken(AsmToken::EndOfStatement))
5695     return ResTy;
5696 
5697   SMLoc RBraceLoc;
5698   SMLoc LBraceLoc = getLoc();
5699   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5700     unsigned Prefix = Operands.size();
5701 
5702     for (;;) {
5703       auto Loc = getLoc();
5704       ResTy = parseReg(Operands);
5705       if (ResTy == MatchOperand_NoMatch)
5706         Error(Loc, "expected a register");
5707       if (ResTy != MatchOperand_Success)
5708         return MatchOperand_ParseFail;
5709 
5710       RBraceLoc = getLoc();
5711       if (trySkipToken(AsmToken::RBrac))
5712         break;
5713 
5714       if (!skipToken(AsmToken::Comma,
5715                      "expected a comma or a closing square bracket")) {
5716         return MatchOperand_ParseFail;
5717       }
5718     }
5719 
5720     if (Operands.size() - Prefix > 1) {
5721       Operands.insert(Operands.begin() + Prefix,
5722                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5723       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5724     }
5725 
5726     return MatchOperand_Success;
5727   }
5728 
5729   return parseRegOrImm(Operands);
5730 }
5731 
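// Strip a recognized encoding suffix (_e64_dpp, _e64, _e32, _dpp, _sdwa) from
// the mnemonic and record the corresponding forced encoding for matching.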
5732 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5733   // Clear any forced encodings from the previous instruction.
5734   setForcedEncodingSize(0);
5735   setForcedDPP(false);
5736   setForcedSDWA(false);
5737 
5738   if (Name.endswith("_e64_dpp")) {
5739     setForcedDPP(true);
5740     setForcedEncodingSize(64);
5741     return Name.substr(0, Name.size() - 8);
5742   } else if (Name.endswith("_e64")) {
5743     setForcedEncodingSize(64);
5744     return Name.substr(0, Name.size() - 4);
5745   } else if (Name.endswith("_e32")) {
5746     setForcedEncodingSize(32);
5747     return Name.substr(0, Name.size() - 4);
5748   } else if (Name.endswith("_dpp")) {
5749     setForcedDPP(true);
5750     return Name.substr(0, Name.size() - 4);
5751   } else if (Name.endswith("_sdwa")) {
5752     setForcedSDWA(true);
5753     return Name.substr(0, Name.size() - 5);
5754   }
5755   return Name;
5756 }
5757 
5758 static void applyMnemonicAliases(StringRef &Mnemonic,
5759                                  const FeatureBitset &Features,
5760                                  unsigned VariantID);
5761 
5762 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5763                                        StringRef Name,
5764                                        SMLoc NameLoc, OperandVector &Operands) {
5765   // Add the instruction mnemonic
5766   Name = parseMnemonicSuffix(Name);
5767 
5768   // If the target architecture uses MnemonicAlias, call it here to parse
5769   // operands correctly.
5770   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5771 
5772   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5773 
5774   bool IsMIMG = Name.startswith("image_");
5775 
5776   while (!trySkipToken(AsmToken::EndOfStatement)) {
5777     OperandMode Mode = OperandMode_Default;
5778     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5779       Mode = OperandMode_NSA;
5780     CPolSeen = 0;
5781     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5782 
5783     if (Res != MatchOperand_Success) {
5784       checkUnsupportedInstruction(Name, NameLoc);
5785       if (!Parser.hasPendingError()) {
5786         // FIXME: use real operand location rather than the current location.
5787         StringRef Msg =
5788           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5789                                             "not a valid operand.";
5790         Error(getLoc(), Msg);
5791       }
5792       while (!trySkipToken(AsmToken::EndOfStatement)) {
5793         lex();
5794       }
5795       return true;
5796     }
5797 
5798     // Eat the comma or space if there is one.
5799     trySkipToken(AsmToken::Comma);
5800   }
5801 
5802   return false;
5803 }
5804 
5805 //===----------------------------------------------------------------------===//
5806 // Utility functions
5807 //===----------------------------------------------------------------------===//
5808 
5809 OperandMatchResultTy
5810 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5811 
5812   if (!trySkipId(Prefix, AsmToken::Colon))
5813     return MatchOperand_NoMatch;
5814 
5815   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5816 }
5817 
5818 OperandMatchResultTy
5819 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5820                                     AMDGPUOperand::ImmTy ImmTy,
5821                                     bool (*ConvertResult)(int64_t&)) {
5822   SMLoc S = getLoc();
5823   int64_t Value = 0;
5824 
5825   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5826   if (Res != MatchOperand_Success)
5827     return Res;
5828 
5829   if (ConvertResult && !ConvertResult(Value)) {
5830     Error(S, "invalid " + StringRef(Prefix) + " value.");
5831   }
5832 
5833   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5834   return MatchOperand_Success;
5835 }
5836 
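// Parse "Prefix:[v0,v1,...]" where each element must be 0 or 1 and at most
// four elements are accepted; the bits are packed into a single immediate
// operand, e.g. (prefix illustrative) neg_lo:[0,1].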
5837 OperandMatchResultTy
5838 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5839                                              OperandVector &Operands,
5840                                              AMDGPUOperand::ImmTy ImmTy,
5841                                              bool (*ConvertResult)(int64_t&)) {
5842   SMLoc S = getLoc();
5843   if (!trySkipId(Prefix, AsmToken::Colon))
5844     return MatchOperand_NoMatch;
5845 
5846   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5847     return MatchOperand_ParseFail;
5848 
5849   unsigned Val = 0;
5850   const unsigned MaxSize = 4;
5851 
5852   // FIXME: How to verify the number of elements matches the number of src
5853   // operands?
5854   for (int I = 0; ; ++I) {
5855     int64_t Op;
5856     SMLoc Loc = getLoc();
5857     if (!parseExpr(Op))
5858       return MatchOperand_ParseFail;
5859 
5860     if (Op != 0 && Op != 1) {
5861       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5862       return MatchOperand_ParseFail;
5863     }
5864 
5865     Val |= (Op << I);
5866 
5867     if (trySkipToken(AsmToken::RBrac))
5868       break;
5869 
5870     if (I + 1 == MaxSize) {
5871       Error(getLoc(), "expected a closing square bracket");
5872       return MatchOperand_ParseFail;
5873     }
5874 
5875     if (!skipToken(AsmToken::Comma, "expected a comma"))
5876       return MatchOperand_ParseFail;
5877   }
5878 
5879   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5880   return MatchOperand_Success;
5881 }
5882 
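// Parse a named single-bit modifier: the bare name sets the bit and the
// "no"-prefixed form clears it, e.g. "r128" vs. "nor128".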
5883 OperandMatchResultTy
5884 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5885                                AMDGPUOperand::ImmTy ImmTy) {
5886   int64_t Bit;
5887   SMLoc S = getLoc();
5888 
5889   if (trySkipId(Name)) {
5890     Bit = 1;
5891   } else if (trySkipId("no", Name)) {
5892     Bit = 0;
5893   } else {
5894     return MatchOperand_NoMatch;
5895   }
5896 
5897   if (Name == "r128" && !hasMIMG_R128()) {
5898     Error(S, "r128 modifier is not supported on this GPU");
5899     return MatchOperand_ParseFail;
5900   }
5901   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5902     Error(S, "a16 modifier is not supported on this GPU");
5903     return MatchOperand_ParseFail;
5904   }
5905 
5906   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5907     ImmTy = AMDGPUOperand::ImmTyR128A16;
5908 
5909   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5910   return MatchOperand_Success;
5911 }
5912 
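// Parse cache-policy modifiers (glc/slc/dlc/scc, or sc0/sc1/nt on gfx940,
// each with a "no"-prefixed form) and fold them into the single CPol
// immediate shared by all such modifiers on the instruction.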
5913 OperandMatchResultTy
5914 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5915   unsigned CPolOn = 0;
5916   unsigned CPolOff = 0;
5917   SMLoc S = getLoc();
5918 
5919   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5920   if (isGFX940() && !Mnemo.startswith("s_")) {
5921     if (trySkipId("sc0"))
5922       CPolOn = AMDGPU::CPol::SC0;
5923     else if (trySkipId("nosc0"))
5924       CPolOff = AMDGPU::CPol::SC0;
5925     else if (trySkipId("nt"))
5926       CPolOn = AMDGPU::CPol::NT;
5927     else if (trySkipId("nont"))
5928       CPolOff = AMDGPU::CPol::NT;
5929     else if (trySkipId("sc1"))
5930       CPolOn = AMDGPU::CPol::SC1;
5931     else if (trySkipId("nosc1"))
5932       CPolOff = AMDGPU::CPol::SC1;
5933     else
5934       return MatchOperand_NoMatch;
  } else if (trySkipId("glc"))
5937     CPolOn = AMDGPU::CPol::GLC;
5938   else if (trySkipId("noglc"))
5939     CPolOff = AMDGPU::CPol::GLC;
5940   else if (trySkipId("slc"))
5941     CPolOn = AMDGPU::CPol::SLC;
5942   else if (trySkipId("noslc"))
5943     CPolOff = AMDGPU::CPol::SLC;
5944   else if (trySkipId("dlc"))
5945     CPolOn = AMDGPU::CPol::DLC;
5946   else if (trySkipId("nodlc"))
5947     CPolOff = AMDGPU::CPol::DLC;
5948   else if (trySkipId("scc"))
5949     CPolOn = AMDGPU::CPol::SCC;
5950   else if (trySkipId("noscc"))
5951     CPolOff = AMDGPU::CPol::SCC;
5952   else
5953     return MatchOperand_NoMatch;
5954 
5955   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5956     Error(S, "dlc modifier is not supported on this GPU");
5957     return MatchOperand_ParseFail;
5958   }
5959 
5960   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5961     Error(S, "scc modifier is not supported on this GPU");
5962     return MatchOperand_ParseFail;
5963   }
5964 
5965   if (CPolSeen & (CPolOn | CPolOff)) {
5966     Error(S, "duplicate cache policy modifier");
5967     return MatchOperand_ParseFail;
5968   }
5969 
5970   CPolSeen |= (CPolOn | CPolOff);
5971 
5972   for (unsigned I = 1; I != Operands.size(); ++I) {
5973     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5974     if (Op.isCPol()) {
5975       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5976       return MatchOperand_Success;
5977     }
5978   }
5979 
5980   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5981                                               AMDGPUOperand::ImmTyCPol));
5982 
5983   return MatchOperand_Success;
5984 }
5985 
5986 static void addOptionalImmOperand(
5987   MCInst& Inst, const OperandVector& Operands,
5988   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5989   AMDGPUOperand::ImmTy ImmT,
5990   int64_t Default = 0) {
5991   auto i = OptionalIdx.find(ImmT);
5992   if (i != OptionalIdx.end()) {
5993     unsigned Idx = i->second;
5994     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5995   } else {
5996     Inst.addOperand(MCOperand::createImm(Default));
5997   }
5998 }
5999 
6000 OperandMatchResultTy
6001 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6002                                        StringRef &Value,
6003                                        SMLoc &StringLoc) {
6004   if (!trySkipId(Prefix, AsmToken::Colon))
6005     return MatchOperand_NoMatch;
6006 
6007   StringLoc = getLoc();
6008   return parseId(Value, "expected an identifier") ? MatchOperand_Success
6009                                                   : MatchOperand_ParseFail;
6010 }
6011 
6012 //===----------------------------------------------------------------------===//
6013 // MTBUF format
6014 //===----------------------------------------------------------------------===//
6015 
6016 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6017                                   int64_t MaxVal,
6018                                   int64_t &Fmt) {
6019   int64_t Val;
6020   SMLoc Loc = getLoc();
6021 
6022   auto Res = parseIntWithPrefix(Pref, Val);
6023   if (Res == MatchOperand_ParseFail)
6024     return false;
6025   if (Res == MatchOperand_NoMatch)
6026     return true;
6027 
6028   if (Val < 0 || Val > MaxVal) {
6029     Error(Loc, Twine("out of range ", StringRef(Pref)));
6030     return false;
6031   }
6032 
6033   Fmt = Val;
6034   return true;
6035 }
6036 
6037 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6038 // values to live in a joint format operand in the MCInst encoding.
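// For example (values illustrative), both "dfmt:1, nfmt:2" and
// "nfmt:2, dfmt:1" are accepted.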
6039 OperandMatchResultTy
6040 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6041   using namespace llvm::AMDGPU::MTBUFFormat;
6042 
6043   int64_t Dfmt = DFMT_UNDEF;
6044   int64_t Nfmt = NFMT_UNDEF;
6045 
6046   // dfmt and nfmt can appear in either order, and each is optional.
6047   for (int I = 0; I < 2; ++I) {
6048     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6049       return MatchOperand_ParseFail;
6050 
6051     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
6052       return MatchOperand_ParseFail;
6053     }
6054     // Skip optional comma between dfmt/nfmt
6055     // but guard against 2 commas following each other.
6056     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6057         !peekToken().is(AsmToken::Comma)) {
6058       trySkipToken(AsmToken::Comma);
6059     }
6060   }
6061 
6062   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6063     return MatchOperand_NoMatch;
6064 
6065   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6066   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6067 
6068   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6069   return MatchOperand_Success;
6070 }
6071 
6072 OperandMatchResultTy
6073 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6074   using namespace llvm::AMDGPU::MTBUFFormat;
6075 
6076   int64_t Fmt = UFMT_UNDEF;
6077 
6078   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6079     return MatchOperand_ParseFail;
6080 
6081   if (Fmt == UFMT_UNDEF)
6082     return MatchOperand_NoMatch;
6083 
6084   Format = Fmt;
6085   return MatchOperand_Success;
6086 }
6087 
6088 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6089                                     int64_t &Nfmt,
6090                                     StringRef FormatStr,
6091                                     SMLoc Loc) {
6092   using namespace llvm::AMDGPU::MTBUFFormat;
6093   int64_t Format;
6094 
6095   Format = getDfmt(FormatStr);
6096   if (Format != DFMT_UNDEF) {
6097     Dfmt = Format;
6098     return true;
6099   }
6100 
6101   Format = getNfmt(FormatStr, getSTI());
6102   if (Format != NFMT_UNDEF) {
6103     Nfmt = Format;
6104     return true;
6105   }
6106 
6107   Error(Loc, "unsupported format");
6108   return false;
6109 }
6110 
6111 OperandMatchResultTy
6112 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6113                                           SMLoc FormatLoc,
6114                                           int64_t &Format) {
6115   using namespace llvm::AMDGPU::MTBUFFormat;
6116 
6117   int64_t Dfmt = DFMT_UNDEF;
6118   int64_t Nfmt = NFMT_UNDEF;
6119   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6120     return MatchOperand_ParseFail;
6121 
6122   if (trySkipToken(AsmToken::Comma)) {
6123     StringRef Str;
6124     SMLoc Loc = getLoc();
6125     if (!parseId(Str, "expected a format string") ||
6126         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6127       return MatchOperand_ParseFail;
6128     }
6129     if (Dfmt == DFMT_UNDEF) {
6130       Error(Loc, "duplicate numeric format");
6131       return MatchOperand_ParseFail;
6132     } else if (Nfmt == NFMT_UNDEF) {
6133       Error(Loc, "duplicate data format");
6134       return MatchOperand_ParseFail;
6135     }
6136   }
6137 
6138   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6139   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6140 
6141   if (isGFX10Plus()) {
6142     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6143     if (Ufmt == UFMT_UNDEF) {
6144       Error(FormatLoc, "unsupported format");
6145       return MatchOperand_ParseFail;
6146     }
6147     Format = Ufmt;
6148   } else {
6149     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6150   }
6151 
6152   return MatchOperand_Success;
6153 }
6154 
6155 OperandMatchResultTy
6156 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6157                                             SMLoc Loc,
6158                                             int64_t &Format) {
6159   using namespace llvm::AMDGPU::MTBUFFormat;
6160 
6161   auto Id = getUnifiedFormat(FormatStr, getSTI());
6162   if (Id == UFMT_UNDEF)
6163     return MatchOperand_NoMatch;
6164 
6165   if (!isGFX10Plus()) {
6166     Error(Loc, "unified format is not supported on this GPU");
6167     return MatchOperand_ParseFail;
6168   }
6169 
6170   Format = Id;
6171   return MatchOperand_Success;
6172 }
6173 
6174 OperandMatchResultTy
6175 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6176   using namespace llvm::AMDGPU::MTBUFFormat;
6177   SMLoc Loc = getLoc();
6178 
6179   if (!parseExpr(Format))
6180     return MatchOperand_ParseFail;
6181   if (!isValidFormatEncoding(Format, getSTI())) {
6182     Error(Loc, "out of range format");
6183     return MatchOperand_ParseFail;
6184   }
6185 
6186   return MatchOperand_Success;
6187 }
6188 
6189 OperandMatchResultTy
6190 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6191   using namespace llvm::AMDGPU::MTBUFFormat;
6192 
6193   if (!trySkipId("format", AsmToken::Colon))
6194     return MatchOperand_NoMatch;
6195 
6196   if (trySkipToken(AsmToken::LBrac)) {
6197     StringRef FormatStr;
6198     SMLoc Loc = getLoc();
6199     if (!parseId(FormatStr, "expected a format string"))
6200       return MatchOperand_ParseFail;
6201 
6202     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6203     if (Res == MatchOperand_NoMatch)
6204       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6205     if (Res != MatchOperand_Success)
6206       return Res;
6207 
6208     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6209       return MatchOperand_ParseFail;
6210 
6211     return MatchOperand_Success;
6212   }
6213 
6214   return parseNumericFormat(Format);
6215 }
6216 
6217 OperandMatchResultTy
6218 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6219   using namespace llvm::AMDGPU::MTBUFFormat;
6220 
6221   int64_t Format = getDefaultFormatEncoding(getSTI());
6222   OperandMatchResultTy Res;
6223   SMLoc Loc = getLoc();
6224 
6225   // Parse legacy format syntax.
6226   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6227   if (Res == MatchOperand_ParseFail)
6228     return Res;
6229 
6230   bool FormatFound = (Res == MatchOperand_Success);
6231 
6232   Operands.push_back(
6233     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6234 
6235   if (FormatFound)
6236     trySkipToken(AsmToken::Comma);
6237 
6238   if (isToken(AsmToken::EndOfStatement)) {
6239     // We are expecting an soffset operand,
6240     // but let the matcher handle the error.
6241     return MatchOperand_Success;
6242   }
6243 
6244   // Parse soffset.
6245   Res = parseRegOrImm(Operands);
6246   if (Res != MatchOperand_Success)
6247     return Res;
6248 
6249   trySkipToken(AsmToken::Comma);
6250 
6251   if (!FormatFound) {
6252     Res = parseSymbolicOrNumericFormat(Format);
6253     if (Res == MatchOperand_ParseFail)
6254       return Res;
6255     if (Res == MatchOperand_Success) {
6256       auto Size = Operands.size();
6257       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6258       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6259       Op.setImm(Format);
6260     }
6261     return MatchOperand_Success;
6262   }
6263 
6264   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6265     Error(getLoc(), "duplicate format");
6266     return MatchOperand_ParseFail;
6267   }
6268   return MatchOperand_Success;
6269 }
6270 
6271 //===----------------------------------------------------------------------===//
6272 // ds
6273 //===----------------------------------------------------------------------===//
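// Illustrative DS forms the converters below handle (assumed examples):
//   ds_write2_b32 v1, v2, v3 offset0:4 offset1:8   - cvtDSOffset01
//   ds_add_u32 v1, v2 offset:16 gds                - cvtDSImpl; "gds" is a token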
6274 
6275 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6276                                     const OperandVector &Operands) {
6277   OptionalImmIndexMap OptionalIdx;
6278 
6279   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6280     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6281 
6282     // Add the register arguments
6283     if (Op.isReg()) {
6284       Op.addRegOperands(Inst, 1);
6285       continue;
6286     }
6287 
6288     // Handle optional arguments
6289     OptionalIdx[Op.getImmTy()] = i;
6290   }
6291 
6292   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6293   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6294   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6295 
6296   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6297 }
6298 
6299 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6300                                 bool IsGdsHardcoded) {
6301   OptionalImmIndexMap OptionalIdx;
6302   AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset;
6303 
6304   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6305     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6306 
6307     // Add the register arguments
6308     if (Op.isReg()) {
6309       Op.addRegOperands(Inst, 1);
6310       continue;
6311     }
6312 
6313     if (Op.isToken() && Op.getToken() == "gds") {
6314       IsGdsHardcoded = true;
6315       continue;
6316     }
6317 
6318     // Handle optional arguments
6319     OptionalIdx[Op.getImmTy()] = i;
6320 
6321     if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle)
6322       OffsetType = AMDGPUOperand::ImmTySwizzle;
6323   }
6324 
6325   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6326 
6327   if (!IsGdsHardcoded) {
6328     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6329   }
6330   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6331 }
6332 
6333 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6334   OptionalImmIndexMap OptionalIdx;
6335 
6336   unsigned OperandIdx[4];
6337   unsigned EnMask = 0;
6338   int SrcIdx = 0;
6339 
6340   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6341     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6342 
6343     // Add the register arguments
6344     if (Op.isReg()) {
6345       assert(SrcIdx < 4);
6346       OperandIdx[SrcIdx] = Inst.size();
6347       Op.addRegOperands(Inst, 1);
6348       ++SrcIdx;
6349       continue;
6350     }
6351 
6352     if (Op.isOff()) {
6353       assert(SrcIdx < 4);
6354       OperandIdx[SrcIdx] = Inst.size();
6355       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6356       ++SrcIdx;
6357       continue;
6358     }
6359 
6360     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6361       Op.addImmOperands(Inst, 1);
6362       continue;
6363     }
6364 
6365     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6366       continue;
6367 
6368     // Handle optional arguments
6369     OptionalIdx[Op.getImmTy()] = i;
6370   }
6371 
6372   assert(SrcIdx == 4);
6373 
6374   bool Compr = false;
6375   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6376     Compr = true;
6377     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6378     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6379     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6380   }
6381 
6382   for (auto i = 0; i < SrcIdx; ++i) {
6383     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6384       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6385     }
6386   }
6387 
6388   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6389   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6390 
6391   Inst.addOperand(MCOperand::createImm(EnMask));
6392 }
6393 
6394 //===----------------------------------------------------------------------===//
6395 // s_waitcnt
6396 //===----------------------------------------------------------------------===//
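// A sketch of the forms accepted below (illustrative, not taken from a test):
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)   - named counters, optionally
//                                               separated by '&' or ','
//   s_waitcnt vmcnt_sat(1000)                 - "_sat" clamps an out-of-range
//                                               value to the field maximum
//   s_waitcnt 0x1234                          - a raw absolute expression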
6397 
6398 static bool
6399 encodeCnt(
6400   const AMDGPU::IsaVersion ISA,
6401   int64_t &IntVal,
6402   int64_t CntVal,
6403   bool Saturate,
6404   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6405   unsigned (*decode)(const IsaVersion &Version, unsigned))
6406 {
6407   bool Failed = false;
6408 
6409   IntVal = encode(ISA, IntVal, CntVal);
6410   if (CntVal != decode(ISA, IntVal)) {
6411     if (Saturate) {
6412       IntVal = encode(ISA, IntVal, -1);
6413     } else {
6414       Failed = true;
6415     }
6416   }
6417   return Failed;
6418 }
6419 
6420 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6421 
6422   SMLoc CntLoc = getLoc();
6423   StringRef CntName = getTokenStr();
6424 
6425   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6426       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6427     return false;
6428 
6429   int64_t CntVal;
6430   SMLoc ValLoc = getLoc();
6431   if (!parseExpr(CntVal))
6432     return false;
6433 
6434   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6435 
6436   bool Failed = true;
6437   bool Sat = CntName.endswith("_sat");
6438 
6439   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6440     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6441   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6442     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6443   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6444     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6445   } else {
6446     Error(CntLoc, "invalid counter name " + CntName);
6447     return false;
6448   }
6449 
6450   if (Failed) {
6451     Error(ValLoc, "value is too large for " + CntName);
6452     return false;
6453   }
6454 
6455   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6456     return false;
6457 
6458   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6459     if (isToken(AsmToken::EndOfStatement)) {
6460       Error(getLoc(), "expected a counter name");
6461       return false;
6462     }
6463   }
6464 
6465   return true;
6466 }
6467 
6468 OperandMatchResultTy
6469 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6470   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6471   int64_t Waitcnt = getWaitcntBitMask(ISA);
6472   SMLoc S = getLoc();
6473 
6474   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6475     while (!isToken(AsmToken::EndOfStatement)) {
6476       if (!parseCnt(Waitcnt))
6477         return MatchOperand_ParseFail;
6478     }
6479   } else {
6480     if (!parseExpr(Waitcnt))
6481       return MatchOperand_ParseFail;
6482   }
6483 
6484   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6485   return MatchOperand_Success;
6486 }
6487 
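// Parse one field of an s_delay_alu operand. The operand is a '|'-separated
// list of instid0(...), instskip(...) and instid1(...) fields packed at bit
// offsets 0, 4 and 7, e.g. (illustrative):
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)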
6488 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6489   SMLoc FieldLoc = getLoc();
6490   StringRef FieldName = getTokenStr();
6491   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6492       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6493     return false;
6494 
6495   SMLoc ValueLoc = getLoc();
6496   StringRef ValueName = getTokenStr();
6497   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6498       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6499     return false;
6500 
6501   unsigned Shift;
6502   if (FieldName == "instid0") {
6503     Shift = 0;
6504   } else if (FieldName == "instskip") {
6505     Shift = 4;
6506   } else if (FieldName == "instid1") {
6507     Shift = 7;
6508   } else {
6509     Error(FieldLoc, "invalid field name " + FieldName);
6510     return false;
6511   }
6512 
6513   int Value;
6514   if (Shift == 4) {
6515     // Parse values for instskip.
6516     Value = StringSwitch<int>(ValueName)
6517                 .Case("SAME", 0)
6518                 .Case("NEXT", 1)
6519                 .Case("SKIP_1", 2)
6520                 .Case("SKIP_2", 3)
6521                 .Case("SKIP_3", 4)
6522                 .Case("SKIP_4", 5)
6523                 .Default(-1);
6524   } else {
6525     // Parse values for instid0 and instid1.
6526     Value = StringSwitch<int>(ValueName)
6527                 .Case("NO_DEP", 0)
6528                 .Case("VALU_DEP_1", 1)
6529                 .Case("VALU_DEP_2", 2)
6530                 .Case("VALU_DEP_3", 3)
6531                 .Case("VALU_DEP_4", 4)
6532                 .Case("TRANS32_DEP_1", 5)
6533                 .Case("TRANS32_DEP_2", 6)
6534                 .Case("TRANS32_DEP_3", 7)
6535                 .Case("FMA_ACCUM_CYCLE_1", 8)
6536                 .Case("SALU_CYCLE_1", 9)
6537                 .Case("SALU_CYCLE_2", 10)
6538                 .Case("SALU_CYCLE_3", 11)
6539                 .Default(-1);
6540   }
6541   if (Value < 0) {
6542     Error(ValueLoc, "invalid value name " + ValueName);
6543     return false;
6544   }
6545 
6546   Delay |= Value << Shift;
6547   return true;
6548 }
6549 
6550 OperandMatchResultTy
6551 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6552   int64_t Delay = 0;
6553   SMLoc S = getLoc();
6554 
6555   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6556     do {
6557       if (!parseDelay(Delay))
6558         return MatchOperand_ParseFail;
6559     } while (trySkipToken(AsmToken::Pipe));
6560   } else {
6561     if (!parseExpr(Delay))
6562       return MatchOperand_ParseFail;
6563   }
6564 
6565   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6566   return MatchOperand_Success;
6567 }
6568 
6569 bool
6570 AMDGPUOperand::isSWaitCnt() const {
6571   return isImm();
6572 }
6573 
6574 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6575 
6576 //===----------------------------------------------------------------------===//
6577 // DepCtr
6578 //===----------------------------------------------------------------------===//
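// Illustrative forms for the depctr operand (the valid counter names are
// looked up via encodeDepCtr(), so none are hard-coded here; shapes assumed):
//   s_waitcnt_depctr <counter>(0) & <counter>(1)   - one or more named fields
//   s_waitcnt_depctr 0xfffe                        - a raw absolute expression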
6579 
6580 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6581                                   StringRef DepCtrName) {
6582   switch (ErrorId) {
6583   case OPR_ID_UNKNOWN:
6584     Error(Loc, Twine("invalid counter name ", DepCtrName));
6585     return;
6586   case OPR_ID_UNSUPPORTED:
6587     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6588     return;
6589   case OPR_ID_DUPLICATE:
6590     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6591     return;
6592   case OPR_VAL_INVALID:
6593     Error(Loc, Twine("invalid value for ", DepCtrName));
6594     return;
6595   default:
6596     assert(false);
6597   }
6598 }
6599 
6600 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6601 
6602   using namespace llvm::AMDGPU::DepCtr;
6603 
6604   SMLoc DepCtrLoc = getLoc();
6605   StringRef DepCtrName = getTokenStr();
6606 
6607   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6608       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6609     return false;
6610 
6611   int64_t ExprVal;
6612   if (!parseExpr(ExprVal))
6613     return false;
6614 
6615   unsigned PrevOprMask = UsedOprMask;
6616   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6617 
6618   if (CntVal < 0) {
6619     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6620     return false;
6621   }
6622 
6623   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6624     return false;
6625 
6626   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6627     if (isToken(AsmToken::EndOfStatement)) {
6628       Error(getLoc(), "expected a counter name");
6629       return false;
6630     }
6631   }
6632 
6633   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6634   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6635   return true;
6636 }
6637 
6638 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6639   using namespace llvm::AMDGPU::DepCtr;
6640 
6641   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6642   SMLoc Loc = getLoc();
6643 
6644   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6645     unsigned UsedOprMask = 0;
6646     while (!isToken(AsmToken::EndOfStatement)) {
6647       if (!parseDepCtr(DepCtr, UsedOprMask))
6648         return MatchOperand_ParseFail;
6649     }
6650   } else {
6651     if (!parseExpr(DepCtr))
6652       return MatchOperand_ParseFail;
6653   }
6654 
6655   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6656   return MatchOperand_Success;
6657 }
6658 
6659 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6660 
6661 //===----------------------------------------------------------------------===//
6662 // hwreg
6663 //===----------------------------------------------------------------------===//
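// Sketch of the operand forms parseHwreg() accepts (register names are
// illustrative; they are resolved through getHwregId()):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE)           - name only
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)    - with bit offset and width
//   s_getreg_b32 s0, hwreg(6)                     - numeric 6-bit register code
//   s_getreg_b32 s0, 0x1234                       - pre-encoded 16-bit immediate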
6664 
6665 bool
6666 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6667                                 OperandInfoTy &Offset,
6668                                 OperandInfoTy &Width) {
6669   using namespace llvm::AMDGPU::Hwreg;
6670 
6671   // The register may be specified by name or using a numeric code
6672   HwReg.Loc = getLoc();
6673   if (isToken(AsmToken::Identifier) &&
6674       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6675     HwReg.IsSymbolic = true;
6676     lex(); // skip register name
6677   } else if (!parseExpr(HwReg.Id, "a register name")) {
6678     return false;
6679   }
6680 
6681   if (trySkipToken(AsmToken::RParen))
6682     return true;
6683 
6684   // parse optional params
6685   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6686     return false;
6687 
6688   Offset.Loc = getLoc();
6689   if (!parseExpr(Offset.Id))
6690     return false;
6691 
6692   if (!skipToken(AsmToken::Comma, "expected a comma"))
6693     return false;
6694 
6695   Width.Loc = getLoc();
6696   return parseExpr(Width.Id) &&
6697          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6698 }
6699 
6700 bool
6701 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6702                                const OperandInfoTy &Offset,
6703                                const OperandInfoTy &Width) {
6704 
6705   using namespace llvm::AMDGPU::Hwreg;
6706 
6707   if (HwReg.IsSymbolic) {
6708     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6709       Error(HwReg.Loc,
6710             "specified hardware register is not supported on this GPU");
6711       return false;
6712     }
6713   } else {
6714     if (!isValidHwreg(HwReg.Id)) {
6715       Error(HwReg.Loc,
6716             "invalid code of hardware register: only 6-bit values are legal");
6717       return false;
6718     }
6719   }
6720   if (!isValidHwregOffset(Offset.Id)) {
6721     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6722     return false;
6723   }
6724   if (!isValidHwregWidth(Width.Id)) {
6725     Error(Width.Loc,
6726           "invalid bitfield width: only values from 1 to 32 are legal");
6727     return false;
6728   }
6729   return true;
6730 }
6731 
6732 OperandMatchResultTy
6733 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6734   using namespace llvm::AMDGPU::Hwreg;
6735 
6736   int64_t ImmVal = 0;
6737   SMLoc Loc = getLoc();
6738 
6739   if (trySkipId("hwreg", AsmToken::LParen)) {
6740     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6741     OperandInfoTy Offset(OFFSET_DEFAULT_);
6742     OperandInfoTy Width(WIDTH_DEFAULT_);
6743     if (parseHwregBody(HwReg, Offset, Width) &&
6744         validateHwreg(HwReg, Offset, Width)) {
6745       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6746     } else {
6747       return MatchOperand_ParseFail;
6748     }
6749   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6750     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6751       Error(Loc, "invalid immediate: only 16-bit values are legal");
6752       return MatchOperand_ParseFail;
6753     }
6754   } else {
6755     return MatchOperand_ParseFail;
6756   }
6757 
6758   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6759   return MatchOperand_Success;
6760 }
6761 
6762 bool AMDGPUOperand::isHwreg() const {
6763   return isImmTy(ImmTyHwreg);
6764 }
6765 
6766 //===----------------------------------------------------------------------===//
6767 // sendmsg
6768 //===----------------------------------------------------------------------===//
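// Sketch of the operand forms parseSendMsgOp() accepts (message and operation
// names are illustrative; validity is checked against the SendMsg tables):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)   - with operation and stream id
//   s_sendmsg 0x22                             - a pre-encoded 16-bit immediate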
6769 
6770 bool
6771 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6772                                   OperandInfoTy &Op,
6773                                   OperandInfoTy &Stream) {
6774   using namespace llvm::AMDGPU::SendMsg;
6775 
6776   Msg.Loc = getLoc();
6777   if (isToken(AsmToken::Identifier) &&
6778       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6779     Msg.IsSymbolic = true;
6780     lex(); // skip message name
6781   } else if (!parseExpr(Msg.Id, "a message name")) {
6782     return false;
6783   }
6784 
6785   if (trySkipToken(AsmToken::Comma)) {
6786     Op.IsDefined = true;
6787     Op.Loc = getLoc();
6788     if (isToken(AsmToken::Identifier) &&
6789         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6790       lex(); // skip operation name
6791     } else if (!parseExpr(Op.Id, "an operation name")) {
6792       return false;
6793     }
6794 
6795     if (trySkipToken(AsmToken::Comma)) {
6796       Stream.IsDefined = true;
6797       Stream.Loc = getLoc();
6798       if (!parseExpr(Stream.Id))
6799         return false;
6800     }
6801   }
6802 
6803   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6804 }
6805 
6806 bool
6807 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6808                                  const OperandInfoTy &Op,
6809                                  const OperandInfoTy &Stream) {
6810   using namespace llvm::AMDGPU::SendMsg;
6811 
6812   // Validation strictness depends on whether the message is specified
6813   // in a symbolic or in a numeric form. In the latter case only the
6814   // possibility of encoding the value is checked.
6815   bool Strict = Msg.IsSymbolic;
6816 
6817   if (Strict) {
6818     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6819       Error(Msg.Loc, "specified message id is not supported on this GPU");
6820       return false;
6821     }
6822   } else {
6823     if (!isValidMsgId(Msg.Id, getSTI())) {
6824       Error(Msg.Loc, "invalid message id");
6825       return false;
6826     }
6827   }
6828   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6829     if (Op.IsDefined) {
6830       Error(Op.Loc, "message does not support operations");
6831     } else {
6832       Error(Msg.Loc, "missing message operation");
6833     }
6834     return false;
6835   }
6836   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6837     Error(Op.Loc, "invalid operation id");
6838     return false;
6839   }
6840   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6841       Stream.IsDefined) {
6842     Error(Stream.Loc, "message operation does not support streams");
6843     return false;
6844   }
6845   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6846     Error(Stream.Loc, "invalid message stream id");
6847     return false;
6848   }
6849   return true;
6850 }
6851 
6852 OperandMatchResultTy
6853 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6854   using namespace llvm::AMDGPU::SendMsg;
6855 
6856   int64_t ImmVal = 0;
6857   SMLoc Loc = getLoc();
6858 
6859   if (trySkipId("sendmsg", AsmToken::LParen)) {
6860     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6861     OperandInfoTy Op(OP_NONE_);
6862     OperandInfoTy Stream(STREAM_ID_NONE_);
6863     if (parseSendMsgBody(Msg, Op, Stream) &&
6864         validateSendMsg(Msg, Op, Stream)) {
6865       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6866     } else {
6867       return MatchOperand_ParseFail;
6868     }
6869   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6870     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6871       Error(Loc, "invalid immediate: only 16-bit values are legal");
6872       return MatchOperand_ParseFail;
6873     }
6874   } else {
6875     return MatchOperand_ParseFail;
6876   }
6877 
6878   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6879   return MatchOperand_Success;
6880 }
6881 
6882 bool AMDGPUOperand::isSendMsg() const {
6883   return isImmTy(ImmTySendMsg);
6884 }
6885 
6886 //===----------------------------------------------------------------------===//
6887 // v_interp
6888 //===----------------------------------------------------------------------===//
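// Illustrative source forms for the two parsers below (assumed examples):
//   v_interp_p1_f32 v0, v1, attr0.x      - parseInterpAttr: "attr<N>.<chan>"
//   v_interp_mov_f32 v0, p10, attr3.y    - parseInterpSlot: p10, p20 or p0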
6889 
6890 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6891   StringRef Str;
6892   SMLoc S = getLoc();
6893 
6894   if (!parseId(Str))
6895     return MatchOperand_NoMatch;
6896 
6897   int Slot = StringSwitch<int>(Str)
6898     .Case("p10", 0)
6899     .Case("p20", 1)
6900     .Case("p0", 2)
6901     .Default(-1);
6902 
6903   if (Slot == -1) {
6904     Error(S, "invalid interpolation slot");
6905     return MatchOperand_ParseFail;
6906   }
6907 
6908   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6909                                               AMDGPUOperand::ImmTyInterpSlot));
6910   return MatchOperand_Success;
6911 }
6912 
6913 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6914   StringRef Str;
6915   SMLoc S = getLoc();
6916 
6917   if (!parseId(Str))
6918     return MatchOperand_NoMatch;
6919 
6920   if (!Str.startswith("attr")) {
6921     Error(S, "invalid interpolation attribute");
6922     return MatchOperand_ParseFail;
6923   }
6924 
6925   StringRef Chan = Str.take_back(2);
6926   int AttrChan = StringSwitch<int>(Chan)
6927     .Case(".x", 0)
6928     .Case(".y", 1)
6929     .Case(".z", 2)
6930     .Case(".w", 3)
6931     .Default(-1);
6932   if (AttrChan == -1) {
6933     Error(S, "invalid or missing interpolation attribute channel");
6934     return MatchOperand_ParseFail;
6935   }
6936 
6937   Str = Str.drop_back(2).drop_front(4);
6938 
6939   uint8_t Attr;
6940   if (Str.getAsInteger(10, Attr)) {
6941     Error(S, "invalid or missing interpolation attribute number");
6942     return MatchOperand_ParseFail;
6943   }
6944 
6945   if (Attr > 63) {
6946     Error(S, "out of bounds interpolation attribute number");
6947     return MatchOperand_ParseFail;
6948   }
6949 
6950   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6951 
6952   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6953                                               AMDGPUOperand::ImmTyInterpAttr));
6954   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6955                                               AMDGPUOperand::ImmTyAttrChan));
6956   return MatchOperand_Success;
6957 }
6958 
6959 //===----------------------------------------------------------------------===//
6960 // exp
6961 //===----------------------------------------------------------------------===//
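// Illustrative export targets (availability is checked per subtarget via
// isSupportedTgtId(); examples assumed, not taken from a test):
//   exp mrt0 v0, v1, v2, v3 done vm
//   exp pos0 v0, v1, v2, off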
6962 
6963 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6964   using namespace llvm::AMDGPU::Exp;
6965 
6966   StringRef Str;
6967   SMLoc S = getLoc();
6968 
6969   if (!parseId(Str))
6970     return MatchOperand_NoMatch;
6971 
6972   unsigned Id = getTgtId(Str);
6973   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6974     Error(S, (Id == ET_INVALID) ?
6975                 "invalid exp target" :
6976                 "exp target is not supported on this GPU");
6977     return MatchOperand_ParseFail;
6978   }
6979 
6980   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6981                                               AMDGPUOperand::ImmTyExpTgt));
6982   return MatchOperand_Success;
6983 }
6984 
6985 //===----------------------------------------------------------------------===//
6986 // parser helpers
6987 //===----------------------------------------------------------------------===//
6988 
6989 bool
6990 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6991   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6992 }
6993 
6994 bool
6995 AMDGPUAsmParser::isId(const StringRef Id) const {
6996   return isId(getToken(), Id);
6997 }
6998 
6999 bool
7000 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7001   return getTokenKind() == Kind;
7002 }
7003 
7004 bool
7005 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7006   if (isId(Id)) {
7007     lex();
7008     return true;
7009   }
7010   return false;
7011 }
7012 
7013 bool
7014 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7015   if (isToken(AsmToken::Identifier)) {
7016     StringRef Tok = getTokenStr();
7017     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
7018       lex();
7019       return true;
7020     }
7021   }
7022   return false;
7023 }
7024 
7025 bool
7026 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7027   if (isId(Id) && peekToken().is(Kind)) {
7028     lex();
7029     lex();
7030     return true;
7031   }
7032   return false;
7033 }
7034 
7035 bool
7036 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7037   if (isToken(Kind)) {
7038     lex();
7039     return true;
7040   }
7041   return false;
7042 }
7043 
7044 bool
7045 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7046                            const StringRef ErrMsg) {
7047   if (!trySkipToken(Kind)) {
7048     Error(getLoc(), ErrMsg);
7049     return false;
7050   }
7051   return true;
7052 }
7053 
7054 bool
7055 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7056   SMLoc S = getLoc();
7057 
7058   const MCExpr *Expr;
7059   if (Parser.parseExpression(Expr))
7060     return false;
7061 
7062   if (Expr->evaluateAsAbsolute(Imm))
7063     return true;
7064 
7065   if (Expected.empty()) {
7066     Error(S, "expected absolute expression");
7067   } else {
7068     Error(S, Twine("expected ", Expected) +
7069              Twine(" or an absolute expression"));
7070   }
7071   return false;
7072 }
7073 
7074 bool
7075 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7076   SMLoc S = getLoc();
7077 
7078   const MCExpr *Expr;
7079   if (Parser.parseExpression(Expr))
7080     return false;
7081 
7082   int64_t IntVal;
7083   if (Expr->evaluateAsAbsolute(IntVal)) {
7084     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7085   } else {
7086     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7087   }
7088   return true;
7089 }
7090 
7091 bool
7092 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7093   if (isToken(AsmToken::String)) {
7094     Val = getToken().getStringContents();
7095     lex();
7096     return true;
7097   } else {
7098     Error(getLoc(), ErrMsg);
7099     return false;
7100   }
7101 }
7102 
7103 bool
7104 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7105   if (isToken(AsmToken::Identifier)) {
7106     Val = getTokenStr();
7107     lex();
7108     return true;
7109   } else {
7110     if (!ErrMsg.empty())
7111       Error(getLoc(), ErrMsg);
7112     return false;
7113   }
7114 }
7115 
7116 AsmToken
7117 AMDGPUAsmParser::getToken() const {
7118   return Parser.getTok();
7119 }
7120 
7121 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7122   return isToken(AsmToken::EndOfStatement)
7123              ? getToken()
7124              : getLexer().peekTok(ShouldSkipSpace);
7125 }
7126 
7127 void
7128 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7129   auto TokCount = getLexer().peekTokens(Tokens);
7130 
7131   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7132     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7133 }
7134 
7135 AsmToken::TokenKind
7136 AMDGPUAsmParser::getTokenKind() const {
7137   return getLexer().getKind();
7138 }
7139 
7140 SMLoc
7141 AMDGPUAsmParser::getLoc() const {
7142   return getToken().getLoc();
7143 }
7144 
7145 StringRef
7146 AMDGPUAsmParser::getTokenStr() const {
7147   return getToken().getString();
7148 }
7149 
7150 void
7151 AMDGPUAsmParser::lex() {
7152   Parser.Lex();
7153 }
7154 
7155 SMLoc
7156 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7157                                const OperandVector &Operands) const {
7158   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7159     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7160     if (Test(Op))
7161       return Op.getStartLoc();
7162   }
7163   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7164 }
7165 
7166 SMLoc
7167 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7168                            const OperandVector &Operands) const {
7169   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7170   return getOperandLoc(Test, Operands);
7171 }
7172 
7173 SMLoc
7174 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7175                            const OperandVector &Operands) const {
7176   auto Test = [=](const AMDGPUOperand& Op) {
7177     return Op.isRegKind() && Op.getReg() == Reg;
7178   };
7179   return getOperandLoc(Test, Operands);
7180 }
7181 
7182 SMLoc
7183 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7184   auto Test = [](const AMDGPUOperand& Op) {
7185     return Op.IsImmKindLiteral() || Op.isExpr();
7186   };
7187   return getOperandLoc(Test, Operands);
7188 }
7189 
7190 SMLoc
7191 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7192   auto Test = [](const AMDGPUOperand& Op) {
7193     return Op.isImmKindConst();
7194   };
7195   return getOperandLoc(Test, Operands);
7196 }
7197 
7198 //===----------------------------------------------------------------------===//
7199 // swizzle
7200 //===----------------------------------------------------------------------===//
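// Illustrative "offset:" forms accepted by parseSwizzleOp() (assumed examples;
// mode names come from the Swizzle::IdSymbolic table):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v0, v1 offset:0xffff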
7201 
7202 LLVM_READNONE
7203 static unsigned
7204 encodeBitmaskPerm(const unsigned AndMask,
7205                   const unsigned OrMask,
7206                   const unsigned XorMask) {
7207   using namespace llvm::AMDGPU::Swizzle;
7208 
7209   return BITMASK_PERM_ENC |
7210          (AndMask << BITMASK_AND_SHIFT) |
7211          (OrMask  << BITMASK_OR_SHIFT)  |
7212          (XorMask << BITMASK_XOR_SHIFT);
7213 }
7214 
7215 bool
7216 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7217                                      const unsigned MinVal,
7218                                      const unsigned MaxVal,
7219                                      const StringRef ErrMsg,
7220                                      SMLoc &Loc) {
7221   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7222     return false;
7223   }
7224   Loc = getLoc();
7225   if (!parseExpr(Op)) {
7226     return false;
7227   }
7228   if (Op < MinVal || Op > MaxVal) {
7229     Error(Loc, ErrMsg);
7230     return false;
7231   }
7232 
7233   return true;
7234 }
7235 
7236 bool
7237 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7238                                       const unsigned MinVal,
7239                                       const unsigned MaxVal,
7240                                       const StringRef ErrMsg) {
7241   SMLoc Loc;
7242   for (unsigned i = 0; i < OpNum; ++i) {
7243     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7244       return false;
7245   }
7246 
7247   return true;
7248 }
7249 
7250 bool
7251 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7252   using namespace llvm::AMDGPU::Swizzle;
7253 
7254   int64_t Lane[LANE_NUM];
7255   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7256                            "expected a 2-bit lane id")) {
7257     Imm = QUAD_PERM_ENC;
7258     for (unsigned I = 0; I < LANE_NUM; ++I) {
7259       Imm |= Lane[I] << (LANE_SHIFT * I);
7260     }
7261     return true;
7262   }
7263   return false;
7264 }
7265 
7266 bool
7267 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7268   using namespace llvm::AMDGPU::Swizzle;
7269 
7270   SMLoc Loc;
7271   int64_t GroupSize;
7272   int64_t LaneIdx;
7273 
7274   if (!parseSwizzleOperand(GroupSize,
7275                            2, 32,
7276                            "group size must be in the interval [2,32]",
7277                            Loc)) {
7278     return false;
7279   }
7280   if (!isPowerOf2_64(GroupSize)) {
7281     Error(Loc, "group size must be a power of two");
7282     return false;
7283   }
7284   if (parseSwizzleOperand(LaneIdx,
7285                           0, GroupSize - 1,
7286                           "lane id must be in the interval [0,group size - 1]",
7287                           Loc)) {
7288     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7289     return true;
7290   }
7291   return false;
7292 }
7293 
7294 bool
7295 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7296   using namespace llvm::AMDGPU::Swizzle;
7297 
7298   SMLoc Loc;
7299   int64_t GroupSize;
7300 
7301   if (!parseSwizzleOperand(GroupSize,
7302                            2, 32,
7303                            "group size must be in the interval [2,32]",
7304                            Loc)) {
7305     return false;
7306   }
7307   if (!isPowerOf2_64(GroupSize)) {
7308     Error(Loc, "group size must be a power of two");
7309     return false;
7310   }
7311 
7312   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7313   return true;
7314 }
7315 
7316 bool
7317 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7318   using namespace llvm::AMDGPU::Swizzle;
7319 
7320   SMLoc Loc;
7321   int64_t GroupSize;
7322 
7323   if (!parseSwizzleOperand(GroupSize,
7324                            1, 16,
7325                            "group size must be in the interval [1,16]",
7326                            Loc)) {
7327     return false;
7328   }
7329   if (!isPowerOf2_64(GroupSize)) {
7330     Error(Loc, "group size must be a power of two");
7331     return false;
7332   }
7333 
7334   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7335   return true;
7336 }
7337 
7338 bool
7339 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7340   using namespace llvm::AMDGPU::Swizzle;
7341 
7342   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7343     return false;
7344   }
7345 
7346   StringRef Ctl;
7347   SMLoc StrLoc = getLoc();
7348   if (!parseString(Ctl)) {
7349     return false;
7350   }
7351   if (Ctl.size() != BITMASK_WIDTH) {
7352     Error(StrLoc, "expected a 5-character mask");
7353     return false;
7354   }
7355 
7356   unsigned AndMask = 0;
7357   unsigned OrMask = 0;
7358   unsigned XorMask = 0;
7359 
7360   for (size_t i = 0; i < Ctl.size(); ++i) {
7361     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7362     switch(Ctl[i]) {
7363     default:
7364       Error(StrLoc, "invalid mask");
7365       return false;
7366     case '0':
7367       break;
7368     case '1':
7369       OrMask |= Mask;
7370       break;
7371     case 'p':
7372       AndMask |= Mask;
7373       break;
7374     case 'i':
7375       AndMask |= Mask;
7376       XorMask |= Mask;
7377       break;
7378     }
7379   }
7380 
7381   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7382   return true;
7383 }
7384 
7385 bool
7386 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7387 
7388   SMLoc OffsetLoc = getLoc();
7389 
7390   if (!parseExpr(Imm, "a swizzle macro")) {
7391     return false;
7392   }
7393   if (!isUInt<16>(Imm)) {
7394     Error(OffsetLoc, "expected a 16-bit offset");
7395     return false;
7396   }
7397   return true;
7398 }
7399 
7400 bool
7401 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7402   using namespace llvm::AMDGPU::Swizzle;
7403 
7404   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7405 
7406     SMLoc ModeLoc = getLoc();
7407     bool Ok = false;
7408 
7409     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7410       Ok = parseSwizzleQuadPerm(Imm);
7411     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7412       Ok = parseSwizzleBitmaskPerm(Imm);
7413     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7414       Ok = parseSwizzleBroadcast(Imm);
7415     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7416       Ok = parseSwizzleSwap(Imm);
7417     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7418       Ok = parseSwizzleReverse(Imm);
7419     } else {
7420       Error(ModeLoc, "expected a swizzle mode");
7421     }
7422 
7423     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7424   }
7425 
7426   return false;
7427 }
7428 
7429 OperandMatchResultTy
7430 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7431   SMLoc S = getLoc();
7432   int64_t Imm = 0;
7433 
7434   if (trySkipId("offset")) {
7435 
7436     bool Ok = false;
7437     if (skipToken(AsmToken::Colon, "expected a colon")) {
7438       if (trySkipId("swizzle")) {
7439         Ok = parseSwizzleMacro(Imm);
7440       } else {
7441         Ok = parseSwizzleOffset(Imm);
7442       }
7443     }
7444 
7445     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7446 
7447     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7448   } else {
7449     // Swizzle "offset" operand is optional.
7450     // If it is omitted, try parsing other optional operands.
7451     return parseOptionalOpr(Operands);
7452   }
7453 }
7454 
7455 bool
7456 AMDGPUOperand::isSwizzle() const {
7457   return isImmTy(ImmTySwizzle);
7458 }
7459 
7460 //===----------------------------------------------------------------------===//
7461 // VGPR Index Mode
7462 //===----------------------------------------------------------------------===//
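// Illustrative gpr_idx operand forms (mode names are assumed here; they are
// taken from VGPRIndexMode::IdSymbolic at parse time):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)   - symbolic, comma-separated modes
//   s_set_gpr_idx_on s0, 9                    - raw 4-bit immediate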
7463 
7464 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7465 
7466   using namespace llvm::AMDGPU::VGPRIndexMode;
7467 
7468   if (trySkipToken(AsmToken::RParen)) {
7469     return OFF;
7470   }
7471 
7472   int64_t Imm = 0;
7473 
7474   while (true) {
7475     unsigned Mode = 0;
7476     SMLoc S = getLoc();
7477 
7478     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7479       if (trySkipId(IdSymbolic[ModeId])) {
7480         Mode = 1 << ModeId;
7481         break;
7482       }
7483     }
7484 
7485     if (Mode == 0) {
7486       Error(S, (Imm == 0)?
7487                "expected a VGPR index mode or a closing parenthesis" :
7488                "expected a VGPR index mode");
7489       return UNDEF;
7490     }
7491 
7492     if (Imm & Mode) {
7493       Error(S, "duplicate VGPR index mode");
7494       return UNDEF;
7495     }
7496     Imm |= Mode;
7497 
7498     if (trySkipToken(AsmToken::RParen))
7499       break;
7500     if (!skipToken(AsmToken::Comma,
7501                    "expected a comma or a closing parenthesis"))
7502       return UNDEF;
7503   }
7504 
7505   return Imm;
7506 }
7507 
7508 OperandMatchResultTy
7509 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7510 
7511   using namespace llvm::AMDGPU::VGPRIndexMode;
7512 
7513   int64_t Imm = 0;
7514   SMLoc S = getLoc();
7515 
7516   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7517     Imm = parseGPRIdxMacro();
7518     if (Imm == UNDEF)
7519       return MatchOperand_ParseFail;
7520   } else {
7521     if (getParser().parseAbsoluteExpression(Imm))
7522       return MatchOperand_ParseFail;
7523     if (Imm < 0 || !isUInt<4>(Imm)) {
7524       Error(S, "invalid immediate: only 4-bit values are legal");
7525       return MatchOperand_ParseFail;
7526     }
7527   }
7528 
7529   Operands.push_back(
7530       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7531   return MatchOperand_Success;
7532 }
7533 
7534 bool AMDGPUOperand::isGPRIdxMode() const {
7535   return isImmTy(ImmTyGprIdxMode);
7536 }
7537 
7538 //===----------------------------------------------------------------------===//
7539 // sopp branch targets
7540 //===----------------------------------------------------------------------===//
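// Branch targets may be labels or absolute expressions that fit in a signed
// 16-bit jump offset, e.g. (illustrative):
//   s_branch loop_begin
//   s_cbranch_scc0 4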
7541 
7542 OperandMatchResultTy
7543 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7544 
7545   // Make sure we are not parsing something
7546   // that looks like a label or an expression but is not.
7547   // This will improve error messages.
7548   if (isRegister() || isModifier())
7549     return MatchOperand_NoMatch;
7550 
7551   if (!parseExpr(Operands))
7552     return MatchOperand_ParseFail;
7553 
7554   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7555   assert(Opr.isImm() || Opr.isExpr());
7556   SMLoc Loc = Opr.getStartLoc();
7557 
7558   // Currently we do not support arbitrary expressions as branch targets.
7559   // Only labels and absolute expressions are accepted.
7560   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7561     Error(Loc, "expected an absolute expression or a label");
7562   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7563     Error(Loc, "expected a 16-bit signed jump offset");
7564   }
7565 
7566   return MatchOperand_Success;
7567 }
7568 
7569 //===----------------------------------------------------------------------===//
7570 // Boolean holding registers
7571 //===----------------------------------------------------------------------===//
7572 
7573 OperandMatchResultTy
7574 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7575   return parseReg(Operands);
7576 }
7577 
7578 //===----------------------------------------------------------------------===//
7579 // mubuf
7580 //===----------------------------------------------------------------------===//
7581 
7582 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7583   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7584 }
7585 
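// Illustrative MUBUF forms fed through cvtMubufImpl() (assumed examples):
//   buffer_load_dword v0, v1, s[4:7], s0 offen offset:4
//   buffer_atomic_add v0, v1, s[4:7], s0 idxen glc   - "glc" selects the
//                                                      atomic-return opcode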
7586 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7587                                    const OperandVector &Operands,
7588                                    bool IsAtomic,
7589                                    bool IsLds) {
7590   OptionalImmIndexMap OptionalIdx;
7591   unsigned FirstOperandIdx = 1;
7592   bool IsAtomicReturn = false;
7593 
7594   if (IsAtomic) {
7595     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7596       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7597       if (!Op.isCPol())
7598         continue;
7599       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7600       break;
7601     }
7602 
7603     if (!IsAtomicReturn) {
7604       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7605       if (NewOpc != -1)
7606         Inst.setOpcode(NewOpc);
7607     }
7608 
7609     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7610                       SIInstrFlags::IsAtomicRet;
7611   }
7612 
7613   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7614     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7615 
7616     // Add the register arguments
7617     if (Op.isReg()) {
7618       Op.addRegOperands(Inst, 1);
7619       // Insert a tied src for atomic return dst.
7620       // This cannot be postponed as subsequent calls to
7621       // addImmOperands rely on the correct number of MC operands.
7622       if (IsAtomicReturn && i == FirstOperandIdx)
7623         Op.addRegOperands(Inst, 1);
7624       continue;
7625     }
7626 
7627     // Handle the case where soffset is an immediate
7628     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7629       Op.addImmOperands(Inst, 1);
7630       continue;
7631     }
7632 
7633     // Handle tokens like 'offen' which are sometimes hard-coded into the
7634     // asm string.  There are no MCInst operands for these.
7635     if (Op.isToken()) {
7636       continue;
7637     }
7638     assert(Op.isImm());
7639 
7640     // Handle optional arguments
7641     OptionalIdx[Op.getImmTy()] = i;
7642   }
7643 
7644   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7645   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7646 
7647   if (!IsLds) { // tfe is not legal with lds opcodes
7648     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7649   }
7650   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7651 }
7652 
7653 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7654   OptionalImmIndexMap OptionalIdx;
7655 
7656   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7657     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7658 
7659     // Add the register arguments
7660     if (Op.isReg()) {
7661       Op.addRegOperands(Inst, 1);
7662       continue;
7663     }
7664 
7665     // Handle the case where soffset is an immediate
7666     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7667       Op.addImmOperands(Inst, 1);
7668       continue;
7669     }
7670 
7671     // Handle tokens like 'offen' which are sometimes hard-coded into the
7672     // asm string.  There are no MCInst operands for these.
7673     if (Op.isToken()) {
7674       continue;
7675     }
7676     assert(Op.isImm());
7677 
7678     // Handle optional arguments
7679     OptionalIdx[Op.getImmTy()] = i;
7680   }
7681 
7682   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7683                         AMDGPUOperand::ImmTyOffset);
7684   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7685   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7686   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7687   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7688 }
7689 
7690 //===----------------------------------------------------------------------===//
7691 // mimg
7692 //===----------------------------------------------------------------------===//
7693 
7694 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7695                               bool IsAtomic) {
7696   unsigned I = 1;
7697   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7698   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7699     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7700   }
7701 
7702   if (IsAtomic) {
7703     // Add src, same as dst
7704     assert(Desc.getNumDefs() == 1);
7705     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7706   }
7707 
7708   OptionalImmIndexMap OptionalIdx;
7709 
7710   for (unsigned E = Operands.size(); I != E; ++I) {
7711     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7712 
7713     // Add the register arguments
7714     if (Op.isReg()) {
7715       Op.addRegOperands(Inst, 1);
7716     } else if (Op.isImmModifier()) {
7717       OptionalIdx[Op.getImmTy()] = I;
7718     } else if (!Op.isToken()) {
7719       llvm_unreachable("unexpected operand type");
7720     }
7721   }
7722 
7723   bool IsGFX10Plus = isGFX10Plus();
7724 
7725   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7726   if (IsGFX10Plus)
7727     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7728   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7729   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7730   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7731   if (IsGFX10Plus)
7732     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7733   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7734     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7735   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7736   if (!IsGFX10Plus)
7737     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7738   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7739 }
7740 
7741 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7742   cvtMIMG(Inst, Operands, true);
7743 }
7744 
7745 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7746   OptionalImmIndexMap OptionalIdx;
7747   bool IsAtomicReturn = false;
7748 
7749   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7750     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7751     if (!Op.isCPol())
7752       continue;
7753     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7754     break;
7755   }
7756 
7757   if (!IsAtomicReturn) {
7758     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7759     if (NewOpc != -1)
7760       Inst.setOpcode(NewOpc);
7761   }
7762 
7763   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7764                     SIInstrFlags::IsAtomicRet;
7765 
7766   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7767     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7768 
7769     // Add the register arguments
7770     if (Op.isReg()) {
7771       Op.addRegOperands(Inst, 1);
7772       if (IsAtomicReturn && i == 1)
7773         Op.addRegOperands(Inst, 1);
7774       continue;
7775     }
7776 
7777     // Handle the case where soffset is an immediate
7778     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7779       Op.addImmOperands(Inst, 1);
7780       continue;
7781     }
7782 
7783     // Handle tokens like 'offen' which are sometimes hard-coded into the
7784     // asm string.  There are no MCInst operands for these.
7785     if (Op.isToken()) {
7786       continue;
7787     }
7788     assert(Op.isImm());
7789 
7790     // Handle optional arguments
7791     OptionalIdx[Op.getImmTy()] = i;
7792   }
7793 
7794   if ((int)Inst.getNumOperands() <=
7795       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7796     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7797   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7798 }
7799 
7800 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7801                                       const OperandVector &Operands) {
7802   for (unsigned I = 1; I < Operands.size(); ++I) {
7803     auto &Operand = (AMDGPUOperand &)*Operands[I];
7804     if (Operand.isReg())
7805       Operand.addRegOperands(Inst, 1);
7806   }
7807 
7808   Inst.addOperand(MCOperand::createImm(1)); // a16
7809 }
7810 
7811 //===----------------------------------------------------------------------===//
7812 // smrd
7813 //===----------------------------------------------------------------------===//
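// SMRD/SMEM offsets come in several encodings: an 8-bit immediate, a 32-bit
// literal (CI only, used when the value does not fit in 8 bits), and the
// generic SMEM offset whose range is validated later.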
7814 
7815 bool AMDGPUOperand::isSMRDOffset8() const {
7816   return isImm() && isUInt<8>(getImm());
7817 }
7818 
7819 bool AMDGPUOperand::isSMEMOffset() const {
7820   return isImmTy(ImmTyNone) ||
7821          isImmTy(ImmTyOffset); // Offset range is checked later by validator.
7822 }
7823 
7824 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7825   // 32-bit literals are only supported on CI and we only want to use them
7826   // when the offset does not fit in 8 bits.
7827   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7828 }
7829 
7830 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7831   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7832 }
7833 
7834 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7835   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7836 }
7837 
7838 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7839   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7840 }
7841 
7842 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7843   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7844 }
7845 
7846 //===----------------------------------------------------------------------===//
7847 // vop3
7848 //===----------------------------------------------------------------------===//
7849 
7850 static bool ConvertOmodMul(int64_t &Mul) {
7851   if (Mul != 1 && Mul != 2 && Mul != 4)
7852     return false;
7853 
7854   Mul >>= 1;
7855   return true;
7856 }
7857 
7858 static bool ConvertOmodDiv(int64_t &Div) {
7859   if (Div == 1) {
7860     Div = 0;
7861     return true;
7862   }
7863 
7864   if (Div == 2) {
7865     Div = 3;
7866     return true;
7867   }
7868 
7869   return false;
7870 }
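// The conversions above map the parsed value onto the 2-bit omod encoding:
// mul:1/div:1 -> 0 (no modifier), mul:2 -> 1, mul:4 -> 2, div:2 -> 3.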
7871 
7872 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7873 // This is intentional and ensures compatibility with sp3.
7874 // See bug 35397 for details.
7875 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7876   if (BoundCtrl == 0 || BoundCtrl == 1) {
7877     BoundCtrl = 1;
7878     return true;
7879   }
7880   return false;
7881 }
7882 
7883 // Note: the order in this table matches the order of operands in AsmString.
7884 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7885   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7886   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7887   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7888   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7889   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7890   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7891   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7892   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7893   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7894   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7895   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7896   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7897   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7898   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7899   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7900   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7901   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7902   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7903   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7904   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7905   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7906   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7907   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7908   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7909   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7910   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7911   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7912   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7913   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7914   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7915   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7916   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7917   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7918   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7919   {"dpp8",     AMDGPUOperand::ImmTyDPP8, false, nullptr},
7920   {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr},
7921   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7922   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7923   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7924   {"fi",   AMDGPUOperand::ImmTyDppFi, false, nullptr},
7925   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7926   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7927   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7928   {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
7929   {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
7930 };
7931 
7932 void AMDGPUAsmParser::onBeginOfFile() {
7933   if (!getParser().getStreamer().getTargetStreamer() ||
7934       getSTI().getTargetTriple().getArch() == Triple::r600)
7935     return;
7936 
7937   if (!getTargetStreamer().getTargetID())
7938     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7939 
7940   if (isHsaAbiVersion3AndAbove(&getSTI()))
7941     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7942 }
7943 
7944 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7945 
7946   OperandMatchResultTy res = parseOptionalOpr(Operands);
7947 
7948   // This is a hack to enable hardcoded mandatory operands which follow
7949   // optional operands.
7950   //
7951   // The current design assumes that all operands after the first optional
7952   // operand are also optional. However, the implementation of some instructions
7953   // violates this rule (e.g. flat/global atomics have a hardcoded 'glc' operand).
7954   //
7955   // To alleviate this problem, we have to (implicitly) parse extra operands to
7956   // make sure the autogenerated parser of custom operands never hits a
7957   // hardcoded mandatory operand.
7958 
7959   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7960     if (res != MatchOperand_Success ||
7961         isToken(AsmToken::EndOfStatement))
7962       break;
7963 
7964     trySkipToken(AsmToken::Comma);
7965     res = parseOptionalOpr(Operands);
7966   }
7967 
7968   return res;
7969 }
7970 
7971 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7972   OperandMatchResultTy res;
7973   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7974     // try to parse any optional operand here
7975     if (Op.IsBit) {
7976       res = parseNamedBit(Op.Name, Operands, Op.Type);
7977     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7978       res = parseOModOperand(Operands);
7979     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7980                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7981                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7982       res = parseSDWASel(Operands, Op.Name, Op.Type);
7983     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7984       res = parseSDWADstUnused(Operands);
7985     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7986                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7987                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7988                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7989       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7990                                         Op.ConvertResult);
7991     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7992       res = parseDim(Operands);
7993     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7994       res = parseCPol(Operands);
7995     } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) {
7996       res = parseDPP8(Operands);
7997     } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) {
7998       res = parseDPPCtrl(Operands);
7999     } else {
8000       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
8001       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
8002         res = parseOperandArrayWithPrefix("neg", Operands,
8003                                           AMDGPUOperand::ImmTyBLGP,
8004                                           nullptr);
8005       }
8006     }
8007     if (res != MatchOperand_NoMatch) {
8008       return res;
8009     }
8010   }
8011   return MatchOperand_NoMatch;
8012 }
8013 
8014 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
8015   StringRef Name = getTokenStr();
8016   if (Name == "mul") {
8017     return parseIntWithPrefix("mul", Operands,
8018                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8019   }
8020 
8021   if (Name == "div") {
8022     return parseIntWithPrefix("div", Operands,
8023                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8024   }
8025 
8026   return MatchOperand_NoMatch;
8027 }
8028 
8029 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8030 // the number of src operands present, then copies that bit into src0_modifiers.
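// For example, if all three src operands are present, SrcNum is 3, so op_sel
// bit 3 carries DST_OP_SEL and, when set, is mirrored into src0_modifiers.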
8031 void cvtVOP3DstOpSelOnly(MCInst &Inst) {
8032   int Opc = Inst.getOpcode();
8033   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8034   if (OpSelIdx == -1)
8035     return;
8036 
8037   int SrcNum;
8038   const int Ops[] = { AMDGPU::OpName::src0,
8039                       AMDGPU::OpName::src1,
8040                       AMDGPU::OpName::src2 };
8041   for (SrcNum = 0;
8042        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
8043        ++SrcNum);
8044   assert(SrcNum > 0);
8045 
8046   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8047 
8048   if ((OpSel & (1 << SrcNum)) != 0) {
8049     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8050     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8051     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8052   }
8053 }
8054 
8055 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8056                                    const OperandVector &Operands) {
8057   cvtVOP3P(Inst, Operands);
8058   cvtVOP3DstOpSelOnly(Inst);
8059 }
8060 
8061 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8062                                    OptionalImmIndexMap &OptionalIdx) {
8063   cvtVOP3P(Inst, Operands, OptionalIdx);
8064   cvtVOP3DstOpSelOnly(Inst);
8065 }
8066 
8067 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8068       // 1. This operand is an input-modifiers operand,
8069   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8070       // 2. this is not the last operand,
8071       && Desc.NumOperands > (OpNum + 1)
8072       // 3. the next operand has a register class, and
8073       && Desc.OpInfo[OpNum + 1].RegClass != -1
8074       // 4. the next operand is not tied to any other operand.
8075       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
8076 }
8077 
8078 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8079 {
8080   OptionalImmIndexMap OptionalIdx;
8081   unsigned Opc = Inst.getOpcode();
8082 
8083   unsigned I = 1;
8084   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8085   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8086     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8087   }
8088 
8089   for (unsigned E = Operands.size(); I != E; ++I) {
8090     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8091     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8092       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8093     } else if (Op.isInterpSlot() ||
8094                Op.isInterpAttr() ||
8095                Op.isAttrChan()) {
8096       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8097     } else if (Op.isImmModifier()) {
8098       OptionalIdx[Op.getImmTy()] = I;
8099     } else {
8100       llvm_unreachable("unhandled operand type");
8101     }
8102   }
8103 
8104   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
8105     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
8106   }
8107 
8108   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8109     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8110   }
8111 
8112   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8113     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8114   }
8115 }
8116 
8117 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8118 {
8119   OptionalImmIndexMap OptionalIdx;
8120   unsigned Opc = Inst.getOpcode();
8121 
8122   unsigned I = 1;
8123   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8124   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8125     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8126   }
8127 
8128   for (unsigned E = Operands.size(); I != E; ++I) {
8129     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8130     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8131       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8132     } else if (Op.isImmModifier()) {
8133       OptionalIdx[Op.getImmTy()] = I;
8134     } else {
8135       llvm_unreachable("unhandled operand type");
8136     }
8137   }
8138 
8139   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8140 
8141   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8142   if (OpSelIdx != -1)
8143     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8144 
8145   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8146 
8147   if (OpSelIdx == -1)
8148     return;
8149 
8150   const int Ops[] = { AMDGPU::OpName::src0,
8151                       AMDGPU::OpName::src1,
8152                       AMDGPU::OpName::src2 };
8153   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8154                          AMDGPU::OpName::src1_modifiers,
8155                          AMDGPU::OpName::src2_modifiers };
8156 
8157   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8158 
8159   for (int J = 0; J < 3; ++J) {
8160     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8161     if (OpIdx == -1)
8162       break;
8163 
8164     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8165     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8166 
8167     if ((OpSel & (1 << J)) != 0)
8168       ModVal |= SISrcMods::OP_SEL_0;
8169     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8170         (OpSel & (1 << 3)) != 0)
8171       ModVal |= SISrcMods::DST_OP_SEL;
8172 
8173     Inst.getOperand(ModIdx).setImm(ModVal);
8174   }
8175 }
8176 
8177 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8178                               OptionalImmIndexMap &OptionalIdx) {
8179   unsigned Opc = Inst.getOpcode();
8180 
8181   unsigned I = 1;
8182   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8183   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8184     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8185   }
8186 
8187   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8188     // This instruction has src modifiers
8189     for (unsigned E = Operands.size(); I != E; ++I) {
8190       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8191       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8192         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8193       } else if (Op.isImmModifier()) {
8194         OptionalIdx[Op.getImmTy()] = I;
8195       } else if (Op.isRegOrImm()) {
8196         Op.addRegOrImmOperands(Inst, 1);
8197       } else {
8198         llvm_unreachable("unhandled operand type");
8199       }
8200     }
8201   } else {
8202     // No src modifiers
8203     for (unsigned E = Operands.size(); I != E; ++I) {
8204       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8205       if (Op.isMod()) {
8206         OptionalIdx[Op.getImmTy()] = I;
8207       } else {
8208         Op.addRegOrImmOperands(Inst, 1);
8209       }
8210     }
8211   }
8212 
8213   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8214     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8215   }
8216 
8217   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8218     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8219   }
8220 
8221   // Special case for v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8222   // these have a src2 register operand that is tied to the dst operand.
8223   // The assembler does not allow modifiers for this operand, so src2_modifiers
8224   // must be 0.
8225   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8226       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8227       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8228       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8229       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8230       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8231       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8232       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8233       Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
8234       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8235       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8236       Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
8237       Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
8238       Opc == AMDGPU::V_FMAC_F16_e64_gfx11) {
8239     auto it = Inst.begin();
8240     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8241     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8242     ++it;
8243     // Copy the operand to ensure it's not invalidated when Inst grows.
8244     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8245   }
8246 }
8247 
8248 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8249   OptionalImmIndexMap OptionalIdx;
8250   cvtVOP3(Inst, Operands, OptionalIdx);
8251 }
8252 
8253 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8254                                OptionalImmIndexMap &OptIdx) {
8255   const int Opc = Inst.getOpcode();
8256   const MCInstrDesc &Desc = MII.get(Opc);
8257 
8258   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8259 
8260   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8261     assert(!IsPacked);
8262     Inst.addOperand(Inst.getOperand(0));
8263   }
8264 
8265   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8266   // instruction, and then figure out where to actually put the modifiers
8267 
8268   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8269   if (OpSelIdx != -1) {
8270     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8271   }
8272 
8273   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8274   if (OpSelHiIdx != -1) {
8275     int DefaultVal = IsPacked ? -1 : 0;
8276     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8277                           DefaultVal);
8278   }
8279 
8280   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8281   if (NegLoIdx != -1) {
8282     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8283     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8284   }
8285 
8286   const int Ops[] = { AMDGPU::OpName::src0,
8287                       AMDGPU::OpName::src1,
8288                       AMDGPU::OpName::src2 };
8289   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8290                          AMDGPU::OpName::src1_modifiers,
8291                          AMDGPU::OpName::src2_modifiers };
8292 
8293   unsigned OpSel = 0;
8294   unsigned OpSelHi = 0;
8295   unsigned NegLo = 0;
8296   unsigned NegHi = 0;
8297 
8298   if (OpSelIdx != -1)
8299     OpSel = Inst.getOperand(OpSelIdx).getImm();
8300 
8301   if (OpSelHiIdx != -1)
8302     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8303 
8304   if (NegLoIdx != -1) {
8305     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8306     NegLo = Inst.getOperand(NegLoIdx).getImm();
8307     NegHi = Inst.getOperand(NegHiIdx).getImm();
8308   }
8309 
8310   for (int J = 0; J < 3; ++J) {
8311     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8312     if (OpIdx == -1)
8313       break;
8314 
8315     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8316 
8317     if (ModIdx == -1)
8318       continue;
8319 
8320     uint32_t ModVal = 0;
8321 
8322     if ((OpSel & (1 << J)) != 0)
8323       ModVal |= SISrcMods::OP_SEL_0;
8324 
8325     if ((OpSelHi & (1 << J)) != 0)
8326       ModVal |= SISrcMods::OP_SEL_1;
8327 
8328     if ((NegLo & (1 << J)) != 0)
8329       ModVal |= SISrcMods::NEG;
8330 
8331     if ((NegHi & (1 << J)) != 0)
8332       ModVal |= SISrcMods::NEG_HI;
8333 
8334     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8335   }
8336 }
8337 
8338 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8339   OptionalImmIndexMap OptIdx;
8340   cvtVOP3(Inst, Operands, OptIdx);
8341   cvtVOP3P(Inst, Operands, OptIdx);
8342 }
8343 
8344 //===----------------------------------------------------------------------===//
8345 // VOPD
8346 //===----------------------------------------------------------------------===//
8347 
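// Parse the VOPD '::' separator and the OpY mnemonic that follows it; the
// mnemonic is captured as an expression operand.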
8348 OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8349   if (!hasVOPD(getSTI()))
8350     return MatchOperand_NoMatch;
8351 
8352   if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8353     SMLoc S = getLoc();
8354     lex();
8355     lex();
8356     Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8357     const MCExpr *Expr;
8358     if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) {
8359       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8360       return MatchOperand_Success;
8361     }
8362     Error(S, "invalid VOPD :: usage");
8363     return MatchOperand_ParseFail;
8364   }
8365   return MatchOperand_NoMatch;
8366 }
8367 
8368 // Create VOPD MCInst operands using parsed assembler operands.
8369 // Parsed VOPD operands are ordered as follows:
8370 //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
8371 //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
8372 // If both OpX and OpY have an imm, the first imm has a different name:
8373 //   OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::'
8374 //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
8375 // MCInst operands have the following order:
8376 //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
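// For example, if OpX is an fmamk-style op (dstX, src0X, imm, vsrc1X) and OpY
// is a mov-style op with only src0Y, the MCInst operands are emitted in the
// order dstX, dstY, src0X, imm, vsrc1X, src0Y.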
8377 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8378   auto addOp = [&](uint16_t i) { // NOLINT:function pointer
8379     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8380     if (Op.isReg()) {
8381       Op.addRegOperands(Inst, 1);
8382       return;
8383     }
8384     if (Op.isImm()) {
8385       Op.addImmOperands(Inst, 1);
8386       return;
8387     }
8388     // Handle tokens like 'offen' which are sometimes hard-coded into the
8389     // asm string.  There are no MCInst operands for these.
8390     if (Op.isToken()) {
8391       return;
8392     }
8393     llvm_unreachable("Unhandled operand type in cvtVOPD");
8394   };
8395 
8396   // Indices into MCInst.Operands
8397   const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ...
8398   const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ...
8399   const auto MinOpYImmMCIndex = 4;   // dstX, dstY, src0X, src0Y, imm, ...
8400 
8401   unsigned Opc = Inst.getOpcode();
8402   bool HasVsrc1X =
8403       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1;
8404   bool HasImmX =
8405       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
8406       (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
8407                          FmamkOpXImmMCIndex ||
8408                      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
8409                          FmaakOpXImmMCIndex));
8410 
8411   bool HasVsrc1Y =
8412       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1;
8413   bool HasImmY =
8414       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
8415       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >=
8416           MinOpYImmMCIndex + HasVsrc1X;
8417 
8418   // Indices of parsed operands relative to dst
8419   const auto DstIdx = 0;
8420   const auto Src0Idx = 1;
8421   const auto Vsrc1OrImmIdx = 2;
8422 
8423   const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X;
8424   const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo)
8425 
8426   // Offsets into parsed operands
8427   const auto OpXFirstOperandOffset = 1;
8428   const auto OpYFirstOperandOffset =
8429       OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize;
8430 
8431   // Order of addOp calls determines MC operand order
8432   addOp(OpXFirstOperandOffset + DstIdx); // vdstX
8433   addOp(OpYFirstOperandOffset + DstIdx); // vdstY
8434 
8435   addOp(OpXFirstOperandOffset + Src0Idx); // src0X
8436   if (HasImmX) {
8437     // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak
8438     addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx);
8439     addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1);
8440   } else {
8441     if (HasVsrc1X) // all except v_mov
8442       addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X
8443   }
8444 
8445   addOp(OpYFirstOperandOffset + Src0Idx); // src0Y
8446   if (HasImmY) {
8447     // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak
8448     addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx);
8449     addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1);
8450   } else {
8451     if (HasVsrc1Y) // all except v_mov
8452       addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y
8453   }
8454 }
8455 
8456 //===----------------------------------------------------------------------===//
8457 // dpp
8458 //===----------------------------------------------------------------------===//
8459 
8460 bool AMDGPUOperand::isDPP8() const {
8461   return isImmTy(ImmTyDPP8);
8462 }
8463 
8464 bool AMDGPUOperand::isDPPCtrl() const {
8465   using namespace AMDGPU::DPP;
8466 
8467   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8468   if (result) {
8469     int64_t Imm = getImm();
8470     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8471            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8472            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8473            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8474            (Imm == DppCtrl::WAVE_SHL1) ||
8475            (Imm == DppCtrl::WAVE_ROL1) ||
8476            (Imm == DppCtrl::WAVE_SHR1) ||
8477            (Imm == DppCtrl::WAVE_ROR1) ||
8478            (Imm == DppCtrl::ROW_MIRROR) ||
8479            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8480            (Imm == DppCtrl::BCAST15) ||
8481            (Imm == DppCtrl::BCAST31) ||
8482            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8483            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8484   }
8485   return false;
8486 }
8487 
8488 //===----------------------------------------------------------------------===//
8489 // mAI
8490 //===----------------------------------------------------------------------===//
8491 
8492 bool AMDGPUOperand::isBLGP() const {
8493   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8494 }
8495 
8496 bool AMDGPUOperand::isCBSZ() const {
8497   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8498 }
8499 
8500 bool AMDGPUOperand::isABID() const {
8501   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8502 }
8503 
8504 bool AMDGPUOperand::isS16Imm() const {
8505   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8506 }
8507 
8508 bool AMDGPUOperand::isU16Imm() const {
8509   return isImm() && isUInt<16>(getImm());
8510 }
8511 
8512 //===----------------------------------------------------------------------===//
8513 // dim
8514 //===----------------------------------------------------------------------===//
8515 
8516 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8517   // We want to allow "dim:1D" etc.,
8518   // but the initial 1 is tokenized as an integer.
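  // For example, "dim:2D" is lexed as the integer 2 immediately followed by
  // the identifier D; the two pieces are re-joined here. The long
  // "SQ_RSRC_IMG_*" names are also accepted, with the prefix dropped below.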
8519   std::string Token;
8520   if (isToken(AsmToken::Integer)) {
8521     SMLoc Loc = getToken().getEndLoc();
8522     Token = std::string(getTokenStr());
8523     lex();
8524     if (getLoc() != Loc)
8525       return false;
8526   }
8527 
8528   StringRef Suffix;
8529   if (!parseId(Suffix))
8530     return false;
8531   Token += Suffix;
8532 
8533   StringRef DimId = Token;
8534   if (DimId.startswith("SQ_RSRC_IMG_"))
8535     DimId = DimId.drop_front(12);
8536 
8537   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8538   if (!DimInfo)
8539     return false;
8540 
8541   Encoding = DimInfo->Encoding;
8542   return true;
8543 }
8544 
8545 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8546   if (!isGFX10Plus())
8547     return MatchOperand_NoMatch;
8548 
8549   SMLoc S = getLoc();
8550 
8551   if (!trySkipId("dim", AsmToken::Colon))
8552     return MatchOperand_NoMatch;
8553 
8554   unsigned Encoding;
8555   SMLoc Loc = getLoc();
8556   if (!parseDimId(Encoding)) {
8557     Error(Loc, "invalid dim value");
8558     return MatchOperand_ParseFail;
8559   }
8560 
8561   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8562                                               AMDGPUOperand::ImmTyDim));
8563   return MatchOperand_Success;
8564 }
8565 
8566 //===----------------------------------------------------------------------===//
8567 // dpp
8568 //===----------------------------------------------------------------------===//
8569 
8570 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8571   SMLoc S = getLoc();
8572 
8573   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8574     return MatchOperand_NoMatch;
8575 
8576   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
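  // Each of the eight lane selectors is a 3-bit value packed below as
  // Sels[i] << (3 * i); e.g. the identity selection dpp8:[0,1,2,3,4,5,6,7]
  // packs to 0xFAC688.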
8577 
8578   int64_t Sels[8];
8579 
8580   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8581     return MatchOperand_ParseFail;
8582 
8583   for (size_t i = 0; i < 8; ++i) {
8584     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8585       return MatchOperand_ParseFail;
8586 
8587     SMLoc Loc = getLoc();
8588     if (getParser().parseAbsoluteExpression(Sels[i]))
8589       return MatchOperand_ParseFail;
8590     if (0 > Sels[i] || 7 < Sels[i]) {
8591       Error(Loc, "expected a 3-bit value");
8592       return MatchOperand_ParseFail;
8593     }
8594   }
8595 
8596   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8597     return MatchOperand_ParseFail;
8598 
8599   unsigned DPP8 = 0;
8600   for (size_t i = 0; i < 8; ++i)
8601     DPP8 |= (Sels[i] << (i * 3));
8602 
8603   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8604   return MatchOperand_Success;
8605 }
8606 
8607 bool
8608 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8609                                     const OperandVector &Operands) {
8610   if (Ctrl == "row_newbcast")
8611     return isGFX90A();
8612 
8613   if (Ctrl == "row_share" ||
8614       Ctrl == "row_xmask")
8615     return isGFX10Plus();
8616 
8617   if (Ctrl == "wave_shl" ||
8618       Ctrl == "wave_shr" ||
8619       Ctrl == "wave_rol" ||
8620       Ctrl == "wave_ror" ||
8621       Ctrl == "row_bcast")
8622     return isVI() || isGFX9();
8623 
8624   return Ctrl == "row_mirror" ||
8625          Ctrl == "row_half_mirror" ||
8626          Ctrl == "quad_perm" ||
8627          Ctrl == "row_shl" ||
8628          Ctrl == "row_shr" ||
8629          Ctrl == "row_ror";
8630 }
8631 
8632 int64_t
8633 AMDGPUAsmParser::parseDPPCtrlPerm() {
8634   // quad_perm:[%d,%d,%d,%d]
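  // Each of the four lane selectors is a 2-bit value packed below as
  // Temp << (2 * i); e.g. the identity quad_perm:[0,1,2,3] encodes as 0xe4,
  // which is also the default dpp_ctrl value.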
8635 
8636   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8637     return -1;
8638 
8639   int64_t Val = 0;
8640   for (int i = 0; i < 4; ++i) {
8641     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8642       return -1;
8643 
8644     int64_t Temp;
8645     SMLoc Loc = getLoc();
8646     if (getParser().parseAbsoluteExpression(Temp))
8647       return -1;
8648     if (Temp < 0 || Temp > 3) {
8649       Error(Loc, "expected a 2-bit value");
8650       return -1;
8651     }
8652 
8653     Val += (Temp << i * 2);
8654   }
8655 
8656   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8657     return -1;
8658 
8659   return Val;
8660 }
8661 
8662 int64_t
8663 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8664   using namespace AMDGPU::DPP;
8665 
8666   // sel:%d
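  // The value is range-checked against the table below and then combined with
  // the base encoding; e.g. row_shl:1 encodes as ROW_SHL0 | 1, while row_bcast
  // accepts only 15 or 31 (BCAST15 / BCAST31).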
8667 
8668   int64_t Val;
8669   SMLoc Loc = getLoc();
8670 
8671   if (getParser().parseAbsoluteExpression(Val))
8672     return -1;
8673 
8674   struct DppCtrlCheck {
8675     int64_t Ctrl;
8676     int Lo;
8677     int Hi;
8678   };
8679 
8680   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8681     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8682     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8683     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8684     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8685     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8686     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8687     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8688     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8689     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8690     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8691     .Default({-1, 0, 0});
8692 
8693   bool Valid;
8694   if (Check.Ctrl == -1) {
8695     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8696     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8697   } else {
8698     Valid = Check.Lo <= Val && Val <= Check.Hi;
8699     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8700   }
8701 
8702   if (!Valid) {
8703     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8704     return -1;
8705   }
8706 
8707   return Val;
8708 }
8709 
8710 OperandMatchResultTy
8711 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8712   using namespace AMDGPU::DPP;
8713 
8714   if (!isToken(AsmToken::Identifier) ||
8715       !isSupportedDPPCtrl(getTokenStr(), Operands))
8716     return MatchOperand_NoMatch;
8717 
8718   SMLoc S = getLoc();
8719   int64_t Val = -1;
8720   StringRef Ctrl;
8721 
8722   parseId(Ctrl);
8723 
8724   if (Ctrl == "row_mirror") {
8725     Val = DppCtrl::ROW_MIRROR;
8726   } else if (Ctrl == "row_half_mirror") {
8727     Val = DppCtrl::ROW_HALF_MIRROR;
8728   } else {
8729     if (skipToken(AsmToken::Colon, "expected a colon")) {
8730       if (Ctrl == "quad_perm") {
8731         Val = parseDPPCtrlPerm();
8732       } else {
8733         Val = parseDPPCtrlSel(Ctrl);
8734       }
8735     }
8736   }
8737 
8738   if (Val == -1)
8739     return MatchOperand_ParseFail;
8740 
8741   Operands.push_back(
8742     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8743   return MatchOperand_Success;
8744 }
8745 
8746 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8747   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8748 }
8749 
8750 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8751   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8752 }
8753 
8754 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8755   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8756 }
8757 
8758 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8759   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8760 }
8761 
8762 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8763   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8764 }
8765 
8766 // Add dummy $old operand
8767 void AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst,
8768                                         const OperandVector &Operands,
8769                                         bool IsDPP8) {
8770   Inst.addOperand(MCOperand::createReg(0));
8771   cvtVOP3DPP(Inst, Operands, IsDPP8);
8772 }
8773 
8774 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8775   OptionalImmIndexMap OptionalIdx;
8776   unsigned Opc = Inst.getOpcode();
8777   bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8778   unsigned I = 1;
8779   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8780   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8781     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8782   }
8783 
8784   int Fi = 0;
8785   for (unsigned E = Operands.size(); I != E; ++I) {
8786     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8787                                             MCOI::TIED_TO);
8788     if (TiedTo != -1) {
8789       assert((unsigned)TiedTo < Inst.getNumOperands());
8790       // handle tied old or src2 for MAC instructions
8791       Inst.addOperand(Inst.getOperand(TiedTo));
8792     }
8793     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8794     // Add the register arguments
8795     if (IsDPP8 && Op.isFI()) {
8796       Fi = Op.getImm();
8797     } else if (HasModifiers &&
8798                isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8799       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8800     } else if (Op.isReg()) {
8801       Op.addRegOperands(Inst, 1);
8802     } else if (Op.isImm() &&
8803                Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) {
8804       assert(!HasModifiers && "Case should be unreachable with modifiers");
8805       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8806       Op.addImmOperands(Inst, 1);
8807     } else if (Op.isImm()) {
8808       OptionalIdx[Op.getImmTy()] = I;
8809     } else {
8810       llvm_unreachable("unhandled operand type");
8811     }
8812   }
8813   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8814     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8815   }
8816   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8817     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8818   }
8819   if (Desc.TSFlags & SIInstrFlags::VOP3P)
8820     cvtVOP3P(Inst, Operands, OptionalIdx);
8821   else if (Desc.TSFlags & SIInstrFlags::VOP3)
8822     cvtVOP3OpSel(Inst, Operands, OptionalIdx);
8823   else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
8824     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8825   }
8826 
8827   if (IsDPP8) {
8828     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8829     using namespace llvm::AMDGPU::DPP;
8830     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8831   } else {
8832     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8833     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8834     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8835     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8836     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8837       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8838     }
8839   }
8840 }
8841 
8842 // Add dummy $old operand
8843 void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst,
8844                                       const OperandVector &Operands,
8845                                       bool IsDPP8) {
8846   Inst.addOperand(MCOperand::createReg(0));
8847   cvtDPP(Inst, Operands, IsDPP8);
8848 }
8849 
8850 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8851   OptionalImmIndexMap OptionalIdx;
8852 
8853   unsigned Opc = Inst.getOpcode();
8854   bool HasModifiers =
8855       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8856   unsigned I = 1;
8857   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8858   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8859     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8860   }
8861 
8862   int Fi = 0;
8863   for (unsigned E = Operands.size(); I != E; ++I) {
8864     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8865                                             MCOI::TIED_TO);
8866     if (TiedTo != -1) {
8867       assert((unsigned)TiedTo < Inst.getNumOperands());
8868       // handle tied old or src2 for MAC instructions
8869       Inst.addOperand(Inst.getOperand(TiedTo));
8870     }
8871     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8872     // Add the register arguments
8873     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8874       // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses a "vcc" token.
8875       // Skip it.
8876       continue;
8877     }
8878 
8879     if (IsDPP8) {
8880       if (Op.isDPP8()) {
8881         Op.addImmOperands(Inst, 1);
8882       } else if (HasModifiers &&
8883                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8884         Op.addRegWithFPInputModsOperands(Inst, 2);
8885       } else if (Op.isFI()) {
8886         Fi = Op.getImm();
8887       } else if (Op.isReg()) {
8888         Op.addRegOperands(Inst, 1);
8889       } else {
8890         llvm_unreachable("Invalid operand type");
8891       }
8892     } else {
8893       if (HasModifiers &&
8894           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8895         Op.addRegWithFPInputModsOperands(Inst, 2);
8896       } else if (Op.isReg()) {
8897         Op.addRegOperands(Inst, 1);
8898       } else if (Op.isDPPCtrl()) {
8899         Op.addImmOperands(Inst, 1);
8900       } else if (Op.isImm()) {
8901         // Handle optional arguments
8902         OptionalIdx[Op.getImmTy()] = I;
8903       } else {
8904         llvm_unreachable("Invalid operand type");
8905       }
8906     }
8907   }
8908 
8909   if (IsDPP8) {
8910     using namespace llvm::AMDGPU::DPP;
8911     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8912   } else {
8913     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8914     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8915     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8916     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8917       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8918     }
8919   }
8920 }
8921 
8922 //===----------------------------------------------------------------------===//
8923 // sdwa
8924 //===----------------------------------------------------------------------===//
8925 
8926 OperandMatchResultTy
8927 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8928                               AMDGPUOperand::ImmTy Type) {
8929   using namespace llvm::AMDGPU::SDWA;
8930 
8931   SMLoc S = getLoc();
8932   StringRef Value;
8933   OperandMatchResultTy res;
8934 
8935   SMLoc StringLoc;
8936   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8937   if (res != MatchOperand_Success) {
8938     return res;
8939   }
8940 
8941   int64_t Int;
8942   Int = StringSwitch<int64_t>(Value)
8943         .Case("BYTE_0", SdwaSel::BYTE_0)
8944         .Case("BYTE_1", SdwaSel::BYTE_1)
8945         .Case("BYTE_2", SdwaSel::BYTE_2)
8946         .Case("BYTE_3", SdwaSel::BYTE_3)
8947         .Case("WORD_0", SdwaSel::WORD_0)
8948         .Case("WORD_1", SdwaSel::WORD_1)
8949         .Case("DWORD", SdwaSel::DWORD)
8950         .Default(0xffffffff);
8951 
8952   if (Int == 0xffffffff) {
8953     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8954     return MatchOperand_ParseFail;
8955   }
8956 
8957   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8958   return MatchOperand_Success;
8959 }
8960 
8961 OperandMatchResultTy
8962 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8963   using namespace llvm::AMDGPU::SDWA;
8964 
8965   SMLoc S = getLoc();
8966   StringRef Value;
8967   OperandMatchResultTy res;
8968 
8969   SMLoc StringLoc;
8970   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8971   if (res != MatchOperand_Success) {
8972     return res;
8973   }
8974 
8975   int64_t Int;
8976   Int = StringSwitch<int64_t>(Value)
8977         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8978         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8979         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8980         .Default(0xffffffff);
8981 
8982   if (Int == 0xffffffff) {
8983     Error(StringLoc, "invalid dst_unused value");
8984     return MatchOperand_ParseFail;
8985   }
8986 
8987   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8988   return MatchOperand_Success;
8989 }
8990 
8991 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8992   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8993 }
8994 
8995 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8996   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8997 }
8998 
8999 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9000   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9001 }
9002 
9003 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9004   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9005 }
9006 
9007 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9008   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9009 }
9010 
9011 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9012                               uint64_t BasicInstType,
9013                               bool SkipDstVcc,
9014                               bool SkipSrcVcc) {
9015   using namespace llvm::AMDGPU::SDWA;
9016 
9017   OptionalImmIndexMap OptionalIdx;
9018   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9019   bool SkippedVcc = false;
9020 
9021   unsigned I = 1;
9022   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9023   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9024     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9025   }
9026 
9027   for (unsigned E = Operands.size(); I != E; ++I) {
9028     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9029     if (SkipVcc && !SkippedVcc && Op.isReg() &&
9030         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9031       // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses a "vcc" token as dst.
9032       // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9033       // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
9034       // Skip VCC only if we didn't skip it on the previous iteration.
9035       // Note that src0 and src1 occupy 2 slots each because of modifiers.
9036       if (BasicInstType == SIInstrFlags::VOP2 &&
9037           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9038            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9039         SkippedVcc = true;
9040         continue;
9041       } else if (BasicInstType == SIInstrFlags::VOPC &&
9042                  Inst.getNumOperands() == 0) {
9043         SkippedVcc = true;
9044         continue;
9045       }
9046     }
9047     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9048       Op.addRegOrImmWithInputModsOperands(Inst, 2);
9049     } else if (Op.isImm()) {
9050       // Handle optional arguments
9051       OptionalIdx[Op.getImmTy()] = I;
9052     } else {
9053       llvm_unreachable("Invalid operand type");
9054     }
9055     SkippedVcc = false;
9056   }
9057 
9058   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
9059       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
9060       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
9061     // V_NOP_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
9062     switch (BasicInstType) {
9063     case SIInstrFlags::VOP1:
9064       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9065       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
9066         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9067       }
9068       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
9069       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
9070       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9071       break;
9072 
9073     case SIInstrFlags::VOP2:
9074       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9075       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
9076         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9077       }
9078       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
9079       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
9080       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9081       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
9082       break;
9083 
9084     case SIInstrFlags::VOPC:
9085       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
9086         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9087       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9088       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
9089       break;
9090 
9091     default:
9092       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9093     }
9094   }
9095 
9096   // Special case for v_mac_{f16, f32}:
9097   // these have a src2 register operand that is tied to the dst operand.
9098   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9099       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
9100     auto it = Inst.begin();
9101     std::advance(
9102       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9103     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9104   }
9105 }
9106 
9107 //===----------------------------------------------------------------------===//
9108 // mAI
9109 //===----------------------------------------------------------------------===//
9110 
9111 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
9112   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
9113 }
9114 
9115 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
9116   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
9117 }
9118 
9119 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
9120   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
9121 }
9122 
9123 /// Force static initialization.
9124 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9125   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
9126   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9127 }
9128 
9129 #define GET_REGISTER_MATCHER
9130 #define GET_MATCHER_IMPLEMENTATION
9131 #define GET_MNEMONIC_SPELL_CHECKER
9132 #define GET_MNEMONIC_CHECKER
9133 #include "AMDGPUGenAsmMatcher.inc"
9134 
9135 // This function should be defined after the auto-generated include so that the
9136 // MatchClassKind enum is defined.
9137 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9138                                                      unsigned Kind) {
9139   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9140   // But MatchInstructionImpl() expects to meet a token and fails to validate the
9141   // operand. This method checks whether we were given an immediate operand where
9142   // the matcher expects the corresponding token.
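  // For example, 'gds' is parsed as an ImmTyGDS immediate; when the matcher
  // asks for the MCK_gds token class, isGDS() accepts that immediate below.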
9143   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9144   switch (Kind) {
9145   case MCK_addr64:
9146     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9147   case MCK_gds:
9148     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9149   case MCK_lds:
9150     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9151   case MCK_idxen:
9152     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9153   case MCK_offen:
9154     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9155   case MCK_SSrcB32:
9156     // When operands have expression values, they will return true for isToken,
9157     // because it is not possible to distinguish between a token and an
9158     // expression at parse time. MatchInstructionImpl() will always try to match
9159     // an operand as a token when isToken returns true, and when the name of the
9160     // expression is not a valid token the match will fail, so we need to handle
9161     // it here.
9162     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9163   case MCK_SSrcF32:
9164     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9165   case MCK_SoppBrTarget:
9166     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
9167   case MCK_VReg32OrOff:
9168     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9169   case MCK_InterpSlot:
9170     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9171   case MCK_Attr:
9172     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9173   case MCK_AttrChan:
9174     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
9175   case MCK_ImmSMEMOffset:
9176     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
9177   case MCK_SReg_64:
9178   case MCK_SReg_64_XEXEC:
9179     // Null is defined as a 32-bit register but
9180     // it should also be enabled with 64-bit operands.
9181     // The following code enables it for SReg_64 operands
9182     // used as source and destination. Remaining source
9183     // operands are handled in isInlinableImm.
9184     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9185   default:
9186     return Match_InvalidOperand;
9187   }
9188 }
9189 
9190 //===----------------------------------------------------------------------===//
9191 // endpgm
9192 //===----------------------------------------------------------------------===//
9193 
9194 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
9195   SMLoc S = getLoc();
9196   int64_t Imm = 0;
9197 
9198   if (!parseExpr(Imm)) {
9199     // The operand is optional; if not present, default to 0.
9200     Imm = 0;
9201   }
9202 
9203   if (!isUInt<16>(Imm)) {
9204     Error(S, "expected a 16-bit value");
9205     return MatchOperand_ParseFail;
9206   }
9207 
9208   Operands.push_back(
9209       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9210   return MatchOperand_Success;
9211 }
9212 
9213 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9214 
9215 //===----------------------------------------------------------------------===//
9216 // LDSDIR
9217 //===----------------------------------------------------------------------===//
9218 
9219 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
9220   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
9221 }
9222 
9223 bool AMDGPUOperand::isWaitVDST() const {
9224   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9225 }
9226 
9227 //===----------------------------------------------------------------------===//
9228 // VINTERP
9229 //===----------------------------------------------------------------------===//
9230 
9231 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
9232   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
9233 }
9234 
9235 bool AMDGPUOperand::isWaitEXP() const {
9236   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9237 }
9238