//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

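  // Source operand modifiers: Abs and Neg are the floating-point modifiers,
  // Sext is the integer modifier. They are mutually exclusive and map onto
  // the SISrcMods bits of the corresponding *_modifiers operand.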
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

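  // Identifies which named/optional operand (offset, clamp, dmask, ...) an
  // Immediate operand represents; ImmTyNone is used for plain numeric
  // immediates.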
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };

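  // Tracks how an immediate is encoded once it has been added to an MCInst:
  // as a literal or as an inline constant (see the setImmKind* helpers below).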
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
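    // For example, the trailing 'gds' in "ds_write_b32 v0, v1 gds" may have
    // been lexed as a symbol reference rather than a plain token.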
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrcB64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayAlu() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  bool isWaitVDST() const;
  bool isWaitEXP() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
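// The running counts are published through the MC symbols .kernel.sgpr_count,
// .kernel.vgpr_count and .kernel.agpr_count (see usesSgprAt/usesVgprAt/
// usesAgprAt below).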
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

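  // Parse integer operands written as "<prefix>:<value>", e.g. offset:16 or
  // dmask:0xf.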
  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);

  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
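  // One field of a structured operand such as hwreg(...) or sendmsg(...):
  // its value, source location, and whether it was written symbolically
  // and/or explicitly specified.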
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
                          const SMLoc &IDLoc);
  bool validateExeczVcczOperands(const OperandVector &Operands);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1693 
1694   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1695   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1696   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1697   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1698   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1699   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1700 
1701   bool parseSwizzleOperand(int64_t &Op,
1702                            const unsigned MinVal,
1703                            const unsigned MaxVal,
1704                            const StringRef ErrMsg,
1705                            SMLoc &Loc);
1706   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1707                             const unsigned MinVal,
1708                             const unsigned MaxVal,
1709                             const StringRef ErrMsg);
1710   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1711   bool parseSwizzleOffset(int64_t &Imm);
1712   bool parseSwizzleMacro(int64_t &Imm);
1713   bool parseSwizzleQuadPerm(int64_t &Imm);
1714   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1715   bool parseSwizzleBroadcast(int64_t &Imm);
1716   bool parseSwizzleSwap(int64_t &Imm);
1717   bool parseSwizzleReverse(int64_t &Imm);
1718 
1719   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1720   int64_t parseGPRIdxMacro();
1721 
1722   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1723   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1724   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1725   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1726 
1727   AMDGPUOperand::Ptr defaultCPol() const;
1728 
1729   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1730   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1731   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1732   AMDGPUOperand::Ptr defaultFlatOffset() const;
1733 
1734   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1735 
1736   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1737                OptionalImmIndexMap &OptionalIdx);
1738   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1739   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1740   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1741   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1742                 OptionalImmIndexMap &OptionalIdx);
1743 
1744   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1745   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1746 
1747   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1748                bool IsAtomic = false);
1749   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1750   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1751 
1752   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1753 
1754   bool parseDimId(unsigned &Encoding);
1755   OperandMatchResultTy parseDim(OperandVector &Operands);
1756   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1757   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1758   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1759   int64_t parseDPPCtrlSel(StringRef Ctrl);
1760   int64_t parseDPPCtrlPerm();
1761   AMDGPUOperand::Ptr defaultRowMask() const;
1762   AMDGPUOperand::Ptr defaultBankMask() const;
1763   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1764   AMDGPUOperand::Ptr defaultFI() const;
1765   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1766   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1767     cvtDPP(Inst, Operands, true);
1768   }
1769   void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands,
1770                        bool IsDPP8 = false);
1771   void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
1772     cvtVOPCNoDstDPP(Inst, Operands, true);
1773   }
1774   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1775                   bool IsDPP8 = false);
1776   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1777     cvtVOP3DPP(Inst, Operands, true);
1778   }
1779   void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands,
1780                          bool IsDPP8 = false);
1781   void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
1782     cvtVOPC64NoDstDPP(Inst, Operands, true);
1783   }
1784 
1785   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1786                                     AMDGPUOperand::ImmTy Type);
1787   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1788   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1789   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1790   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1791   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1792   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1793   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1794                uint64_t BasicInstType,
1795                bool SkipDstVcc = false,
1796                bool SkipSrcVcc = false);
1797 
1798   AMDGPUOperand::Ptr defaultBLGP() const;
1799   AMDGPUOperand::Ptr defaultCBSZ() const;
1800   AMDGPUOperand::Ptr defaultABID() const;
1801 
1802   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1803   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1804 
1805   AMDGPUOperand::Ptr defaultWaitVDST() const;
1806   AMDGPUOperand::Ptr defaultWaitEXP() const;
1807 };
1808 
1809 struct OptionalOperand {
1810   const char *Name;
1811   AMDGPUOperand::ImmTy Type;
1812   bool IsBit;
1813   bool (*ConvertResult)(int64_t&);
1814 };
1815 
1816 } // end anonymous namespace
1817 
1818 // May be called with an integer type of equivalent bitwidth.
1819 static const fltSemantics *getFltSemantics(unsigned Size) {
1820   switch (Size) {
1821   case 4:
1822     return &APFloat::IEEEsingle();
1823   case 8:
1824     return &APFloat::IEEEdouble();
1825   case 2:
1826     return &APFloat::IEEEhalf();
1827   default:
1828     llvm_unreachable("unsupported fp type");
1829   }
1830 }
1831 
1832 static const fltSemantics *getFltSemantics(MVT VT) {
1833   return getFltSemantics(VT.getSizeInBits() / 8);
1834 }
1835 
1836 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1837   switch (OperandType) {
1838   case AMDGPU::OPERAND_REG_IMM_INT32:
1839   case AMDGPU::OPERAND_REG_IMM_FP32:
1840   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1841   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1842   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1843   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1844   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1845   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1846   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1847   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1848   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1849   case AMDGPU::OPERAND_KIMM32:
1850     return &APFloat::IEEEsingle();
1851   case AMDGPU::OPERAND_REG_IMM_INT64:
1852   case AMDGPU::OPERAND_REG_IMM_FP64:
1853   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1854   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1855   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1856     return &APFloat::IEEEdouble();
1857   case AMDGPU::OPERAND_REG_IMM_INT16:
1858   case AMDGPU::OPERAND_REG_IMM_FP16:
1859   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1860   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1861   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1862   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1863   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1864   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1865   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1866   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1867   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1868   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1869   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1870   case AMDGPU::OPERAND_KIMM16:
1871     return &APFloat::IEEEhalf();
1872   default:
1873     llvm_unreachable("unsupported fp type");
1874   }
1875 }
1876 
1877 //===----------------------------------------------------------------------===//
1878 // Operand
1879 //===----------------------------------------------------------------------===//
1880 
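// Check whether the 64-bit FP literal can be converted to the given type
// without overflow or underflow. Precision loss alone is tolerated: e.g. 0.1
// is accepted for an f16 operand even though it is inexact, while 1.0e10 is
// rejected because it overflows f16.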
1881 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1882   bool Lost;
1883 
1884   // Convert the literal to the operand's floating-point type
1885   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1886                                                APFloat::rmNearestTiesToEven,
1887                                                &Lost);
1888   // We allow precision loss but not overflow or underflow
1889   if (Status != APFloat::opOK &&
1890       Lost &&
1891       ((Status & APFloat::opOverflow)  != 0 ||
1892        (Status & APFloat::opUnderflow) != 0)) {
1893     return false;
1894   }
1895 
1896   return true;
1897 }
1898 
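// Check whether Val fits into Size bits as either an unsigned or a signed
// integer, i.e. whether truncation to Size bits is lossless under one of the
// two interpretations. For Size == 16, both 0xFFFF and -1 are accepted while
// 0x1FFFF is not.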
1899 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1900   return isUIntN(Size, Val) || isIntN(Size, Val);
1901 }
1902 
1903 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1904   if (VT.getScalarType() == MVT::i16) {
1905     // FP immediate values are broken.
1906     return isInlinableIntLiteral(Val);
1907   }
1908 
1909   // f16/v2f16 operands work correctly for all values.
1910   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1911 }
1912 
1913 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1914 
1915   // This is a hack to enable named inline values like
1916   // shared_base with both 32-bit and 64-bit operands.
1917   // Note that these values are defined as
1918   // 32-bit operands only.
1919   if (isInlineValue()) {
1920     return true;
1921   }
1922 
1923   if (!isImmTy(ImmTyNone)) {
1924     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1925     return false;
1926   }
1927   // TODO: We should avoid using host float here. It would be better to
1928   // check the float bit values which is what a few other places do.
1929   // We've had bot failures before due to weird NaN support on mips hosts.
1930 
1931   APInt Literal(64, Imm.Val);
1932 
1933   if (Imm.IsFPImm) { // We got fp literal token
1934     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1935       return AMDGPU::isInlinableLiteral64(Imm.Val,
1936                                           AsmParser->hasInv2PiInlineImm());
1937     }
1938 
1939     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1940     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1941       return false;
1942 
1943     if (type.getScalarSizeInBits() == 16) {
1944       return isInlineableLiteralOp16(
1945         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1946         type, AsmParser->hasInv2PiInlineImm());
1947     }
1948 
1949     // Check if single precision literal is inlinable
1950     return AMDGPU::isInlinableLiteral32(
1951       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1952       AsmParser->hasInv2PiInlineImm());
1953   }
1954 
1955   // We got int literal token.
1956   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1957     return AMDGPU::isInlinableLiteral64(Imm.Val,
1958                                         AsmParser->hasInv2PiInlineImm());
1959   }
1960 
1961   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1962     return false;
1963   }
1964 
1965   if (type.getScalarSizeInBits() == 16) {
1966     return isInlineableLiteralOp16(
1967       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1968       type, AsmParser->hasInv2PiInlineImm());
1969   }
1970 
1971   return AMDGPU::isInlinableLiteral32(
1972     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1973     AsmParser->hasInv2PiInlineImm());
1974 }
1975 
1976 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1977   // Check that this immediate can be added as literal
1978   if (!isImmTy(ImmTyNone)) {
1979     return false;
1980   }
1981 
1982   if (!Imm.IsFPImm) {
1983     // We got int literal token.
1984 
1985     if (type == MVT::f64 && hasFPModifiers()) {
1986       // Cannot apply fp modifiers to int literals preserving the same semantics
1987       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1988       // disable these cases.
1989       return false;
1990     }
1991 
1992     unsigned Size = type.getSizeInBits();
1993     if (Size == 64)
1994       Size = 32;
1995 
1996     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1997     // types.
1998     return isSafeTruncation(Imm.Val, Size);
1999   }
2000 
2001   // We got fp literal token
2002   if (type == MVT::f64) { // Expected 64-bit fp operand
2003     // The low 32 bits of the literal would be zeroed, but we accept such literals.
2004     return true;
2005   }
2006 
2007   if (type == MVT::i64) { // Expected 64-bit int operand
2008     // We don't allow fp literals in 64-bit integer instructions. It is
2009     // unclear how we should encode them.
2010     return false;
2011   }
2012 
2013   // We allow fp literals with f16x2 operands assuming that the specified
2014   // literal goes into the lower half and the upper half is zero. We also
2015   // require that the literal may be losslessly converted to f16.
2016   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2017                      (type == MVT::v2i16)? MVT::i16 :
2018                      (type == MVT::v2f32)? MVT::f32 : type;
2019 
2020   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2021   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2022 }
2023 
2024 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2025   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2026 }
2027 
2028 bool AMDGPUOperand::isVRegWithInputMods() const {
2029   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2030          // GFX90A allows DPP on 64-bit operands.
2031          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2032           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2033 }
2034 
2035 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2036   if (AsmParser->isVI())
2037     return isVReg32();
2038   else if (AsmParser->isGFX9Plus())
2039     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2040   else
2041     return false;
2042 }
2043 
2044 bool AMDGPUOperand::isSDWAFP16Operand() const {
2045   return isSDWAOperand(MVT::f16);
2046 }
2047 
2048 bool AMDGPUOperand::isSDWAFP32Operand() const {
2049   return isSDWAOperand(MVT::f32);
2050 }
2051 
2052 bool AMDGPUOperand::isSDWAInt16Operand() const {
2053   return isSDWAOperand(MVT::i16);
2054 }
2055 
2056 bool AMDGPUOperand::isSDWAInt32Operand() const {
2057   return isSDWAOperand(MVT::i32);
2058 }
2059 
2060 bool AMDGPUOperand::isBoolReg() const {
2061   auto FB = AsmParser->getFeatureBits();
2062   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2063                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2064 }
2065 
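// Apply 'abs' and 'neg' FP modifiers directly to the literal's bit pattern:
// 'abs' clears the sign bit and 'neg' flips it. For a 32-bit operand
// (Size == 4) the sign mask is 0x80000000, so neg turns 1.0 (0x3F800000)
// into -1.0 (0xBF800000).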
2066 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2067 {
2068   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2069   assert(Size == 2 || Size == 4 || Size == 8);
2070 
2071   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2072 
2073   if (Imm.Mods.Abs) {
2074     Val &= ~FpSignMask;
2075   }
2076   if (Imm.Mods.Neg) {
2077     Val ^= FpSignMask;
2078   }
2079 
2080   return Val;
2081 }
2082 
2083 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2084   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2085                              Inst.getNumOperands())) {
2086     addLiteralImmOperand(Inst, Imm.Val,
2087                          ApplyModifiers &&
2088                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2089   } else {
2090     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2091     Inst.addOperand(MCOperand::createImm(Imm.Val));
2092     setImmKindNone();
2093   }
2094 }
2095 
2096 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2097   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2098   auto OpNum = Inst.getNumOperands();
2099   // Check that this operand accepts literals
2100   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2101 
2102   if (ApplyModifiers) {
2103     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2104     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2105     Val = applyInputFPModifiers(Val, Size);
2106   }
2107 
2108   APInt Literal(64, Val);
2109   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2110 
2111   if (Imm.IsFPImm) { // We got fp literal token
2112     switch (OpTy) {
2113     case AMDGPU::OPERAND_REG_IMM_INT64:
2114     case AMDGPU::OPERAND_REG_IMM_FP64:
2115     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2116     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2117     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2118       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2119                                        AsmParser->hasInv2PiInlineImm())) {
2120         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2121         setImmKindConst();
2122         return;
2123       }
2124 
2125       // Non-inlineable
2126       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2127         // For fp operands we check if low 32 bits are zeros
2128         if (Literal.getLoBits(32) != 0) {
2129           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2130           "Can't encode literal as exact 64-bit floating-point operand. "
2131           "Low 32-bits will be set to zero");
2132         }
2133 
2134         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2135         setImmKindLiteral();
2136         return;
2137       }
2138 
2139       // We don't allow fp literals in 64-bit integer instructions. It is
2140       // unclear how we should encode them. This case should be checked earlier
2141       // in predicate methods (isLiteralImm())
2142       llvm_unreachable("fp literal in 64-bit integer instruction.");
2143 
2144     case AMDGPU::OPERAND_REG_IMM_INT32:
2145     case AMDGPU::OPERAND_REG_IMM_FP32:
2146     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2147     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2148     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2149     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2150     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2151     case AMDGPU::OPERAND_REG_IMM_INT16:
2152     case AMDGPU::OPERAND_REG_IMM_FP16:
2153     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2154     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2155     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2156     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2157     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2158     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2159     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2160     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2161     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2162     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2163     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2164     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2165     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2166     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2167     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2168     case AMDGPU::OPERAND_KIMM32:
2169     case AMDGPU::OPERAND_KIMM16: {
2170       bool lost;
2171       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2172       // Convert the literal to the operand's floating-point type
2173       FPLiteral.convert(*getOpFltSemantics(OpTy),
2174                         APFloat::rmNearestTiesToEven, &lost);
2175       // We allow precision loss but not overflow or underflow. This should
2176       // have been checked earlier in isLiteralImm().
2177 
2178       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2179       Inst.addOperand(MCOperand::createImm(ImmVal));
2180       setImmKindLiteral();
2181       return;
2182     }
2183     default:
2184       llvm_unreachable("invalid operand size");
2185     }
2186 
2187     return;
2188   }
2189 
2190   // We got int literal token.
2191   // Only sign extend inline immediates.
2192   switch (OpTy) {
2193   case AMDGPU::OPERAND_REG_IMM_INT32:
2194   case AMDGPU::OPERAND_REG_IMM_FP32:
2195   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2196   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2197   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2198   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2199   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2200   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2201   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2202   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2203   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2204   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2205   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2206     if (isSafeTruncation(Val, 32) &&
2207         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2208                                      AsmParser->hasInv2PiInlineImm())) {
2209       Inst.addOperand(MCOperand::createImm(Val));
2210       setImmKindConst();
2211       return;
2212     }
2213 
2214     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2215     setImmKindLiteral();
2216     return;
2217 
2218   case AMDGPU::OPERAND_REG_IMM_INT64:
2219   case AMDGPU::OPERAND_REG_IMM_FP64:
2220   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2221   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2222   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2223     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2224       Inst.addOperand(MCOperand::createImm(Val));
2225       setImmKindConst();
2226       return;
2227     }
2228 
2229     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2230     setImmKindLiteral();
2231     return;
2232 
2233   case AMDGPU::OPERAND_REG_IMM_INT16:
2234   case AMDGPU::OPERAND_REG_IMM_FP16:
2235   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2236   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2237   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2238   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2239   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2240     if (isSafeTruncation(Val, 16) &&
2241         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2242                                      AsmParser->hasInv2PiInlineImm())) {
2243       Inst.addOperand(MCOperand::createImm(Val));
2244       setImmKindConst();
2245       return;
2246     }
2247 
2248     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2249     setImmKindLiteral();
2250     return;
2251 
2252   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2253   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2254   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2255   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2256     assert(isSafeTruncation(Val, 16));
2257     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2258                                         AsmParser->hasInv2PiInlineImm()));
2259 
2260     Inst.addOperand(MCOperand::createImm(Val));
2261     return;
2262   }
2263   case AMDGPU::OPERAND_KIMM32:
2264     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2265     setImmKindNone();
2266     return;
2267   case AMDGPU::OPERAND_KIMM16:
2268     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2269     setImmKindNone();
2270     return;
2271   default:
2272     llvm_unreachable("invalid operand size");
2273   }
2274 }
2275 
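// Add a KIMM operand of the given bit width. Integer tokens are truncated to
// Bitwidth bits; FP tokens are converted from double to the Bitwidth-bit FP
// format and encoded by their bit pattern.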
2276 template <unsigned Bitwidth>
2277 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2278   APInt Literal(64, Imm.Val);
2279   setImmKindNone();
2280 
2281   if (!Imm.IsFPImm) {
2282     // We got int literal token.
2283     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2284     return;
2285   }
2286 
2287   bool Lost;
2288   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2289   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2290                     APFloat::rmNearestTiesToEven, &Lost);
2291   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2292 }
2293 
2294 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2295   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2296 }
2297 
2298 static bool isInlineValue(unsigned Reg) {
2299   switch (Reg) {
2300   case AMDGPU::SRC_SHARED_BASE:
2301   case AMDGPU::SRC_SHARED_LIMIT:
2302   case AMDGPU::SRC_PRIVATE_BASE:
2303   case AMDGPU::SRC_PRIVATE_LIMIT:
2304   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2305     return true;
2306   case AMDGPU::SRC_VCCZ:
2307   case AMDGPU::SRC_EXECZ:
2308   case AMDGPU::SRC_SCC:
2309     return true;
2310   case AMDGPU::SGPR_NULL:
2311     return true;
2312   default:
2313     return false;
2314   }
2315 }
2316 
2317 bool AMDGPUOperand::isInlineValue() const {
2318   return isRegKind() && ::isInlineValue(getReg());
2319 }
2320 
2321 //===----------------------------------------------------------------------===//
2322 // AsmParser
2323 //===----------------------------------------------------------------------===//
2324 
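// Map a register kind and a width in bits to the corresponding register
// class, e.g. (IS_VGPR, 96) -> VReg_96RegClassID. Returns -1 for unsupported
// kind/width combinations.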
2325 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2326   if (Is == IS_VGPR) {
2327     switch (RegWidth) {
2328       default: return -1;
2329       case 32:
2330         return AMDGPU::VGPR_32RegClassID;
2331       case 64:
2332         return AMDGPU::VReg_64RegClassID;
2333       case 96:
2334         return AMDGPU::VReg_96RegClassID;
2335       case 128:
2336         return AMDGPU::VReg_128RegClassID;
2337       case 160:
2338         return AMDGPU::VReg_160RegClassID;
2339       case 192:
2340         return AMDGPU::VReg_192RegClassID;
2341       case 224:
2342         return AMDGPU::VReg_224RegClassID;
2343       case 256:
2344         return AMDGPU::VReg_256RegClassID;
2345       case 512:
2346         return AMDGPU::VReg_512RegClassID;
2347       case 1024:
2348         return AMDGPU::VReg_1024RegClassID;
2349     }
2350   } else if (Is == IS_TTMP) {
2351     switch (RegWidth) {
2352       default: return -1;
2353       case 32:
2354         return AMDGPU::TTMP_32RegClassID;
2355       case 64:
2356         return AMDGPU::TTMP_64RegClassID;
2357       case 128:
2358         return AMDGPU::TTMP_128RegClassID;
2359       case 256:
2360         return AMDGPU::TTMP_256RegClassID;
2361       case 512:
2362         return AMDGPU::TTMP_512RegClassID;
2363     }
2364   } else if (Is == IS_SGPR) {
2365     switch (RegWidth) {
2366       default: return -1;
2367       case 32:
2368         return AMDGPU::SGPR_32RegClassID;
2369       case 64:
2370         return AMDGPU::SGPR_64RegClassID;
2371       case 96:
2372         return AMDGPU::SGPR_96RegClassID;
2373       case 128:
2374         return AMDGPU::SGPR_128RegClassID;
2375       case 160:
2376         return AMDGPU::SGPR_160RegClassID;
2377       case 192:
2378         return AMDGPU::SGPR_192RegClassID;
2379       case 224:
2380         return AMDGPU::SGPR_224RegClassID;
2381       case 256:
2382         return AMDGPU::SGPR_256RegClassID;
2383       case 512:
2384         return AMDGPU::SGPR_512RegClassID;
2385     }
2386   } else if (Is == IS_AGPR) {
2387     switch (RegWidth) {
2388       default: return -1;
2389       case 32:
2390         return AMDGPU::AGPR_32RegClassID;
2391       case 64:
2392         return AMDGPU::AReg_64RegClassID;
2393       case 96:
2394         return AMDGPU::AReg_96RegClassID;
2395       case 128:
2396         return AMDGPU::AReg_128RegClassID;
2397       case 160:
2398         return AMDGPU::AReg_160RegClassID;
2399       case 192:
2400         return AMDGPU::AReg_192RegClassID;
2401       case 224:
2402         return AMDGPU::AReg_224RegClassID;
2403       case 256:
2404         return AMDGPU::AReg_256RegClassID;
2405       case 512:
2406         return AMDGPU::AReg_512RegClassID;
2407       case 1024:
2408         return AMDGPU::AReg_1024RegClassID;
2409     }
2410   }
2411   return -1;
2412 }
2413 
2414 static unsigned getSpecialRegForName(StringRef RegName) {
2415   return StringSwitch<unsigned>(RegName)
2416     .Case("exec", AMDGPU::EXEC)
2417     .Case("vcc", AMDGPU::VCC)
2418     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2419     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2420     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2421     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2422     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2423     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2424     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2425     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2426     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2427     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2428     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2429     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2430     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2431     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2432     .Case("m0", AMDGPU::M0)
2433     .Case("vccz", AMDGPU::SRC_VCCZ)
2434     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2435     .Case("execz", AMDGPU::SRC_EXECZ)
2436     .Case("src_execz", AMDGPU::SRC_EXECZ)
2437     .Case("scc", AMDGPU::SRC_SCC)
2438     .Case("src_scc", AMDGPU::SRC_SCC)
2439     .Case("tba", AMDGPU::TBA)
2440     .Case("tma", AMDGPU::TMA)
2441     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2442     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2443     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2444     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2445     .Case("vcc_lo", AMDGPU::VCC_LO)
2446     .Case("vcc_hi", AMDGPU::VCC_HI)
2447     .Case("exec_lo", AMDGPU::EXEC_LO)
2448     .Case("exec_hi", AMDGPU::EXEC_HI)
2449     .Case("tma_lo", AMDGPU::TMA_LO)
2450     .Case("tma_hi", AMDGPU::TMA_HI)
2451     .Case("tba_lo", AMDGPU::TBA_LO)
2452     .Case("tba_hi", AMDGPU::TBA_HI)
2453     .Case("pc", AMDGPU::PC_REG)
2454     .Case("null", AMDGPU::SGPR_NULL)
2455     .Default(AMDGPU::NoRegister);
2456 }
2457 
2458 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2459                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2460   auto R = parseRegister();
2461   if (!R) return true;
2462   assert(R->isReg());
2463   RegNo = R->getReg();
2464   StartLoc = R->getStartLoc();
2465   EndLoc = R->getEndLoc();
2466   return false;
2467 }
2468 
2469 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2470                                     SMLoc &EndLoc) {
2471   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2472 }
2473 
2474 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2475                                                        SMLoc &StartLoc,
2476                                                        SMLoc &EndLoc) {
2477   bool Result =
2478       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2479   bool PendingErrors = getParser().hasPendingError();
2480   getParser().clearPendingErrors();
2481   if (PendingErrors)
2482     return MatchOperand_ParseFail;
2483   if (Result)
2484     return MatchOperand_NoMatch;
2485   return MatchOperand_Success;
2486 }
2487 
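// Extend a register list such as "[s0,s1,s2,s3]" by one more register.
// Special register halves may only be paired with their matching halves
// (e.g. exec_lo followed by exec_hi forms exec); regular registers must have
// consecutive indices, and each added register grows RegWidth by 32 bits.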
2488 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2489                                             RegisterKind RegKind, unsigned Reg1,
2490                                             SMLoc Loc) {
2491   switch (RegKind) {
2492   case IS_SPECIAL:
2493     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2494       Reg = AMDGPU::EXEC;
2495       RegWidth = 64;
2496       return true;
2497     }
2498     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2499       Reg = AMDGPU::FLAT_SCR;
2500       RegWidth = 64;
2501       return true;
2502     }
2503     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2504       Reg = AMDGPU::XNACK_MASK;
2505       RegWidth = 64;
2506       return true;
2507     }
2508     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2509       Reg = AMDGPU::VCC;
2510       RegWidth = 64;
2511       return true;
2512     }
2513     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2514       Reg = AMDGPU::TBA;
2515       RegWidth = 64;
2516       return true;
2517     }
2518     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2519       Reg = AMDGPU::TMA;
2520       RegWidth = 64;
2521       return true;
2522     }
2523     Error(Loc, "register does not fit in the list");
2524     return false;
2525   case IS_VGPR:
2526   case IS_SGPR:
2527   case IS_AGPR:
2528   case IS_TTMP:
2529     if (Reg1 != Reg + RegWidth / 32) {
2530       Error(Loc, "registers in a list must have consecutive indices");
2531       return false;
2532     }
2533     RegWidth += 32;
2534     return true;
2535   default:
2536     llvm_unreachable("unexpected register kind");
2537   }
2538 }
2539 
2540 struct RegInfo {
2541   StringLiteral Name;
2542   RegisterKind Kind;
2543 };
2544 
2545 static constexpr RegInfo RegularRegisters[] = {
2546   {{"v"},    IS_VGPR},
2547   {{"s"},    IS_SGPR},
2548   {{"ttmp"}, IS_TTMP},
2549   {{"acc"},  IS_AGPR},
2550   {{"a"},    IS_AGPR},
2551 };
2552 
2553 static bool isRegularReg(RegisterKind Kind) {
2554   return Kind == IS_VGPR ||
2555          Kind == IS_SGPR ||
2556          Kind == IS_TTMP ||
2557          Kind == IS_AGPR;
2558 }
2559 
2560 static const RegInfo* getRegularRegInfo(StringRef Str) {
2561   for (const RegInfo &Reg : RegularRegisters)
2562     if (Str.startswith(Reg.Name))
2563       return &Reg;
2564   return nullptr;
2565 }
2566 
2567 static bool getRegNum(StringRef Str, unsigned& Num) {
2568   return !Str.getAsInteger(10, Num);
2569 }
2570 
2571 bool
2572 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2573                             const AsmToken &NextToken) const {
2574 
2575   // A list of consecutive registers: [s0,s1,s2,s3]
2576   if (Token.is(AsmToken::LBrac))
2577     return true;
2578 
2579   if (!Token.is(AsmToken::Identifier))
2580     return false;
2581 
2582   // A single register like s0 or a range of registers like s[0:1]
2583 
2584   StringRef Str = Token.getString();
2585   const RegInfo *Reg = getRegularRegInfo(Str);
2586   if (Reg) {
2587     StringRef RegName = Reg->Name;
2588     StringRef RegSuffix = Str.substr(RegName.size());
2589     if (!RegSuffix.empty()) {
2590       unsigned Num;
2591       // A single register with an index: rXX
2592       if (getRegNum(RegSuffix, Num))
2593         return true;
2594     } else {
2595       // A range of registers: r[XX:YY].
2596       if (NextToken.is(AsmToken::LBrac))
2597         return true;
2598     }
2599   }
2600 
2601   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2602 }
2603 
2604 bool
2605 AMDGPUAsmParser::isRegister()
2606 {
2607   return isRegister(getToken(), peekToken());
2608 }
2609 
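// Map a parsed regular register (kind, first index, width in bits) to an MC
// register. SGPR and TTMP tuples must start at an index aligned to the tuple
// size in dwords, capped at 4: e.g. s[4:7] is accepted while s[2:5] is
// rejected with "invalid register alignment".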
2610 unsigned
2611 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2612                                unsigned RegNum,
2613                                unsigned RegWidth,
2614                                SMLoc Loc) {
2615 
2616   assert(isRegularReg(RegKind));
2617 
2618   unsigned AlignSize = 1;
2619   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2620     // SGPR and TTMP registers must be aligned.
2621     // Max required alignment is 4 dwords.
2622     AlignSize = std::min(RegWidth / 32, 4u);
2623   }
2624 
2625   if (RegNum % AlignSize != 0) {
2626     Error(Loc, "invalid register alignment");
2627     return AMDGPU::NoRegister;
2628   }
2629 
2630   unsigned RegIdx = RegNum / AlignSize;
2631   int RCID = getRegClass(RegKind, RegWidth);
2632   if (RCID == -1) {
2633     Error(Loc, "invalid or unsupported register size");
2634     return AMDGPU::NoRegister;
2635   }
2636 
2637   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2638   const MCRegisterClass RC = TRI->getRegClass(RCID);
2639   if (RegIdx >= RC.getNumRegs()) {
2640     Error(Loc, "register index is out of range");
2641     return AMDGPU::NoRegister;
2642   }
2643 
2644   return RC.getRegister(RegIdx);
2645 }
2646 
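// Parse a bracketed register index or index range such as "[0]" or "[0:3]".
// On success, Num holds the first index and RegWidth the total width in
// bits, so "[0:3]" yields Num == 0 and RegWidth == 128.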
2647 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2648   int64_t RegLo, RegHi;
2649   if (!skipToken(AsmToken::LBrac, "missing register index"))
2650     return false;
2651 
2652   SMLoc FirstIdxLoc = getLoc();
2653   SMLoc SecondIdxLoc;
2654 
2655   if (!parseExpr(RegLo))
2656     return false;
2657 
2658   if (trySkipToken(AsmToken::Colon)) {
2659     SecondIdxLoc = getLoc();
2660     if (!parseExpr(RegHi))
2661       return false;
2662   } else {
2663     RegHi = RegLo;
2664   }
2665 
2666   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2667     return false;
2668 
2669   if (!isUInt<32>(RegLo)) {
2670     Error(FirstIdxLoc, "invalid register index");
2671     return false;
2672   }
2673 
2674   if (!isUInt<32>(RegHi)) {
2675     Error(SecondIdxLoc, "invalid register index");
2676     return false;
2677   }
2678 
2679   if (RegLo > RegHi) {
2680     Error(FirstIdxLoc, "first register index should not exceed second index");
2681     return false;
2682   }
2683 
2684   Num = static_cast<unsigned>(RegLo);
2685   RegWidth = 32 * ((RegHi - RegLo) + 1);
2686   return true;
2687 }
2688 
2689 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2690                                           unsigned &RegNum, unsigned &RegWidth,
2691                                           SmallVectorImpl<AsmToken> &Tokens) {
2692   assert(isToken(AsmToken::Identifier));
2693   unsigned Reg = getSpecialRegForName(getTokenStr());
2694   if (Reg) {
2695     RegNum = 0;
2696     RegWidth = 32;
2697     RegKind = IS_SPECIAL;
2698     Tokens.push_back(getToken());
2699     lex(); // skip register name
2700   }
2701   return Reg;
2702 }
2703 
2704 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2705                                           unsigned &RegNum, unsigned &RegWidth,
2706                                           SmallVectorImpl<AsmToken> &Tokens) {
2707   assert(isToken(AsmToken::Identifier));
2708   StringRef RegName = getTokenStr();
2709   auto Loc = getLoc();
2710 
2711   const RegInfo *RI = getRegularRegInfo(RegName);
2712   if (!RI) {
2713     Error(Loc, "invalid register name");
2714     return AMDGPU::NoRegister;
2715   }
2716 
2717   Tokens.push_back(getToken());
2718   lex(); // skip register name
2719 
2720   RegKind = RI->Kind;
2721   StringRef RegSuffix = RegName.substr(RI->Name.size());
2722   if (!RegSuffix.empty()) {
2723     // Single 32-bit register: vXX.
2724     if (!getRegNum(RegSuffix, RegNum)) {
2725       Error(Loc, "invalid register index");
2726       return AMDGPU::NoRegister;
2727     }
2728     RegWidth = 32;
2729   } else {
2730     // Range of registers: v[XX:YY]. ":YY" is optional.
2731     if (!ParseRegRange(RegNum, RegWidth))
2732       return AMDGPU::NoRegister;
2733   }
2734 
2735   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2736 }
2737 
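// Parse a bracketed list of 32-bit registers of the same kind with
// consecutive indices, e.g. "[s0,s1,s2,s3]", and combine them into the
// equivalent register tuple (here s[0:3]).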
2738 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2739                                        unsigned &RegWidth,
2740                                        SmallVectorImpl<AsmToken> &Tokens) {
2741   unsigned Reg = AMDGPU::NoRegister;
2742   auto ListLoc = getLoc();
2743 
2744   if (!skipToken(AsmToken::LBrac,
2745                  "expected a register or a list of registers")) {
2746     return AMDGPU::NoRegister;
2747   }
2748 
2749   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2750 
2751   auto Loc = getLoc();
2752   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2753     return AMDGPU::NoRegister;
2754   if (RegWidth != 32) {
2755     Error(Loc, "expected a single 32-bit register");
2756     return AMDGPU::NoRegister;
2757   }
2758 
2759   for (; trySkipToken(AsmToken::Comma); ) {
2760     RegisterKind NextRegKind;
2761     unsigned NextReg, NextRegNum, NextRegWidth;
2762     Loc = getLoc();
2763 
2764     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2765                              NextRegNum, NextRegWidth,
2766                              Tokens)) {
2767       return AMDGPU::NoRegister;
2768     }
2769     if (NextRegWidth != 32) {
2770       Error(Loc, "expected a single 32-bit register");
2771       return AMDGPU::NoRegister;
2772     }
2773     if (NextRegKind != RegKind) {
2774       Error(Loc, "registers in a list must be of the same kind");
2775       return AMDGPU::NoRegister;
2776     }
2777     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2778       return AMDGPU::NoRegister;
2779   }
2780 
2781   if (!skipToken(AsmToken::RBrac,
2782                  "expected a comma or a closing square bracket")) {
2783     return AMDGPU::NoRegister;
2784   }
2785 
2786   if (isRegularReg(RegKind))
2787     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2788 
2789   return Reg;
2790 }
2791 
2792 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2793                                           unsigned &RegNum, unsigned &RegWidth,
2794                                           SmallVectorImpl<AsmToken> &Tokens) {
2795   auto Loc = getLoc();
2796   Reg = AMDGPU::NoRegister;
2797 
2798   if (isToken(AsmToken::Identifier)) {
2799     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2800     if (Reg == AMDGPU::NoRegister)
2801       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2802   } else {
2803     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2804   }
2805 
2806   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2807   if (Reg == AMDGPU::NoRegister) {
2808     assert(Parser.hasPendingError());
2809     return false;
2810   }
2811 
2812   if (!subtargetHasRegister(*TRI, Reg)) {
2813     if (Reg == AMDGPU::SGPR_NULL) {
2814       Error(Loc, "'null' operand is not supported on this GPU");
2815     } else {
2816       Error(Loc, "register not available on this GPU");
2817     }
2818     return false;
2819   }
2820 
2821   return true;
2822 }
2823 
2824 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2825                                           unsigned &RegNum, unsigned &RegWidth,
2826                                           bool RestoreOnFailure /*=false*/) {
2827   Reg = AMDGPU::NoRegister;
2828 
2829   SmallVector<AsmToken, 1> Tokens;
2830   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2831     if (RestoreOnFailure) {
2832       while (!Tokens.empty()) {
2833         getLexer().UnLex(Tokens.pop_back_val());
2834       }
2835     }
2836     return true;
2837   }
2838   return false;
2839 }
2840 
2841 Optional<StringRef>
2842 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2843   switch (RegKind) {
2844   case IS_VGPR:
2845     return StringRef(".amdgcn.next_free_vgpr");
2846   case IS_SGPR:
2847     return StringRef(".amdgcn.next_free_sgpr");
2848   default:
2849     return None;
2850   }
2851 }
2852 
2853 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2854   auto SymbolName = getGprCountSymbolName(RegKind);
2855   assert(SymbolName && "initializing invalid register kind");
2856   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2857   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2858 }
2859 
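// Track the highest GPR index referenced so far via the
// .amdgcn.next_free_{v,s}gpr symbols. For example, a use of v[6:7]
// (DwordRegIndex == 6, RegWidth == 64) bumps .amdgcn.next_free_vgpr to at
// least 8.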
2860 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2861                                             unsigned DwordRegIndex,
2862                                             unsigned RegWidth) {
2863   // Symbols are only defined for GCN targets
2864   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2865     return true;
2866 
2867   auto SymbolName = getGprCountSymbolName(RegKind);
2868   if (!SymbolName)
2869     return true;
2870   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2871 
2872   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2873   int64_t OldCount;
2874 
2875   if (!Sym->isVariable())
2876     return !Error(getLoc(),
2877                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2878   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2879     return !Error(
2880         getLoc(),
2881         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2882 
2883   if (OldCount <= NewMax)
2884     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2885 
2886   return true;
2887 }
2888 
2889 std::unique_ptr<AMDGPUOperand>
2890 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2891   const auto &Tok = getToken();
2892   SMLoc StartLoc = Tok.getLoc();
2893   SMLoc EndLoc = Tok.getEndLoc();
2894   RegisterKind RegKind;
2895   unsigned Reg, RegNum, RegWidth;
2896 
2897   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2898     return nullptr;
2899   }
2900   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2901     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2902       return nullptr;
2903   } else
2904     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2905   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2906 }
2907 
2908 OperandMatchResultTy
2909 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2910   // TODO: add syntactic sugar for 1/(2*PI)
2911 
2912   assert(!isRegister());
2913   assert(!isModifier());
2914 
2915   const auto& Tok = getToken();
2916   const auto& NextTok = peekToken();
2917   bool IsReal = Tok.is(AsmToken::Real);
2918   SMLoc S = getLoc();
2919   bool Negate = false;
2920 
2921   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2922     lex();
2923     IsReal = true;
2924     Negate = true;
2925   }
2926 
2927   if (IsReal) {
2928     // Floating-point expressions are not supported;
2929     // only floating-point literals with an
2930     // optional sign are allowed.
2931 
2932     StringRef Num = getTokenStr();
2933     lex();
2934 
2935     APFloat RealVal(APFloat::IEEEdouble());
2936     auto roundMode = APFloat::rmNearestTiesToEven;
2937     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2938       return MatchOperand_ParseFail;
2939     }
2940     if (Negate)
2941       RealVal.changeSign();
2942 
2943     Operands.push_back(
2944       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2945                                AMDGPUOperand::ImmTyNone, true));
2946 
2947     return MatchOperand_Success;
2948 
2949   } else {
2950     int64_t IntVal;
2951     const MCExpr *Expr;
2952     SMLoc S = getLoc();
2953 
2954     if (HasSP3AbsModifier) {
2955       // This is a workaround for handling expressions
2956       // as arguments of SP3 'abs' modifier, for example:
2957       //     |1.0|
2958       //     |-1|
2959       //     |1+x|
2960       // This syntax is not compatible with syntax of standard
2961       // MC expressions (due to the trailing '|').
2962       SMLoc EndLoc;
2963       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2964         return MatchOperand_ParseFail;
2965     } else {
2966       if (Parser.parseExpression(Expr))
2967         return MatchOperand_ParseFail;
2968     }
2969 
2970     if (Expr->evaluateAsAbsolute(IntVal)) {
2971       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2972     } else {
2973       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2974     }
2975 
2976     return MatchOperand_Success;
2977   }
2978 
2979   return MatchOperand_NoMatch;
2980 }
2981 
2982 OperandMatchResultTy
2983 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2984   if (!isRegister())
2985     return MatchOperand_NoMatch;
2986 
2987   if (auto R = parseRegister()) {
2988     assert(R->isReg());
2989     Operands.push_back(std::move(R));
2990     return MatchOperand_Success;
2991   }
2992   return MatchOperand_ParseFail;
2993 }
2994 
2995 OperandMatchResultTy
2996 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2997   auto res = parseReg(Operands);
2998   if (res != MatchOperand_NoMatch) {
2999     return res;
3000   } else if (isModifier()) {
3001     return MatchOperand_NoMatch;
3002   } else {
3003     return parseImm(Operands, HasSP3AbsMod);
3004   }
3005 }
3006 
3007 bool
3008 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3009   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3010     const auto &str = Token.getString();
3011     return str == "abs" || str == "neg" || str == "sext";
3012   }
3013   return false;
3014 }
3015 
3016 bool
3017 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3018   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3019 }
3020 
3021 bool
3022 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3023   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3024 }
3025 
3026 bool
3027 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3028   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3029 }
3030 
3031 // Check if this is an operand modifier or an opcode modifier
3032 // which may look like an expression but is not. We should
3033 // avoid parsing these modifiers as expressions. Currently
3034 // recognized sequences are:
3035 //   |...|
3036 //   abs(...)
3037 //   neg(...)
3038 //   sext(...)
3039 //   -reg
3040 //   -|...|
3041 //   -abs(...)
3042 //   name:...
3043 // Note that simple opcode modifiers like 'gds' may be parsed as
3044 // expressions; this is a special case. See getExpressionAsToken.
3045 //
3046 bool
3047 AMDGPUAsmParser::isModifier() {
3048 
3049   AsmToken Tok = getToken();
3050   AsmToken NextToken[2];
3051   peekTokens(NextToken);
3052 
3053   return isOperandModifier(Tok, NextToken[0]) ||
3054          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3055          isOpcodeModifierWithVal(Tok, NextToken[0]);
3056 }
3057 
3058 // Check if the current token is an SP3 'neg' modifier.
3059 // Currently this modifier is allowed in the following context:
3060 //
3061 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3062 // 2. Before an 'abs' modifier: -abs(...)
3063 // 3. Before an SP3 'abs' modifier: -|...|
3064 //
3065 // In all other cases "-" is handled as a part
3066 // of an expression that follows the sign.
3067 //
3068 // Note: When "-" is followed by an integer literal,
3069 // this is interpreted as integer negation rather
3070 // than a floating-point NEG modifier applied to the literal.
3071 // Besides being counter-intuitive, such use of the floating-point
3072 // NEG modifier would result in a different meaning
3073 // of integer literals used with VOP1/2/C and VOP3,
3074 // for example:
3075 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3076 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3077 // Negative fp literals with a preceding "-" are
3078 // handled likewise for uniformity.
3079 //
3080 bool
3081 AMDGPUAsmParser::parseSP3NegModifier() {
3082 
3083   AsmToken NextToken[2];
3084   peekTokens(NextToken);
3085 
3086   if (isToken(AsmToken::Minus) &&
3087       (isRegister(NextToken[0], NextToken[1]) ||
3088        NextToken[0].is(AsmToken::Pipe) ||
3089        isId(NextToken[0], "abs"))) {
3090     lex();
3091     return true;
3092   }
3093 
3094   return false;
3095 }
3096 
3097 OperandMatchResultTy
3098 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3099                                               bool AllowImm) {
3100   bool Neg, SP3Neg;
3101   bool Abs, SP3Abs;
3102   SMLoc Loc;
3103 
3104   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3105   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3106     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3107     return MatchOperand_ParseFail;
3108   }
3109 
3110   SP3Neg = parseSP3NegModifier();
3111 
3112   Loc = getLoc();
3113   Neg = trySkipId("neg");
3114   if (Neg && SP3Neg) {
3115     Error(Loc, "expected register or immediate");
3116     return MatchOperand_ParseFail;
3117   }
3118   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3119     return MatchOperand_ParseFail;
3120 
3121   Abs = trySkipId("abs");
3122   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3123     return MatchOperand_ParseFail;
3124 
3125   Loc = getLoc();
3126   SP3Abs = trySkipToken(AsmToken::Pipe);
3127   if (Abs && SP3Abs) {
3128     Error(Loc, "expected register or immediate");
3129     return MatchOperand_ParseFail;
3130   }
3131 
3132   OperandMatchResultTy Res;
3133   if (AllowImm) {
3134     Res = parseRegOrImm(Operands, SP3Abs);
3135   } else {
3136     Res = parseReg(Operands);
3137   }
3138   if (Res != MatchOperand_Success) {
3139     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3140   }
3141 
3142   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3143     return MatchOperand_ParseFail;
3144   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3145     return MatchOperand_ParseFail;
3146   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3147     return MatchOperand_ParseFail;
3148 
3149   AMDGPUOperand::Modifiers Mods;
3150   Mods.Abs = Abs || SP3Abs;
3151   Mods.Neg = Neg || SP3Neg;
3152 
3153   if (Mods.hasFPModifiers()) {
3154     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3155     if (Op.isExpr()) {
3156       Error(Op.getStartLoc(), "expected an absolute expression");
3157       return MatchOperand_ParseFail;
3158     }
3159     Op.setModifiers(Mods);
3160   }
3161   return MatchOperand_Success;
3162 }
3163 
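// Parse an operand together with an optional integer 'sext' input modifier,
// e.g. "sext(v0)". An immediate may appear inside sext() when AllowImm is true.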
3164 OperandMatchResultTy
3165 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3166                                                bool AllowImm) {
3167   bool Sext = trySkipId("sext");
3168   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3169     return MatchOperand_ParseFail;
3170 
3171   OperandMatchResultTy Res;
3172   if (AllowImm) {
3173     Res = parseRegOrImm(Operands);
3174   } else {
3175     Res = parseReg(Operands);
3176   }
3177   if (Res != MatchOperand_Success) {
3178     return Sext? MatchOperand_ParseFail : Res;
3179   }
3180 
3181   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3182     return MatchOperand_ParseFail;
3183 
3184   AMDGPUOperand::Modifiers Mods;
3185   Mods.Sext = Sext;
3186 
3187   if (Mods.hasIntModifiers()) {
3188     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3189     if (Op.isExpr()) {
3190       Error(Op.getStartLoc(), "expected an absolute expression");
3191       return MatchOperand_ParseFail;
3192     }
3193     Op.setModifiers(Mods);
3194   }
3195 
3196   return MatchOperand_Success;
3197 }
3198 
3199 OperandMatchResultTy
3200 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3201   return parseRegOrImmWithFPInputMods(Operands, false);
3202 }
3203 
3204 OperandMatchResultTy
3205 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3206   return parseRegOrImmWithIntInputMods(Operands, false);
3207 }
3208 
3209 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3210   auto Loc = getLoc();
3211   if (trySkipId("off")) {
3212     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3213                                                 AMDGPUOperand::ImmTyOff, false));
3214     return MatchOperand_Success;
3215   }
3216 
3217   if (!isRegister())
3218     return MatchOperand_NoMatch;
3219 
3220   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3221   if (Reg) {
3222     Operands.push_back(std::move(Reg));
3223     return MatchOperand_Success;
3224   }
3225 
3226   return MatchOperand_ParseFail;
3227 
3228 }
3229 
3230 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3231   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3232 
3233   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3234       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3235       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3236       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3237     return Match_InvalidOperand;
3238 
3239   if ((TSFlags & SIInstrFlags::VOP3) &&
3240       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3241       getForcedEncodingSize() != 64)
3242     return Match_PreferE32;
3243 
3244   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3245       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3246     // v_mac_f32/16 allow only dst_sel == DWORD;
3247     auto OpNum =
3248         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3249     const auto &Op = Inst.getOperand(OpNum);
3250     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3251       return Match_InvalidOperand;
3252     }
3253   }
3254 
3255   return Match_Success;
3256 }
3257 
3258 static ArrayRef<unsigned> getAllVariants() {
3259   static const unsigned Variants[] = {
3260     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3261     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3262     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3263   };
3264 
3265   return makeArrayRef(Variants);
3266 }
3267 
// Return the asm variants that should be checked when matching.
3269 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3270   if (isForcedDPP() && isForcedVOP3()) {
3271     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3272     return makeArrayRef(Variants);
3273   }
3274   if (getForcedEncodingSize() == 32) {
3275     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3276     return makeArrayRef(Variants);
3277   }
3278 
3279   if (isForcedVOP3()) {
3280     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3281     return makeArrayRef(Variants);
3282   }
3283 
3284   if (isForcedSDWA()) {
3285     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3286                                         AMDGPUAsmVariants::SDWA9};
3287     return makeArrayRef(Variants);
3288   }
3289 
3290   if (isForcedDPP()) {
3291     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3292     return makeArrayRef(Variants);
3293   }
3294 
3295   return getAllVariants();
3296 }
3297 
3298 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3299   if (isForcedDPP() && isForcedVOP3())
3300     return "e64_dpp";
3301 
3302   if (getForcedEncodingSize() == 32)
3303     return "e32";
3304 
3305   if (isForcedVOP3())
3306     return "e64";
3307 
3308   if (isForcedSDWA())
3309     return "sdwa";
3310 
3311   if (isForcedDPP())
3312     return "dpp";
3313 
3314   return "";
3315 }
3316 
3317 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3318   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3319   const unsigned Num = Desc.getNumImplicitUses();
3320   for (unsigned i = 0; i < Num; ++i) {
3321     unsigned Reg = Desc.ImplicitUses[i];
3322     switch (Reg) {
3323     case AMDGPU::FLAT_SCR:
3324     case AMDGPU::VCC:
3325     case AMDGPU::VCC_LO:
3326     case AMDGPU::VCC_HI:
3327     case AMDGPU::M0:
3328       return Reg;
3329     default:
3330       break;
3331     }
3332   }
3333   return AMDGPU::NoRegister;
3334 }
3335 
3336 // NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
3338 // Note that there are no cases when a GFX7 opcode violates
3339 // constant bus limitations due to the use of an f16 constant.
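// In short, inline constants are values that can be encoded directly in the
// operand field without a trailing literal dword: small integers in [-16, 64]
// and a handful of fp values (+-0.5, +-1.0, +-2.0, +-4.0, and 1/(2*pi) on
// targets that support it).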
3340 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3341                                        unsigned OpIdx) const {
3342   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3343 
3344   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3345     return false;
3346   }
3347 
3348   const MCOperand &MO = Inst.getOperand(OpIdx);
3349 
3350   int64_t Val = MO.getImm();
3351   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3352 
3353   switch (OpSize) { // expected operand size
3354   case 8:
3355     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3356   case 4:
3357     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3358   case 2: {
3359     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3360     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3361         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3362         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3363       return AMDGPU::isInlinableIntLiteral(Val);
3364 
3365     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3366         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3367         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3368       return AMDGPU::isInlinableIntLiteralV216(Val);
3369 
3370     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3371         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3372         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3373       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3374 
3375     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3376   }
3377   default:
3378     llvm_unreachable("invalid operand size");
3379   }
3380 }
3381 
3382 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3383   if (!isGFX10Plus())
3384     return 1;
3385 
3386   switch (Opcode) {
3387   // 64-bit shift instructions can use only one scalar value input
3388   case AMDGPU::V_LSHLREV_B64_e64:
3389   case AMDGPU::V_LSHLREV_B64_gfx10:
3390   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3391   case AMDGPU::V_LSHRREV_B64_e64:
3392   case AMDGPU::V_LSHRREV_B64_gfx10:
3393   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3394   case AMDGPU::V_ASHRREV_I64_e64:
3395   case AMDGPU::V_ASHRREV_I64_gfx10:
3396   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3397   case AMDGPU::V_LSHL_B64_e64:
3398   case AMDGPU::V_LSHR_B64_e64:
3399   case AMDGPU::V_ASHR_I64_e64:
3400     return 1;
3401   default:
3402     return 2;
3403   }
3404 }
3405 
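// An operand occupies a constant bus slot if it is an SGPR (other than the
// null register), a literal that cannot be encoded as an inline constant,
// or an expression that will be emitted as a literal.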
3406 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3407   const MCOperand &MO = Inst.getOperand(OpIdx);
3408   if (MO.isImm()) {
3409     return !isInlineConstant(Inst, OpIdx);
3410   } else if (MO.isReg()) {
3411     auto Reg = MO.getReg();
3412     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3413     auto PReg = mc2PseudoReg(Reg);
3414     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3415   } else {
3416     return true;
3417   }
3418 }
3419 
3420 bool
3421 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3422                                                 const OperandVector &Operands) {
3423   const unsigned Opcode = Inst.getOpcode();
3424   const MCInstrDesc &Desc = MII.get(Opcode);
3425   unsigned LastSGPR = AMDGPU::NoRegister;
3426   unsigned ConstantBusUseCount = 0;
3427   unsigned NumLiterals = 0;
3428   unsigned LiteralSize;
3429 
3430   if (Desc.TSFlags &
3431       (SIInstrFlags::VOPC |
3432        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3433        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3434        SIInstrFlags::SDWA)) {
3435     // Check special imm operands (used by madmk, etc)
3436     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3437       ++NumLiterals;
3438       LiteralSize = 4;
3439     }
3440 
3441     SmallDenseSet<unsigned> SGPRsUsed;
3442     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3443     if (SGPRUsed != AMDGPU::NoRegister) {
3444       SGPRsUsed.insert(SGPRUsed);
3445       ++ConstantBusUseCount;
3446     }
3447 
3448     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3449     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3450     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3451 
3452     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3453 
3454     for (int OpIdx : OpIndices) {
3455       if (OpIdx == -1) break;
3456 
3457       const MCOperand &MO = Inst.getOperand(OpIdx);
3458       if (usesConstantBus(Inst, OpIdx)) {
3459         if (MO.isReg()) {
3460           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection, like
3462           //   s0, s[0:1]
3463           //   flat_scratch_lo, flat_scratch
3464           //   flat_scratch_lo, flat_scratch_hi
3465           // are theoretically valid but they are disabled anyway.
3466           // Note that this code mimics SIInstrInfo::verifyInstruction
3467           if (SGPRsUsed.insert(LastSGPR).second) {
3468             ++ConstantBusUseCount;
3469           }
3470         } else { // Expression or a literal
3471 
3472           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3473             continue; // special operand like VINTERP attr_chan
3474 
3475           // An instruction may use only one literal.
3476           // This has been validated on the previous step.
3477           // See validateVOPLiteral.
3478           // This literal may be used as more than one operand.
3479           // If all these operands are of the same size,
3480           // this literal counts as one scalar value.
3481           // Otherwise it counts as 2 scalar values.
3482           // See "GFX10 Shader Programming", section 3.6.2.3.
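          // For example, the same literal value used as both a 32-bit
          // and a 64-bit operand counts as two scalar values here.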
3483 
3484           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3485           if (Size < 4) Size = 4;
3486 
3487           if (NumLiterals == 0) {
3488             NumLiterals = 1;
3489             LiteralSize = Size;
3490           } else if (LiteralSize != Size) {
3491             NumLiterals = 2;
3492           }
3493         }
3494       }
3495     }
3496   }
3497   ConstantBusUseCount += NumLiterals;
3498 
3499   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3500     return true;
3501 
3502   SMLoc LitLoc = getLitLoc(Operands);
3503   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3504   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3505   Error(Loc, "invalid operand (violates constant bus restrictions)");
3506   return false;
3507 }
3508 
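// For opcodes whose vdst is marked early-clobber (e.g. the v_mqsad family),
// the destination register must not overlap any of the source registers.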
3509 bool
3510 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3511                                                  const OperandVector &Operands) {
3512   const unsigned Opcode = Inst.getOpcode();
3513   const MCInstrDesc &Desc = MII.get(Opcode);
3514 
3515   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3516   if (DstIdx == -1 ||
3517       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3518     return true;
3519   }
3520 
3521   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3522 
3523   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3524   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3525   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3526 
3527   assert(DstIdx != -1);
3528   const MCOperand &Dst = Inst.getOperand(DstIdx);
3529   assert(Dst.isReg());
3530 
3531   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3532 
3533   for (int SrcIdx : SrcIndices) {
3534     if (SrcIdx == -1) break;
3535     const MCOperand &Src = Inst.getOperand(SrcIdx);
3536     if (Src.isReg()) {
3537       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3538         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3539         Error(getRegLoc(SrcReg, Operands),
3540           "destination must be different than all sources");
3541         return false;
3542       }
3543     }
3544   }
3545 
3546   return true;
3547 }
3548 
3549 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3550 
3551   const unsigned Opc = Inst.getOpcode();
3552   const MCInstrDesc &Desc = MII.get(Opc);
3553 
3554   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3555     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3556     assert(ClampIdx != -1);
3557     return Inst.getOperand(ClampIdx).getImm() == 0;
3558   }
3559 
3560   return true;
3561 }
3562 
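// Check that the MIMG vdata size is consistent with dmask, d16 and tfe.
// For example, dmask = 0x7 with tfe set (and no d16 packing) requires a
// 4-dword vdata: 3 data dwords plus 1 dword for tfe.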
3563 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3564 
3565   const unsigned Opc = Inst.getOpcode();
3566   const MCInstrDesc &Desc = MII.get(Opc);
3567 
3568   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3569     return None;
3570 
3571   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3572   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3573   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3574 
3575   assert(VDataIdx != -1);
3576 
3577   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3578     return None;
3579 
3580   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3581   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3582   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3583   if (DMask == 0)
3584     DMask = 1;
3585 
3586   bool isPackedD16 = false;
3587   unsigned DataSize =
3588     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3589   if (hasPackedD16()) {
3590     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3591     isPackedD16 = D16Idx >= 0;
3592     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3593       DataSize = (DataSize + 1) / 2;
3594   }
3595 
3596   if ((VDataSize / 4) == DataSize + TFESize)
3597     return None;
3598 
3599   return StringRef(isPackedD16
3600                        ? "image data size does not match dmask, d16 and tfe"
3601                        : "image data size does not match dmask and tfe");
3602 }
3603 
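// Check that the number of address VGPRs matches dim, a16 and NSA usage.
// For example, a plain 2D image load needs 2 address VGPRs (x, y),
// or a single VGPR when a16 packs the coordinates.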
3604 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3605   const unsigned Opc = Inst.getOpcode();
3606   const MCInstrDesc &Desc = MII.get(Opc);
3607 
3608   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3609     return true;
3610 
3611   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3612 
3613   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3614       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3615   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3616   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3617   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3618   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3619 
3620   assert(VAddr0Idx != -1);
3621   assert(SrsrcIdx != -1);
3622   assert(SrsrcIdx > VAddr0Idx);
3623 
3624   if (DimIdx == -1)
3625     return true; // intersect_ray
3626 
3627   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3628   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3629   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3630   unsigned ActualAddrSize =
3631       IsNSA ? SrsrcIdx - VAddr0Idx
3632             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3633   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3634 
3635   unsigned ExpectedAddrSize =
3636       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3637 
3638   if (!IsNSA) {
3639     if (ExpectedAddrSize > 8)
3640       ExpectedAddrSize = 16;
3641 
3642     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3643     // This provides backward compatibility for assembly created
3644     // before 160b/192b/224b types were directly supported.
3645     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3646       return true;
3647   }
3648 
3649   return ActualAddrSize == ExpectedAddrSize;
3650 }
3651 
3652 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3653 
3654   const unsigned Opc = Inst.getOpcode();
3655   const MCInstrDesc &Desc = MII.get(Opc);
3656 
3657   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3658     return true;
3659   if (!Desc.mayLoad() || !Desc.mayStore())
3660     return true; // Not atomic
3661 
3662   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3663   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3664 
3665   // This is an incomplete check because image_atomic_cmpswap
3666   // may only use 0x3 and 0xf while other atomic operations
3667   // may use 0x1 and 0x3. However these limitations are
3668   // verified when we check that dmask matches dst size.
3669   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3670 }
3671 
3672 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3673 
3674   const unsigned Opc = Inst.getOpcode();
3675   const MCInstrDesc &Desc = MII.get(Opc);
3676 
3677   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3678     return true;
3679 
3680   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3681   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3682 
3683   // GATHER4 instructions use dmask in a different fashion compared to
3684   // other MIMG instructions. The only useful DMASK values are
3685   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3686   // (red,red,red,red) etc.) The ISA document doesn't mention
3687   // this.
3688   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3689 }
3690 
3691 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3692   const unsigned Opc = Inst.getOpcode();
3693   const MCInstrDesc &Desc = MII.get(Opc);
3694 
3695   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3696     return true;
3697 
3698   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3699   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3700       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3701 
3702   if (!BaseOpcode->MSAA)
3703     return true;
3704 
3705   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3706   assert(DimIdx != -1);
3707 
3708   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3709   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3710 
3711   return DimInfo->MSAA;
3712 }
3713 
3714 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3715 {
3716   switch (Opcode) {
3717   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3718   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3719   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3720     return true;
3721   default:
3722     return false;
3723   }
3724 }
3725 
// movrels* opcodes should only allow VGPRs as src0.
3727 // This is specified in .td description for vop1/vop3,
3728 // but sdwa is handled differently. See isSDWAOperand.
3729 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3730                                       const OperandVector &Operands) {
3731 
3732   const unsigned Opc = Inst.getOpcode();
3733   const MCInstrDesc &Desc = MII.get(Opc);
3734 
3735   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3736     return true;
3737 
3738   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3739   assert(Src0Idx != -1);
3740 
3741   SMLoc ErrLoc;
3742   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3743   if (Src0.isReg()) {
3744     auto Reg = mc2PseudoReg(Src0.getReg());
3745     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3746     if (!isSGPR(Reg, TRI))
3747       return true;
3748     ErrLoc = getRegLoc(Reg, Operands);
3749   } else {
3750     ErrLoc = getConstLoc(Operands);
3751   }
3752 
3753   Error(ErrLoc, "source operand must be a VGPR");
3754   return false;
3755 }
3756 
3757 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3758                                           const OperandVector &Operands) {
3759 
3760   const unsigned Opc = Inst.getOpcode();
3761 
3762   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3763     return true;
3764 
3765   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3766   assert(Src0Idx != -1);
3767 
3768   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3769   if (!Src0.isReg())
3770     return true;
3771 
3772   auto Reg = mc2PseudoReg(Src0.getReg());
3773   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3774   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3775     Error(getRegLoc(Reg, Operands),
3776           "source operand must be either a VGPR or an inline constant");
3777     return false;
3778   }
3779 
3780   return true;
3781 }
3782 
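// For MFMA with accumulators wider than 128 bits, src2 may be identical to
// dst or completely disjoint from it; a partial overlap such as
// dst = a[0:15], src2 = a[8:23] is rejected.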
3783 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3784                                    const OperandVector &Operands) {
3785   const unsigned Opc = Inst.getOpcode();
3786   const MCInstrDesc &Desc = MII.get(Opc);
3787 
3788   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3789     return true;
3790 
3791   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3792   if (Src2Idx == -1)
3793     return true;
3794 
3795   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3796   if (!Src2.isReg())
3797     return true;
3798 
3799   MCRegister Src2Reg = Src2.getReg();
3800   MCRegister DstReg = Inst.getOperand(0).getReg();
3801   if (Src2Reg == DstReg)
3802     return true;
3803 
3804   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3805   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3806     return true;
3807 
3808   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3809     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3810           "source 2 operand must not partially overlap with dst");
3811     return false;
3812   }
3813 
3814   return true;
3815 }
3816 
3817 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3818   switch (Inst.getOpcode()) {
3819   default:
3820     return true;
3821   case V_DIV_SCALE_F32_gfx6_gfx7:
3822   case V_DIV_SCALE_F32_vi:
3823   case V_DIV_SCALE_F32_gfx10:
3824   case V_DIV_SCALE_F64_gfx6_gfx7:
3825   case V_DIV_SCALE_F64_vi:
3826   case V_DIV_SCALE_F64_gfx10:
3827     break;
3828   }
3829 
3830   // TODO: Check that src0 = src1 or src2.
3831 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3835     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3836             .getImm() &
3837         SISrcMods::ABS) {
3838       return false;
3839     }
3840   }
3841 
3842   return true;
3843 }
3844 
3845 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3846 
3847   const unsigned Opc = Inst.getOpcode();
3848   const MCInstrDesc &Desc = MII.get(Opc);
3849 
3850   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3851     return true;
3852 
3853   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3854   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3855     if (isCI() || isSI())
3856       return false;
3857   }
3858 
3859   return true;
3860 }
3861 
3862 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3863   const unsigned Opc = Inst.getOpcode();
3864   const MCInstrDesc &Desc = MII.get(Opc);
3865 
3866   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3867     return true;
3868 
3869   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3870   if (DimIdx < 0)
3871     return true;
3872 
3873   long Imm = Inst.getOperand(DimIdx).getImm();
3874   if (Imm < 0 || Imm >= 8)
3875     return false;
3876 
3877   return true;
3878 }
3879 
3880 static bool IsRevOpcode(const unsigned Opcode)
3881 {
3882   switch (Opcode) {
3883   case AMDGPU::V_SUBREV_F32_e32:
3884   case AMDGPU::V_SUBREV_F32_e64:
3885   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3886   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3887   case AMDGPU::V_SUBREV_F32_e32_vi:
3888   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3889   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3890   case AMDGPU::V_SUBREV_F32_e64_vi:
3891 
3892   case AMDGPU::V_SUBREV_CO_U32_e32:
3893   case AMDGPU::V_SUBREV_CO_U32_e64:
3894   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3895   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3896 
3897   case AMDGPU::V_SUBBREV_U32_e32:
3898   case AMDGPU::V_SUBBREV_U32_e64:
3899   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3900   case AMDGPU::V_SUBBREV_U32_e32_vi:
3901   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3902   case AMDGPU::V_SUBBREV_U32_e64_vi:
3903 
3904   case AMDGPU::V_SUBREV_U32_e32:
3905   case AMDGPU::V_SUBREV_U32_e64:
3906   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3907   case AMDGPU::V_SUBREV_U32_e32_vi:
3908   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3909   case AMDGPU::V_SUBREV_U32_e64_vi:
3910 
3911   case AMDGPU::V_SUBREV_F16_e32:
3912   case AMDGPU::V_SUBREV_F16_e64:
3913   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3914   case AMDGPU::V_SUBREV_F16_e32_vi:
3915   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3916   case AMDGPU::V_SUBREV_F16_e64_vi:
3917 
3918   case AMDGPU::V_SUBREV_U16_e32:
3919   case AMDGPU::V_SUBREV_U16_e64:
3920   case AMDGPU::V_SUBREV_U16_e32_vi:
3921   case AMDGPU::V_SUBREV_U16_e64_vi:
3922 
3923   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3924   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3925   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3926 
3927   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3928   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3929 
3930   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3931   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3932 
3933   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3934   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3935 
3936   case AMDGPU::V_LSHRREV_B32_e32:
3937   case AMDGPU::V_LSHRREV_B32_e64:
3938   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3939   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3940   case AMDGPU::V_LSHRREV_B32_e32_vi:
3941   case AMDGPU::V_LSHRREV_B32_e64_vi:
3942   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3943   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3944 
3945   case AMDGPU::V_ASHRREV_I32_e32:
3946   case AMDGPU::V_ASHRREV_I32_e64:
3947   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3948   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3949   case AMDGPU::V_ASHRREV_I32_e32_vi:
3950   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3951   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3952   case AMDGPU::V_ASHRREV_I32_e64_vi:
3953 
3954   case AMDGPU::V_LSHLREV_B32_e32:
3955   case AMDGPU::V_LSHLREV_B32_e64:
3956   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3957   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3958   case AMDGPU::V_LSHLREV_B32_e32_vi:
3959   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3960   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3961   case AMDGPU::V_LSHLREV_B32_e64_vi:
3962 
3963   case AMDGPU::V_LSHLREV_B16_e32:
3964   case AMDGPU::V_LSHLREV_B16_e64:
3965   case AMDGPU::V_LSHLREV_B16_e32_vi:
3966   case AMDGPU::V_LSHLREV_B16_e64_vi:
3967   case AMDGPU::V_LSHLREV_B16_gfx10:
3968 
3969   case AMDGPU::V_LSHRREV_B16_e32:
3970   case AMDGPU::V_LSHRREV_B16_e64:
3971   case AMDGPU::V_LSHRREV_B16_e32_vi:
3972   case AMDGPU::V_LSHRREV_B16_e64_vi:
3973   case AMDGPU::V_LSHRREV_B16_gfx10:
3974 
3975   case AMDGPU::V_ASHRREV_I16_e32:
3976   case AMDGPU::V_ASHRREV_I16_e64:
3977   case AMDGPU::V_ASHRREV_I16_e32_vi:
3978   case AMDGPU::V_ASHRREV_I16_e64_vi:
3979   case AMDGPU::V_ASHRREV_I16_gfx10:
3980 
3981   case AMDGPU::V_LSHLREV_B64_e64:
3982   case AMDGPU::V_LSHLREV_B64_gfx10:
3983   case AMDGPU::V_LSHLREV_B64_vi:
3984 
3985   case AMDGPU::V_LSHRREV_B64_e64:
3986   case AMDGPU::V_LSHRREV_B64_gfx10:
3987   case AMDGPU::V_LSHRREV_B64_vi:
3988 
3989   case AMDGPU::V_ASHRREV_I64_e64:
3990   case AMDGPU::V_ASHRREV_I64_gfx10:
3991   case AMDGPU::V_ASHRREV_I64_vi:
3992 
3993   case AMDGPU::V_PK_LSHLREV_B16:
3994   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3995   case AMDGPU::V_PK_LSHLREV_B16_vi:
3996 
3997   case AMDGPU::V_PK_LSHRREV_B16:
3998   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3999   case AMDGPU::V_PK_LSHRREV_B16_vi:
4000   case AMDGPU::V_PK_ASHRREV_I16:
4001   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4002   case AMDGPU::V_PK_ASHRREV_I16_vi:
4003     return true;
4004   default:
4005     return false;
4006   }
4007 }
4008 
4009 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4010 
4011   using namespace SIInstrFlags;
4012   const unsigned Opcode = Inst.getOpcode();
4013   const MCInstrDesc &Desc = MII.get(Opcode);
4014 
  // The lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings that do not accept these.
4017   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4018   if ((Desc.TSFlags & Enc) == 0)
4019     return None;
4020 
4021   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4022     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4023     if (SrcIdx == -1)
4024       break;
4025     const auto &Src = Inst.getOperand(SrcIdx);
4026     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4027 
4028       if (isGFX90A() || isGFX11Plus())
4029         return StringRef("lds_direct is not supported on this GPU");
4030 
4031       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4032         return StringRef("lds_direct cannot be used with this instruction");
4033 
4034       if (SrcName != OpName::src0)
4035         return StringRef("lds_direct may be used as src0 only");
4036     }
4037   }
4038 
4039   return None;
4040 }
4041 
4042 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4043   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4044     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4045     if (Op.isFlatOffset())
4046       return Op.getStartLoc();
4047   }
4048   return getLoc();
4049 }
4050 
4051 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4052                                          const OperandVector &Operands) {
4053   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4054   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4055     return true;
4056 
4057   auto Opcode = Inst.getOpcode();
4058   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4059   assert(OpNum != -1);
4060 
4061   const auto &Op = Inst.getOperand(OpNum);
4062   if (!hasFlatOffsets() && Op.getImm() != 0) {
4063     Error(getFlatOffsetLoc(Operands),
4064           "flat offset modifier is not supported on this GPU");
4065     return false;
4066   }
4067 
4068   // For FLAT segment the offset must be positive;
4069   // MSB is ignored and forced to zero.
4070   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4071     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4072     if (!isIntN(OffsetSize, Op.getImm())) {
4073       Error(getFlatOffsetLoc(Operands),
4074             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4075       return false;
4076     }
4077   } else {
4078     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4079     if (!isUIntN(OffsetSize, Op.getImm())) {
4080       Error(getFlatOffsetLoc(Operands),
4081             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4082       return false;
4083     }
4084   }
4085 
4086   return true;
4087 }
4088 
4089 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4090   // Start with second operand because SMEM Offset cannot be dst or src0.
4091   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4092     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4093     if (Op.isSMEMOffset())
4094       return Op.getStartLoc();
4095   }
4096   return getLoc();
4097 }
4098 
4099 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4100                                          const OperandVector &Operands) {
4101   if (isCI() || isSI())
4102     return true;
4103 
4104   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4105   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4106     return true;
4107 
4108   auto Opcode = Inst.getOpcode();
4109   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4110   if (OpNum == -1)
4111     return true;
4112 
4113   const auto &Op = Inst.getOperand(OpNum);
4114   if (!Op.isImm())
4115     return true;
4116 
4117   uint64_t Offset = Op.getImm();
4118   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4119   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4120       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4121     return true;
4122 
4123   Error(getSMEMOffsetLoc(Operands),
4124         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4125                                "expected a 21-bit signed offset");
4126 
4127   return false;
4128 }
4129 
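// SOP2/SOPC instructions can encode at most one unique 32-bit literal.
// For example, "s_add_u32 s0, 0x12345, 0x12345" is accepted because both
// sources refer to the same literal, while two distinct literals are not.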
4130 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4131   unsigned Opcode = Inst.getOpcode();
4132   const MCInstrDesc &Desc = MII.get(Opcode);
4133   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4134     return true;
4135 
4136   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4137   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4138 
4139   const int OpIndices[] = { Src0Idx, Src1Idx };
4140 
4141   unsigned NumExprs = 0;
4142   unsigned NumLiterals = 0;
4143   uint32_t LiteralValue;
4144 
4145   for (int OpIdx : OpIndices) {
4146     if (OpIdx == -1) break;
4147 
4148     const MCOperand &MO = Inst.getOperand(OpIdx);
4149     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4150     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4151       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4152         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4153         if (NumLiterals == 0 || LiteralValue != Value) {
4154           LiteralValue = Value;
4155           ++NumLiterals;
4156         }
4157       } else if (MO.isExpr()) {
4158         ++NumExprs;
4159       }
4160     }
4161   }
4162 
4163   return NumLiterals + NumExprs <= 1;
4164 }
4165 
4166 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4167   const unsigned Opc = Inst.getOpcode();
4168   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4169       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4170     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4171     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4172 
4173     if (OpSel & ~3)
4174       return false;
4175   }
4176 
4177   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4178     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4179     if (OpSelIdx != -1) {
4180       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4181         return false;
4182     }
4183     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4184     if (OpSelHiIdx != -1) {
4185       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4186         return false;
4187     }
4188   }
4189 
4190   return true;
4191 }
4192 
4193 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4194                                   const OperandVector &Operands) {
4195   const unsigned Opc = Inst.getOpcode();
4196   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4197   if (DppCtrlIdx < 0)
4198     return true;
4199   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4200 
4201   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4202     // DPP64 is supported for row_newbcast only.
4203     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4204     if (Src0Idx >= 0 &&
4205         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4206       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4207       Error(S, "64 bit dpp only supports row_newbcast");
4208       return false;
4209     }
4210   }
4211 
4212   return true;
4213 }
4214 
4215 // Check if VCC register matches wavefront size
4216 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4217   auto FB = getFeatureBits();
4218   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4219     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4220 }
4221 
// Only one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
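// For example, before GFX10 "v_add_f32_e64 v0, 0x3e7, v1" is rejected
// because VOP3 encodings cannot carry a literal, while the e32 form
// accepts a single 32-bit literal.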
4223 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4224                                          const OperandVector &Operands) {
4225   unsigned Opcode = Inst.getOpcode();
4226   const MCInstrDesc &Desc = MII.get(Opcode);
4227   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4228   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4229       ImmIdx == -1)
4230     return true;
4231 
4232   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4233   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4234   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4235 
4236   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4237 
4238   unsigned NumExprs = 0;
4239   unsigned NumLiterals = 0;
4240   uint32_t LiteralValue;
4241 
4242   for (int OpIdx : OpIndices) {
4243     if (OpIdx == -1)
4244       continue;
4245 
4246     const MCOperand &MO = Inst.getOperand(OpIdx);
4247     if (!MO.isImm() && !MO.isExpr())
4248       continue;
4249     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4250       continue;
4251 
4252     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4253         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4254       Error(getConstLoc(Operands),
4255             "inline constants are not allowed for this operand");
4256       return false;
4257     }
4258 
4259     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4260       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4261       if (NumLiterals == 0 || LiteralValue != Value) {
4262         LiteralValue = Value;
4263         ++NumLiterals;
4264       }
4265     } else if (MO.isExpr()) {
4266       ++NumExprs;
4267     }
4268   }
4269   NumLiterals += NumExprs;
4270 
4271   if (!NumLiterals)
4272     return true;
4273 
4274   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4275     Error(getLitLoc(Operands), "literal operands are not supported");
4276     return false;
4277   }
4278 
4279   if (NumLiterals > 1) {
4280     Error(getLitLoc(Operands), "only one literal operand is allowed");
4281     return false;
4282   }
4283 
4284   return true;
4285 }
4286 
4287 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4288 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4289                          const MCRegisterInfo *MRI) {
4290   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4291   if (OpIdx < 0)
4292     return -1;
4293 
4294   const MCOperand &Op = Inst.getOperand(OpIdx);
4295   if (!Op.isReg())
4296     return -1;
4297 
4298   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4299   auto Reg = Sub ? Sub : Op.getReg();
4300   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4301   return AGPR32.contains(Reg) ? 1 : 0;
4302 }
4303 
4304 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4305   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4306   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4307                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4308                   SIInstrFlags::DS)) == 0)
4309     return true;
4310 
4311   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4312                                                       : AMDGPU::OpName::vdata;
4313 
4314   const MCRegisterInfo *MRI = getMRI();
4315   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4316   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4317 
4318   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4319     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4320     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4321       return false;
4322   }
4323 
4324   auto FB = getFeatureBits();
4325   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4326     if (DataAreg < 0 || DstAreg < 0)
4327       return true;
4328     return DstAreg == DataAreg;
4329   }
4330 
4331   return DstAreg < 1 && DataAreg < 1;
4332 }
4333 
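// On gfx90a, VGPR and AGPR tuples must start at an even register.
// For example, v[2:3] is a valid 64-bit operand while v[1:2] is not.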
4334 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4335   auto FB = getFeatureBits();
4336   if (!FB[AMDGPU::FeatureGFX90AInsts])
4337     return true;
4338 
4339   const MCRegisterInfo *MRI = getMRI();
4340   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4341   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4342   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4343     const MCOperand &Op = Inst.getOperand(I);
4344     if (!Op.isReg())
4345       continue;
4346 
4347     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4348     if (!Sub)
4349       continue;
4350 
4351     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4352       return false;
4353     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4354       return false;
4355   }
4356 
4357   return true;
4358 }
4359 
4360 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4361   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4362     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4363     if (Op.isBLGP())
4364       return Op.getStartLoc();
4365   }
4366   return SMLoc();
4367 }
4368 
4369 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4370                                    const OperandVector &Operands) {
4371   unsigned Opc = Inst.getOpcode();
4372   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4373   if (BlgpIdx == -1)
4374     return true;
4375   SMLoc BLGPLoc = getBLGPLoc(Operands);
4376   if (!BLGPLoc.isValid())
4377     return true;
4378   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4379   auto FB = getFeatureBits();
4380   bool UsesNeg = false;
4381   if (FB[AMDGPU::FeatureGFX940Insts]) {
4382     switch (Opc) {
4383     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4384     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4385     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4386     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4387       UsesNeg = true;
4388     }
4389   }
4390 
4391   if (IsNeg == UsesNeg)
4392     return true;
4393 
4394   Error(BLGPLoc,
4395         UsesNeg ? "invalid modifier: blgp is not supported"
4396                 : "invalid modifier: neg is not supported");
4397 
4398   return false;
4399 }
4400 
4401 // gfx90a has an undocumented limitation:
// DS_GWS opcodes must use even-aligned registers.
4403 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4404                                   const OperandVector &Operands) {
4405   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4406     return true;
4407 
4408   int Opc = Inst.getOpcode();
4409   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4410       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4411     return true;
4412 
4413   const MCRegisterInfo *MRI = getMRI();
4414   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4415   int Data0Pos =
4416       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4417   assert(Data0Pos != -1);
4418   auto Reg = Inst.getOperand(Data0Pos).getReg();
4419   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4420   if (RegIdx & 1) {
4421     SMLoc RegLoc = getRegLoc(Reg, Operands);
4422     Error(RegLoc, "vgpr must be even aligned");
4423     return false;
4424   }
4425 
4426   return true;
4427 }
4428 
4429 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4430                                             const OperandVector &Operands,
4431                                             const SMLoc &IDLoc) {
4432   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4433                                            AMDGPU::OpName::cpol);
4434   if (CPolPos == -1)
4435     return true;
4436 
4437   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4438 
4439   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4440   if (TSFlags & SIInstrFlags::SMRD) {
4441     if (CPol && (isSI() || isCI())) {
4442       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4443       Error(S, "cache policy is not supported for SMRD instructions");
4444       return false;
4445     }
4446     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4447       Error(IDLoc, "invalid cache policy for SMEM instruction");
4448       return false;
4449     }
4450   }
4451 
4452   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4453     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4454     StringRef CStr(S.getPointer());
4455     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4456     Error(S, "scc is not supported on this GPU");
4457     return false;
4458   }
4459 
4460   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4461     return true;
4462 
4463   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4464     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4465       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4466                               : "instruction must use glc");
4467       return false;
4468     }
4469   } else {
4470     if (CPol & CPol::GLC) {
4471       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4472       StringRef CStr(S.getPointer());
4473       S = SMLoc::getFromPointer(
4474           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4475       Error(S, isGFX940() ? "instruction must not use sc0"
4476                           : "instruction must not use glc");
4477       return false;
4478     }
4479   }
4480 
4481   return true;
4482 }
4483 
4484 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4485                                          const OperandVector &Operands,
4486                                          const SMLoc &IDLoc) {
4487   if (isGFX940())
4488     return true;
4489 
4490   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4491   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4492       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4493     return true;
4494   // This is FLAT LDS DMA.
4495 
4496   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4497   StringRef CStr(S.getPointer());
4498   if (!CStr.startswith("lds")) {
    // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
    // The LDS version should have an 'lds' modifier, but it follows optional
    // operands, so its absence is ignored by the matcher.
4502     Error(IDLoc, "invalid operands for instruction");
4503     return false;
4504   }
4505 
4506   return true;
4507 }
4508 
4509 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4510   if (!isGFX11Plus())
4511     return true;
4512   for (auto &Operand : Operands) {
4513     if (!Operand->isReg())
4514       continue;
4515     unsigned Reg = Operand->getReg();
4516     if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4517       Error(getRegLoc(Reg, Operands),
4518             "execz and vccz are not supported on this GPU");
4519       return false;
4520     }
4521   }
4522   return true;
4523 }
4524 
4525 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4526                                           const SMLoc &IDLoc,
4527                                           const OperandVector &Operands) {
4528   if (auto ErrMsg = validateLdsDirect(Inst)) {
4529     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4530     return false;
4531   }
4532   if (!validateSOPLiteral(Inst)) {
4533     Error(getLitLoc(Operands),
4534       "only one literal operand is allowed");
4535     return false;
4536   }
4537   if (!validateVOPLiteral(Inst, Operands)) {
4538     return false;
4539   }
4540   if (!validateConstantBusLimitations(Inst, Operands)) {
4541     return false;
4542   }
4543   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4544     return false;
4545   }
4546   if (!validateIntClampSupported(Inst)) {
4547     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4548       "integer clamping is not supported on this GPU");
4549     return false;
4550   }
4551   if (!validateOpSel(Inst)) {
4552     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4553       "invalid op_sel operand");
4554     return false;
4555   }
4556   if (!validateDPP(Inst, Operands)) {
4557     return false;
4558   }
4559   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4560   if (!validateMIMGD16(Inst)) {
4561     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4562       "d16 modifier is not supported on this GPU");
4563     return false;
4564   }
4565   if (!validateMIMGDim(Inst)) {
4566     Error(IDLoc, "dim modifier is required on this GPU");
4567     return false;
4568   }
4569   if (!validateMIMGMSAA(Inst)) {
4570     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4571           "invalid dim; must be MSAA type");
4572     return false;
4573   }
4574   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4575     Error(IDLoc, *ErrMsg);
4576     return false;
4577   }
4578   if (!validateMIMGAddrSize(Inst)) {
4579     Error(IDLoc,
4580       "image address size does not match dim and a16");
4581     return false;
4582   }
4583   if (!validateMIMGAtomicDMask(Inst)) {
4584     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4585       "invalid atomic image dmask");
4586     return false;
4587   }
4588   if (!validateMIMGGatherDMask(Inst)) {
4589     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4590       "invalid image_gather dmask: only one bit must be set");
4591     return false;
4592   }
4593   if (!validateMovrels(Inst, Operands)) {
4594     return false;
4595   }
4596   if (!validateFlatOffset(Inst, Operands)) {
4597     return false;
4598   }
4599   if (!validateSMEMOffset(Inst, Operands)) {
4600     return false;
4601   }
4602   if (!validateMAIAccWrite(Inst, Operands)) {
4603     return false;
4604   }
4605   if (!validateMFMA(Inst, Operands)) {
4606     return false;
4607   }
4608   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4609     return false;
4610   }
4611 
4612   if (!validateAGPRLdSt(Inst)) {
4613     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4614     ? "invalid register class: data and dst should be all VGPR or AGPR"
4615     : "invalid register class: agpr loads and stores not supported on this GPU"
4616     );
4617     return false;
4618   }
4619   if (!validateVGPRAlign(Inst)) {
4620     Error(IDLoc,
4621       "invalid register class: vgpr tuples must be 64 bit aligned");
4622     return false;
4623   }
4624   if (!validateGWS(Inst, Operands)) {
4625     return false;
4626   }
4627 
4628   if (!validateBLGP(Inst, Operands)) {
4629     return false;
4630   }
4631 
4632   if (!validateDivScale(Inst)) {
4633     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4634     return false;
4635   }
4639   if (!validateExeczVcczOperands(Operands)) {
4640     return false;
4641   }
4642 
4643   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4644     return false;
4645   }
4646 
4647   return true;
4648 }
4649 
4650 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4651                                             const FeatureBitset &FBS,
4652                                             unsigned VariantID = 0);
4653 
4654 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4655                                 const FeatureBitset &AvailableFeatures,
4656                                 unsigned VariantID);
4657 
4658 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4659                                        const FeatureBitset &FBS) {
4660   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4661 }
4662 
4663 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4664                                        const FeatureBitset &FBS,
4665                                        ArrayRef<unsigned> Variants) {
4666   for (auto Variant : Variants) {
4667     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4668       return true;
4669   }
4670 
4671   return false;
4672 }
4673 
4674 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4675                                                   const SMLoc &IDLoc) {
4676   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4677 
4678   // Check if requested instruction variant is supported.
4679   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4680     return false;
4681 
4682   // This instruction is not supported.
4683   // Clear any other pending errors because they are no longer relevant.
4684   getParser().clearPendingErrors();
4685 
4686   // Requested instruction variant is not supported.
4687   // Check if any other variants are supported.
4688   StringRef VariantName = getMatchedVariantName();
4689   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4690     return Error(IDLoc,
4691                  Twine(VariantName,
4692                        " variant of this instruction is not supported"));
4693   }
4694 
4695   // Finally check if this instruction is supported on any other GPU.
4696   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4697     return Error(IDLoc, "instruction not supported on this GPU");
4698   }
4699 
4700   // Instruction not supported on any GPU. Probably a typo.
4701   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4702   return Error(IDLoc, "invalid instruction" + Suggestion);
4703 }
4704 
4705 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4706                                               OperandVector &Operands,
4707                                               MCStreamer &Out,
4708                                               uint64_t &ErrorInfo,
4709                                               bool MatchingInlineAsm) {
4710   MCInst Inst;
4711   unsigned Result = Match_Success;
4712   for (auto Variant : getMatchedVariants()) {
4713     uint64_t EI;
4714     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4715                                   Variant);
    // Match statuses are ordered from least to most specific, and the most
    // specific status seen so far is kept as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
    //                    < Match_PreferE32
4719     if ((R == Match_Success) ||
4720         (R == Match_PreferE32) ||
4721         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4722         (R == Match_InvalidOperand && Result != Match_MissingFeature
4723                                    && Result != Match_PreferE32) ||
4724         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4725                                    && Result != Match_MissingFeature
4726                                    && Result != Match_PreferE32)) {
4727       Result = R;
4728       ErrorInfo = EI;
4729     }
4730     if (R == Match_Success)
4731       break;
4732   }
4733 
4734   if (Result == Match_Success) {
4735     if (!validateInstruction(Inst, IDLoc, Operands)) {
4736       return true;
4737     }
4738     Inst.setLoc(IDLoc);
4739     Out.emitInstruction(Inst, getSTI());
4740     return false;
4741   }
4742 
4743   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4744   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4745     return true;
4746   }
4747 
4748   switch (Result) {
4749   default: break;
4750   case Match_MissingFeature:
4751     // It has been verified that the specified instruction
4752     // mnemonic is valid. A match was found but it requires
4753     // features which are not supported on this GPU.
4754     return Error(IDLoc, "operands are not valid for this GPU or mode");
4755 
4756   case Match_InvalidOperand: {
4757     SMLoc ErrorLoc = IDLoc;
4758     if (ErrorInfo != ~0ULL) {
4759       if (ErrorInfo >= Operands.size()) {
4760         return Error(IDLoc, "too few operands for instruction");
4761       }
4762       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4763       if (ErrorLoc == SMLoc())
4764         ErrorLoc = IDLoc;
4765     }
4766     return Error(ErrorLoc, "invalid operand for instruction");
4767   }
4768 
4769   case Match_PreferE32:
4770     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4771                         "should be encoded as e32");
4772   case Match_MnemonicFail:
4773     llvm_unreachable("Invalid instructions should have been handled already");
4774   }
4775   llvm_unreachable("Implement any new match types added!");
4776 }
4777 
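// Parse an absolute expression and truncate it to a 32-bit value. Returns
// true on failure.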
4778 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4779   int64_t Tmp = -1;
4780   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4781     return true;
4782   }
4783   if (getParser().parseAbsoluteExpression(Tmp)) {
4784     return true;
4785   }
4786   Ret = static_cast<uint32_t>(Tmp);
4787   return false;
4788 }
4789 
4790 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4791                                                uint32_t &Minor) {
4792   if (ParseAsAbsoluteExpression(Major))
4793     return TokError("invalid major version");
4794 
4795   if (!trySkipToken(AsmToken::Comma))
4796     return TokError("minor version number required, comma expected");
4797 
4798   if (ParseAsAbsoluteExpression(Minor))
4799     return TokError("invalid minor version");
4800 
4801   return false;
4802 }
4803 
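/// ParseDirectiveAMDGCNTarget
///  ::= .amdgcn_target <quoted target id>
/// The quoted target id (e.g. "amdgcn-amd-amdhsa--gfx900") must match the
/// target id the assembler was configured with.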
4804 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4805   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4806     return TokError("directive only supported for amdgcn architecture");
4807 
4808   std::string TargetIDDirective;
4809   SMLoc TargetStart = getTok().getLoc();
4810   if (getParser().parseEscapedString(TargetIDDirective))
4811     return true;
4812 
4813   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4814   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4815     return getParser().Error(TargetRange.Start,
4816         (Twine(".amdgcn_target directive's target id ") +
4817          Twine(TargetIDDirective) +
4818          Twine(" does not match the specified target id ") +
4819          Twine(getTargetStreamer().getTargetID()->toString())).str());
4820 
4821   return false;
4822 }
4823 
4824 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4825   return Error(Range.Start, "value out of range", Range);
4826 }
4827 
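// Compute the granulated VGPR/SGPR block counts encoded in compute_pgm_rsrc1
// from the next free register numbers, accounting for the extra SGPRs used by
// VCC, FLAT_SCRATCH and XNACK_MASK as well as the SGPR init bug workaround.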
4828 bool AMDGPUAsmParser::calculateGPRBlocks(
4829     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4830     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4831     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4832     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4833   // TODO(scott.linder): These calculations are duplicated from
4834   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4835   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4836 
4837   unsigned NumVGPRs = NextFreeVGPR;
4838   unsigned NumSGPRs = NextFreeSGPR;
4839 
4840   if (Version.Major >= 10)
4841     NumSGPRs = 0;
4842   else {
4843     unsigned MaxAddressableNumSGPRs =
4844         IsaInfo::getAddressableNumSGPRs(&getSTI());
4845 
4846     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4847         NumSGPRs > MaxAddressableNumSGPRs)
4848       return OutOfRangeError(SGPRRange);
4849 
4850     NumSGPRs +=
4851         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4852 
4853     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4854         NumSGPRs > MaxAddressableNumSGPRs)
4855       return OutOfRangeError(SGPRRange);
4856 
4857     if (Features.test(FeatureSGPRInitBug))
4858       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4859   }
4860 
4861   VGPRBlocks =
4862       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4863   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4864 
4865   return false;
4866 }
4867 
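/// ParseDirectiveAMDHSAKernel
///  ::= .amdhsa_kernel <kernel name>
///        (.amdhsa_* <expression>)*
///      .end_amdhsa_kernel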
4868 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4869   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4870     return TokError("directive only supported for amdgcn architecture");
4871 
4872   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4873     return TokError("directive only supported for amdhsa OS");
4874 
4875   StringRef KernelName;
4876   if (getParser().parseIdentifier(KernelName))
4877     return true;
4878 
4879   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4880 
4881   StringSet<> Seen;
4882 
4883   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4884 
4885   SMRange VGPRRange;
4886   uint64_t NextFreeVGPR = 0;
4887   uint64_t AccumOffset = 0;
4888   uint64_t SharedVGPRCount = 0;
4889   SMRange SGPRRange;
4890   uint64_t NextFreeSGPR = 0;
4891 
4892   // Count the number of user SGPRs implied from the enabled feature bits.
4893   unsigned ImpliedUserSGPRCount = 0;
4894 
4895   // Track if the asm explicitly contains the directive for the user SGPR
4896   // count.
4897   Optional<unsigned> ExplicitUserSGPRCount;
4898   bool ReserveVCC = true;
4899   bool ReserveFlatScr = true;
4900   Optional<bool> EnableWavefrontSize32;
4901 
4902   while (true) {
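    // Lex EndOfStatement.  This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.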
4903     while (trySkipToken(AsmToken::EndOfStatement));
4904 
4905     StringRef ID;
4906     SMRange IDRange = getTok().getLocRange();
4907     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4908       return true;
4909 
4910     if (ID == ".end_amdhsa_kernel")
4911       break;
4912 
4913     if (!Seen.insert(ID).second)
4914       return TokError(".amdhsa_ directives cannot be repeated");
4915 
4916     SMLoc ValStart = getLoc();
4917     int64_t IVal;
4918     if (getParser().parseAbsoluteExpression(IVal))
4919       return true;
4920     SMLoc ValEnd = getLoc();
4921     SMRange ValRange = SMRange(ValStart, ValEnd);
4922 
4923     if (IVal < 0)
4924       return OutOfRangeError(ValRange);
4925 
4926     uint64_t Val = IVal;
4927 
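// Check that VALUE fits in the ENTRY bitfield and set that bitfield of the
// kernel descriptor word FIELD.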
4928 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4929   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4930     return OutOfRangeError(RANGE);                                             \
4931   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4932 
4933     if (ID == ".amdhsa_group_segment_fixed_size") {
4934       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4935         return OutOfRangeError(ValRange);
4936       KD.group_segment_fixed_size = Val;
4937     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4938       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4939         return OutOfRangeError(ValRange);
4940       KD.private_segment_fixed_size = Val;
4941     } else if (ID == ".amdhsa_kernarg_size") {
4942       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4943         return OutOfRangeError(ValRange);
4944       KD.kernarg_size = Val;
4945     } else if (ID == ".amdhsa_user_sgpr_count") {
4946       ExplicitUserSGPRCount = Val;
4947     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4948       if (hasArchitectedFlatScratch())
4949         return Error(IDRange.Start,
4950                      "directive is not supported with architected flat scratch",
4951                      IDRange);
4952       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4953                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4954                        Val, ValRange);
4955       if (Val)
4956         ImpliedUserSGPRCount += 4;
4957     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4958       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4959                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4960                        ValRange);
4961       if (Val)
4962         ImpliedUserSGPRCount += 2;
4963     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4964       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4965                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4966                        ValRange);
4967       if (Val)
4968         ImpliedUserSGPRCount += 2;
4969     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4970       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4971                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4972                        Val, ValRange);
4973       if (Val)
4974         ImpliedUserSGPRCount += 2;
4975     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4976       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4977                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4978                        ValRange);
4979       if (Val)
4980         ImpliedUserSGPRCount += 2;
4981     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4982       if (hasArchitectedFlatScratch())
4983         return Error(IDRange.Start,
4984                      "directive is not supported with architected flat scratch",
4985                      IDRange);
4986       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4987                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4988                        ValRange);
4989       if (Val)
4990         ImpliedUserSGPRCount += 2;
4991     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4992       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4993                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4994                        Val, ValRange);
4995       if (Val)
4996         ImpliedUserSGPRCount += 1;
4997     } else if (ID == ".amdhsa_wavefront_size32") {
4998       if (IVersion.Major < 10)
4999         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5000       EnableWavefrontSize32 = Val;
5001       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5002                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5003                        Val, ValRange);
5004     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5005       if (hasArchitectedFlatScratch())
5006         return Error(IDRange.Start,
5007                      "directive is not supported with architected flat scratch",
5008                      IDRange);
5009       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5010                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5011     } else if (ID == ".amdhsa_enable_private_segment") {
5012       if (!hasArchitectedFlatScratch())
5013         return Error(
5014             IDRange.Start,
5015             "directive is not supported without architected flat scratch",
5016             IDRange);
5017       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5018                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5019     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5020       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5021                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5022                        ValRange);
5023     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5024       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5025                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5026                        ValRange);
5027     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5028       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5029                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5030                        ValRange);
5031     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5032       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5033                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5034                        ValRange);
5035     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5036       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5037                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5038                        ValRange);
5039     } else if (ID == ".amdhsa_next_free_vgpr") {
5040       VGPRRange = ValRange;
5041       NextFreeVGPR = Val;
5042     } else if (ID == ".amdhsa_next_free_sgpr") {
5043       SGPRRange = ValRange;
5044       NextFreeSGPR = Val;
5045     } else if (ID == ".amdhsa_accum_offset") {
5046       if (!isGFX90A())
5047         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5048       AccumOffset = Val;
5049     } else if (ID == ".amdhsa_reserve_vcc") {
5050       if (!isUInt<1>(Val))
5051         return OutOfRangeError(ValRange);
5052       ReserveVCC = Val;
5053     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5054       if (IVersion.Major < 7)
5055         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5056       if (hasArchitectedFlatScratch())
5057         return Error(IDRange.Start,
5058                      "directive is not supported with architected flat scratch",
5059                      IDRange);
5060       if (!isUInt<1>(Val))
5061         return OutOfRangeError(ValRange);
5062       ReserveFlatScr = Val;
5063     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5064       if (IVersion.Major < 8)
5065         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5066       if (!isUInt<1>(Val))
5067         return OutOfRangeError(ValRange);
5068       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(
            IDRange.Start,
            ".amdhsa_reserve_xnack_mask does not match target id", IDRange);
5071     } else if (ID == ".amdhsa_float_round_mode_32") {
5072       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5073                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5074     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5075       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5076                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5077     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5078       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5079                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5080     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5081       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5082                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5083                        ValRange);
5084     } else if (ID == ".amdhsa_dx10_clamp") {
5085       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5086                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5087     } else if (ID == ".amdhsa_ieee_mode") {
5088       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5089                        Val, ValRange);
5090     } else if (ID == ".amdhsa_fp16_overflow") {
5091       if (IVersion.Major < 9)
5092         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5093       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5094                        ValRange);
5095     } else if (ID == ".amdhsa_tg_split") {
5096       if (!isGFX90A())
5097         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5098       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5099                        ValRange);
5100     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5101       if (IVersion.Major < 10)
5102         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5103       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5104                        ValRange);
5105     } else if (ID == ".amdhsa_memory_ordered") {
5106       if (IVersion.Major < 10)
5107         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5108       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5109                        ValRange);
5110     } else if (ID == ".amdhsa_forward_progress") {
5111       if (IVersion.Major < 10)
5112         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5113       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5114                        ValRange);
5115     } else if (ID == ".amdhsa_shared_vgpr_count") {
5116       if (IVersion.Major < 10)
5117         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5118       SharedVGPRCount = Val;
5119       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5120                        COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5121                        ValRange);
5122     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5123       PARSE_BITS_ENTRY(
5124           KD.compute_pgm_rsrc2,
5125           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5126           ValRange);
5127     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5128       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5129                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5130                        Val, ValRange);
5131     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5132       PARSE_BITS_ENTRY(
5133           KD.compute_pgm_rsrc2,
5134           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5135           ValRange);
5136     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5137       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5138                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5139                        Val, ValRange);
5140     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5141       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5142                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5143                        Val, ValRange);
5144     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5145       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5146                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5147                        Val, ValRange);
5148     } else if (ID == ".amdhsa_exception_int_div_zero") {
5149       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5150                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5151                        Val, ValRange);
5152     } else {
5153       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5154     }
5155 
5156 #undef PARSE_BITS_ENTRY
5157   }
5158 
5159   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5160     return TokError(".amdhsa_next_free_vgpr directive is required");
5161 
5162   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5163     return TokError(".amdhsa_next_free_sgpr directive is required");
5164 
5165   unsigned VGPRBlocks;
5166   unsigned SGPRBlocks;
5167   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5168                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5169                          EnableWavefrontSize32, NextFreeVGPR,
5170                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5171                          SGPRBlocks))
5172     return true;
5173 
5174   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5175           VGPRBlocks))
5176     return OutOfRangeError(VGPRRange);
5177   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5178                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5179 
5180   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5181           SGPRBlocks))
5182     return OutOfRangeError(SGPRRange);
5183   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5184                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5185                   SGPRBlocks);
5186 
5187   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdhsa_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");
5190 
5191   unsigned UserSGPRCount =
5192       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5193 
5194   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5195     return TokError("too many user SGPRs enabled");
5196   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5197                   UserSGPRCount);
5198 
5199   if (isGFX90A()) {
5200     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5201       return TokError(".amdhsa_accum_offset directive is required");
5202     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5203       return TokError("accum_offset should be in range [4..256] in "
5204                       "increments of 4");
5205     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5206       return TokError("accum_offset exceeds total VGPR allocation");
5207     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5208                     (AccumOffset / 4 - 1));
5209   }
5210 
5211   if (IVersion.Major == 10) {
    // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY.
5213     if (SharedVGPRCount && EnableWavefrontSize32) {
5214       return TokError("shared_vgpr_count directive not valid on "
5215                       "wavefront size 32");
5216     }
5217     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5218       return TokError("shared_vgpr_count*2 + "
5219                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      "exceed 63");
5221     }
5222   }
5223 
5224   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5225       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5226       ReserveFlatScr);
5227   return false;
5228 }
5229 
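/// ParseDirectiveHSACodeObjectVersion
///  ::= .hsa_code_object_version <major>, <minor>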
5230 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5231   uint32_t Major;
5232   uint32_t Minor;
5233 
5234   if (ParseDirectiveMajorMinor(Major, Minor))
5235     return true;
5236 
5237   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5238   return false;
5239 }
5240 
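/// ParseDirectiveHSACodeObjectISA
///  ::= .hsa_code_object_isa
///        [<major>, <minor>, <stepping>, "<vendor>", "<arch>"]
/// With no arguments, the ISA version of the targeted GPU is used.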
5241 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5242   uint32_t Major;
5243   uint32_t Minor;
5244   uint32_t Stepping;
5245   StringRef VendorName;
5246   StringRef ArchName;
5247 
5248   // If this directive has no arguments, then use the ISA version for the
5249   // targeted GPU.
5250   if (isToken(AsmToken::EndOfStatement)) {
5251     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5252     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5253                                                         ISA.Stepping,
5254                                                         "AMD", "AMDGPU");
5255     return false;
5256   }
5257 
5258   if (ParseDirectiveMajorMinor(Major, Minor))
5259     return true;
5260 
5261   if (!trySkipToken(AsmToken::Comma))
5262     return TokError("stepping version number required, comma expected");
5263 
5264   if (ParseAsAbsoluteExpression(Stepping))
5265     return TokError("invalid stepping version");
5266 
5267   if (!trySkipToken(AsmToken::Comma))
5268     return TokError("vendor name required, comma expected");
5269 
5270   if (!parseString(VendorName, "invalid vendor name"))
5271     return true;
5272 
5273   if (!trySkipToken(AsmToken::Comma))
5274     return TokError("arch name required, comma expected");
5275 
5276   if (!parseString(ArchName, "invalid arch name"))
5277     return true;
5278 
5279   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5280                                                       VendorName, ArchName);
5281   return false;
5282 }
5283 
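// Parse a single amd_kernel_code_t field assignment and reject settings that
// are inconsistent with the selected wavefront size or GPU generation.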
5284 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5285                                                amd_kernel_code_t &Header) {
5286   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5287   // assembly for backwards compatibility.
5288   if (ID == "max_scratch_backing_memory_byte_size") {
5289     Parser.eatToEndOfStatement();
5290     return false;
5291   }
5292 
5293   SmallString<40> ErrStr;
5294   raw_svector_ostream Err(ErrStr);
5295   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5296     return TokError(Err.str());
5297   }
5298   Lex();
5299 
5300   if (ID == "enable_wavefront_size32") {
5301     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5302       if (!isGFX10Plus())
5303         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5304       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5305         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5306     } else {
5307       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5308         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5309     }
5310   }
5311 
5312   if (ID == "wavefront_size") {
5313     if (Header.wavefront_size == 5) {
5314       if (!isGFX10Plus())
5315         return TokError("wavefront_size=5 is only allowed on GFX10+");
5316       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5317         return TokError("wavefront_size=5 requires +WavefrontSize32");
5318     } else if (Header.wavefront_size == 6) {
5319       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5320         return TokError("wavefront_size=6 requires +WavefrontSize64");
5321     }
5322   }
5323 
5324   if (ID == "enable_wgp_mode") {
5325     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5326         !isGFX10Plus())
5327       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5328   }
5329 
5330   if (ID == "enable_mem_ordered") {
5331     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5332         !isGFX10Plus())
5333       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5334   }
5335 
5336   if (ID == "enable_fwd_progress") {
5337     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5338         !isGFX10Plus())
5339       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5340   }
5341 
5342   return false;
5343 }
5344 
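/// ParseDirectiveAMDKernelCodeT
///  ::= .amd_kernel_code_t
///        (<field> = <expression>)*
///      .end_amd_kernel_code_t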
5345 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5346   amd_kernel_code_t Header;
5347   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5348 
5349   while (true) {
5350     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5351     // will set the current token to EndOfStatement.
5352     while(trySkipToken(AsmToken::EndOfStatement));
5353 
5354     StringRef ID;
5355     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5356       return true;
5357 
5358     if (ID == ".end_amd_kernel_code_t")
5359       break;
5360 
5361     if (ParseAMDKernelCodeTValue(ID, Header))
5362       return true;
5363   }
5364 
5365   getTargetStreamer().EmitAMDKernelCodeT(Header);
5366 
5367   return false;
5368 }
5369 
5370 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5371   StringRef KernelName;
5372   if (!parseId(KernelName, "expected symbol name"))
5373     return true;
5374 
5375   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5376                                            ELF::STT_AMDGPU_HSA_KERNEL);
5377 
5378   KernelScope.initialize(getContext());
5379   return false;
5380 }
5381 
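// Parse the .amd_amdgpu_isa directive; the quoted target id string must match
// the target id of the subtarget being assembled for.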
5382 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5383   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5384     return Error(getLoc(),
5385                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5386                  "architectures");
5387   }
5388 
5389   auto TargetIDDirective = getLexer().getTok().getStringContents();
5390   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5391     return Error(getParser().getTok().getLoc(), "target id must match options");
5392 
5393   getTargetStreamer().EmitISAVersion();
5394   Lex();
5395 
5396   return false;
5397 }
5398 
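// Collect the text between the HSA metadata begin/end directives and pass it
// to the target streamer, using the V3 or V2 form depending on the code
// object ABI version.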
5399 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5400   const char *AssemblerDirectiveBegin;
5401   const char *AssemblerDirectiveEnd;
5402   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5403       isHsaAbiVersion3AndAbove(&getSTI())
5404           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5405                             HSAMD::V3::AssemblerDirectiveEnd)
5406           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5407                             HSAMD::AssemblerDirectiveEnd);
5408 
5409   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5410     return Error(getLoc(),
5411                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5412                  "not available on non-amdhsa OSes")).str());
5413   }
5414 
5415   std::string HSAMetadataString;
5416   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5417                           HSAMetadataString))
5418     return true;
5419 
5420   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5421     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5422       return Error(getLoc(), "invalid HSA metadata");
5423   } else {
5424     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5425       return Error(getLoc(), "invalid HSA metadata");
5426   }
5427 
5428   return false;
5429 }
5430 
5431 /// Common code to parse out a block of text (typically YAML) between start and
5432 /// end directives.
5433 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5434                                           const char *AssemblerDirectiveEnd,
5435                                           std::string &CollectString) {
5436 
5437   raw_string_ostream CollectStream(CollectString);
5438 
5439   getLexer().setSkipSpace(false);
5440 
5441   bool FoundEnd = false;
5442   while (!isToken(AsmToken::Eof)) {
5443     while (isToken(AsmToken::Space)) {
5444       CollectStream << getTokenStr();
5445       Lex();
5446     }
5447 
5448     if (trySkipId(AssemblerDirectiveEnd)) {
5449       FoundEnd = true;
5450       break;
5451     }
5452 
5453     CollectStream << Parser.parseStringToEndOfStatement()
5454                   << getContext().getAsmInfo()->getSeparatorString();
5455 
5456     Parser.eatToEndOfStatement();
5457   }
5458 
5459   getLexer().setSkipSpace(true);
5460 
5461   if (isToken(AsmToken::Eof) && !FoundEnd) {
5462     return TokError(Twine("expected directive ") +
5463                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5464   }
5465 
5466   CollectStream.flush();
5467   return false;
5468 }
5469 
5470 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5471 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5472   std::string String;
5473   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5474                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5475     return true;
5476 
5477   auto PALMetadata = getTargetStreamer().getPALMetadata();
5478   if (!PALMetadata->setFromString(String))
5479     return Error(getLoc(), "invalid PAL metadata");
5480   return false;
5481 }
5482 
5483 /// Parse the assembler directive for old linear-format PAL metadata.
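/// The directive takes an even-length, comma-separated list of alternating
/// register keys and values.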
5484 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5485   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5486     return Error(getLoc(),
5487                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5488                  "not available on non-amdpal OSes")).str());
5489   }
5490 
5491   auto PALMetadata = getTargetStreamer().getPALMetadata();
5492   PALMetadata->setLegacy();
5493   for (;;) {
5494     uint32_t Key, Value;
5495     if (ParseAsAbsoluteExpression(Key)) {
5496       return TokError(Twine("invalid value in ") +
5497                       Twine(PALMD::AssemblerDirective));
5498     }
5499     if (!trySkipToken(AsmToken::Comma)) {
5500       return TokError(Twine("expected an even number of values in ") +
5501                       Twine(PALMD::AssemblerDirective));
5502     }
5503     if (ParseAsAbsoluteExpression(Value)) {
5504       return TokError(Twine("invalid value in ") +
5505                       Twine(PALMD::AssemblerDirective));
5506     }
5507     PALMetadata->setRegister(Key, Value);
5508     if (!trySkipToken(AsmToken::Comma))
5509       break;
5510   }
5511   return false;
5512 }
5513 
5514 /// ParseDirectiveAMDGPULDS
5515 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5516 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5517   if (getParser().checkForValidSection())
5518     return true;
5519 
5520   StringRef Name;
5521   SMLoc NameLoc = getLoc();
5522   if (getParser().parseIdentifier(Name))
5523     return TokError("expected identifier in directive");
5524 
5525   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5526   if (parseToken(AsmToken::Comma, "expected ','"))
5527     return true;
5528 
5529   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5530 
5531   int64_t Size;
5532   SMLoc SizeLoc = getLoc();
5533   if (getParser().parseAbsoluteExpression(Size))
5534     return true;
5535   if (Size < 0)
5536     return Error(SizeLoc, "size must be non-negative");
5537   if (Size > LocalMemorySize)
5538     return Error(SizeLoc, "size is too large");
5539 
5540   int64_t Alignment = 4;
5541   if (trySkipToken(AsmToken::Comma)) {
5542     SMLoc AlignLoc = getLoc();
5543     if (getParser().parseAbsoluteExpression(Alignment))
5544       return true;
5545     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5546       return Error(AlignLoc, "alignment must be a power of two");
5547 
5548     // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
5550     // to make sure the alignment fits nicely into a 32-bit integer.
5551     if (Alignment >= 1u << 31)
5552       return Error(AlignLoc, "alignment is too large");
5553   }
5554 
5555   if (parseEOL())
5556     return true;
5557 
5558   Symbol->redefineIfPossible();
5559   if (!Symbol->isUndefined())
5560     return Error(NameLoc, "invalid symbol redefinition");
5561 
5562   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5563   return false;
5564 }
5565 
5566 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5567   StringRef IDVal = DirectiveID.getString();
5568 
5569   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5570     if (IDVal == ".amdhsa_kernel")
5571      return ParseDirectiveAMDHSAKernel();
5572 
5573     // TODO: Restructure/combine with PAL metadata directive.
5574     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5575       return ParseDirectiveHSAMetadata();
5576   } else {
5577     if (IDVal == ".hsa_code_object_version")
5578       return ParseDirectiveHSACodeObjectVersion();
5579 
5580     if (IDVal == ".hsa_code_object_isa")
5581       return ParseDirectiveHSACodeObjectISA();
5582 
5583     if (IDVal == ".amd_kernel_code_t")
5584       return ParseDirectiveAMDKernelCodeT();
5585 
5586     if (IDVal == ".amdgpu_hsa_kernel")
5587       return ParseDirectiveAMDGPUHsaKernel();
5588 
5589     if (IDVal == ".amd_amdgpu_isa")
5590       return ParseDirectiveISAVersion();
5591 
5592     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5593       return ParseDirectiveHSAMetadata();
5594   }
5595 
5596   if (IDVal == ".amdgcn_target")
5597     return ParseDirectiveAMDGCNTarget();
5598 
5599   if (IDVal == ".amdgpu_lds")
5600     return ParseDirectiveAMDGPULDS();
5601 
5602   if (IDVal == PALMD::AssemblerDirectiveBegin)
5603     return ParseDirectivePALMetadataBegin();
5604 
5605   if (IDVal == PALMD::AssemblerDirective)
5606     return ParseDirectivePALMetadata();
5607 
5608   return true;
5609 }
5610 
5611 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5612                                            unsigned RegNo) {
5613 
5614   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5615     return isGFX9Plus();
5616 
5617   // GFX10+ has 2 more SGPRs 104 and 105.
5618   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5619     return hasSGPR104_SGPR105();
5620 
5621   switch (RegNo) {
5622   case AMDGPU::SRC_SHARED_BASE:
5623   case AMDGPU::SRC_SHARED_LIMIT:
5624   case AMDGPU::SRC_PRIVATE_BASE:
5625   case AMDGPU::SRC_PRIVATE_LIMIT:
5626     return isGFX9Plus();
5627   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5628     return isGFX9Plus() && !isGFX11Plus();
5629   case AMDGPU::TBA:
5630   case AMDGPU::TBA_LO:
5631   case AMDGPU::TBA_HI:
5632   case AMDGPU::TMA:
5633   case AMDGPU::TMA_LO:
5634   case AMDGPU::TMA_HI:
5635     return !isGFX9Plus();
5636   case AMDGPU::XNACK_MASK:
5637   case AMDGPU::XNACK_MASK_LO:
5638   case AMDGPU::XNACK_MASK_HI:
5639     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5640   case AMDGPU::SGPR_NULL:
5641     return isGFX10Plus();
5642   default:
5643     break;
5644   }
5645 
5646   if (isCI())
5647     return true;
5648 
5649   if (isSI() || isGFX10Plus()) {
5650     // No flat_scr on SI.
5651     // On GFX10Plus flat scratch is not a valid register operand and can only be
5652     // accessed with s_setreg/s_getreg.
5653     switch (RegNo) {
5654     case AMDGPU::FLAT_SCR:
5655     case AMDGPU::FLAT_SCR_LO:
5656     case AMDGPU::FLAT_SCR_HI:
5657       return false;
5658     default:
5659       return true;
5660     }
5661   }
5662 
5663   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5664   // SI/CI have.
5665   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5666     return hasSGPR102_SGPR103();
5667 
5668   return true;
5669 }
5670 
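// Parse a single instruction operand. In NSA mode this also accepts a
// bracketed register list ("[...]") as used by gfx10+ MIMG non-sequential
// address operands.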
5671 OperandMatchResultTy
5672 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5673                               OperandMode Mode) {
5674   // Try to parse with a custom parser
5675   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5676 
  // If we successfully parsed the operand or if there was an error parsing,
5678   // we are done.
5679   //
5680   // If we are parsing after we reach EndOfStatement then this means we
5681   // are appending default values to the Operands list.  This is only done
5682   // by custom parser, so we shouldn't continue on to the generic parsing.
5683   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5684       isToken(AsmToken::EndOfStatement))
5685     return ResTy;
5686 
5687   SMLoc RBraceLoc;
5688   SMLoc LBraceLoc = getLoc();
5689   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5690     unsigned Prefix = Operands.size();
5691 
5692     for (;;) {
5693       auto Loc = getLoc();
5694       ResTy = parseReg(Operands);
5695       if (ResTy == MatchOperand_NoMatch)
5696         Error(Loc, "expected a register");
5697       if (ResTy != MatchOperand_Success)
5698         return MatchOperand_ParseFail;
5699 
5700       RBraceLoc = getLoc();
5701       if (trySkipToken(AsmToken::RBrac))
5702         break;
5703 
5704       if (!skipToken(AsmToken::Comma,
5705                      "expected a comma or a closing square bracket")) {
5706         return MatchOperand_ParseFail;
5707       }
5708     }
5709 
5710     if (Operands.size() - Prefix > 1) {
5711       Operands.insert(Operands.begin() + Prefix,
5712                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5713       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5714     }
5715 
5716     return MatchOperand_Success;
5717   }
5718 
5719   return parseRegOrImm(Operands);
5720 }
5721 
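// Strip a trailing encoding suffix (_e64_dpp, _e64, _e32, _dpp or _sdwa) from
// the mnemonic and record the corresponding forced encoding for the matcher.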
5722 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5723   // Clear any forced encodings from the previous instruction.
5724   setForcedEncodingSize(0);
5725   setForcedDPP(false);
5726   setForcedSDWA(false);
5727 
5728   if (Name.endswith("_e64_dpp")) {
5729     setForcedDPP(true);
5730     setForcedEncodingSize(64);
5731     return Name.substr(0, Name.size() - 8);
5732   } else if (Name.endswith("_e64")) {
5733     setForcedEncodingSize(64);
5734     return Name.substr(0, Name.size() - 4);
5735   } else if (Name.endswith("_e32")) {
5736     setForcedEncodingSize(32);
5737     return Name.substr(0, Name.size() - 4);
5738   } else if (Name.endswith("_dpp")) {
5739     setForcedDPP(true);
5740     return Name.substr(0, Name.size() - 4);
5741   } else if (Name.endswith("_sdwa")) {
5742     setForcedSDWA(true);
5743     return Name.substr(0, Name.size() - 5);
5744   }
5745   return Name;
5746 }
5747 
5748 static void applyMnemonicAliases(StringRef &Mnemonic,
5749                                  const FeatureBitset &Features,
5750                                  unsigned VariantID);
5751 
5752 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5753                                        StringRef Name,
5754                                        SMLoc NameLoc, OperandVector &Operands) {
5755   // Add the instruction mnemonic
5756   Name = parseMnemonicSuffix(Name);
5757 
5758   // If the target architecture uses MnemonicAlias, call it here to parse
5759   // operands correctly.
5760   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5761 
5762   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5763 
5764   bool IsMIMG = Name.startswith("image_");
5765 
5766   while (!trySkipToken(AsmToken::EndOfStatement)) {
5767     OperandMode Mode = OperandMode_Default;
5768     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5769       Mode = OperandMode_NSA;
5770     CPolSeen = 0;
5771     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5772 
5773     if (Res != MatchOperand_Success) {
5774       checkUnsupportedInstruction(Name, NameLoc);
5775       if (!Parser.hasPendingError()) {
5776         // FIXME: use real operand location rather than the current location.
5777         StringRef Msg =
5778           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5779                                             "not a valid operand.";
5780         Error(getLoc(), Msg);
5781       }
5782       while (!trySkipToken(AsmToken::EndOfStatement)) {
5783         lex();
5784       }
5785       return true;
5786     }
5787 
5788     // Eat the comma or space if there is one.
5789     trySkipToken(AsmToken::Comma);
5790   }
5791 
5792   return false;
5793 }
5794 
5795 //===----------------------------------------------------------------------===//
5796 // Utility functions
5797 //===----------------------------------------------------------------------===//
5798 
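// Parse an integer operand written as "<Prefix>:<expression>".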
5799 OperandMatchResultTy
5800 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5801 
5802   if (!trySkipId(Prefix, AsmToken::Colon))
5803     return MatchOperand_NoMatch;
5804 
5805   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5806 }
5807 
5808 OperandMatchResultTy
5809 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5810                                     AMDGPUOperand::ImmTy ImmTy,
5811                                     bool (*ConvertResult)(int64_t&)) {
5812   SMLoc S = getLoc();
5813   int64_t Value = 0;
5814 
5815   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5816   if (Res != MatchOperand_Success)
5817     return Res;
5818 
5819   if (ConvertResult && !ConvertResult(Value)) {
5820     Error(S, "invalid " + StringRef(Prefix) + " value.");
5821   }
5822 
5823   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5824   return MatchOperand_Success;
5825 }
5826 
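// Parse an operand of the form "<Prefix>:[e0,e1,...]" where every element is
// 0 or 1. Up to four elements are packed into a single immediate, with
// element I occupying bit I.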
5827 OperandMatchResultTy
5828 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5829                                              OperandVector &Operands,
5830                                              AMDGPUOperand::ImmTy ImmTy,
5831                                              bool (*ConvertResult)(int64_t&)) {
5832   SMLoc S = getLoc();
5833   if (!trySkipId(Prefix, AsmToken::Colon))
5834     return MatchOperand_NoMatch;
5835 
5836   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5837     return MatchOperand_ParseFail;
5838 
5839   unsigned Val = 0;
5840   const unsigned MaxSize = 4;
5841 
5842   // FIXME: How to verify the number of elements matches the number of src
5843   // operands?
5844   for (int I = 0; ; ++I) {
5845     int64_t Op;
5846     SMLoc Loc = getLoc();
5847     if (!parseExpr(Op))
5848       return MatchOperand_ParseFail;
5849 
5850     if (Op != 0 && Op != 1) {
5851       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5852       return MatchOperand_ParseFail;
5853     }
5854 
5855     Val |= (Op << I);
5856 
5857     if (trySkipToken(AsmToken::RBrac))
5858       break;
5859 
5860     if (I + 1 == MaxSize) {
5861       Error(getLoc(), "expected a closing square bracket");
5862       return MatchOperand_ParseFail;
5863     }
5864 
5865     if (!skipToken(AsmToken::Comma, "expected a comma"))
5866       return MatchOperand_ParseFail;
5867   }
5868 
5869   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5870   return MatchOperand_Success;
5871 }
5872 
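// Parse a boolean modifier written either as "<Name>" (bit set) or "no<Name>"
// (bit cleared), e.g. "r128" / "nor128".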
5873 OperandMatchResultTy
5874 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5875                                AMDGPUOperand::ImmTy ImmTy) {
5876   int64_t Bit;
5877   SMLoc S = getLoc();
5878 
5879   if (trySkipId(Name)) {
5880     Bit = 1;
5881   } else if (trySkipId("no", Name)) {
5882     Bit = 0;
5883   } else {
5884     return MatchOperand_NoMatch;
5885   }
5886 
5887   if (Name == "r128" && !hasMIMG_R128()) {
5888     Error(S, "r128 modifier is not supported on this GPU");
5889     return MatchOperand_ParseFail;
5890   }
5891   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5892     Error(S, "a16 modifier is not supported on this GPU");
5893     return MatchOperand_ParseFail;
5894   }
5895 
5896   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5897     ImmTy = AMDGPUOperand::ImmTyR128A16;
5898 
5899   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5900   return MatchOperand_Success;
5901 }
5902 
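// Parse a cache policy modifier (glc/slc/dlc/scc and their "no" forms, or
// sc0/sc1/nt on gfx940) and fold it into the instruction's single CPol
// immediate operand, diagnosing duplicates and modifiers the GPU does not
// support.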
5903 OperandMatchResultTy
5904 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5905   unsigned CPolOn = 0;
5906   unsigned CPolOff = 0;
5907   SMLoc S = getLoc();
5908 
5909   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5910   if (isGFX940() && !Mnemo.startswith("s_")) {
5911     if (trySkipId("sc0"))
5912       CPolOn = AMDGPU::CPol::SC0;
5913     else if (trySkipId("nosc0"))
5914       CPolOff = AMDGPU::CPol::SC0;
5915     else if (trySkipId("nt"))
5916       CPolOn = AMDGPU::CPol::NT;
5917     else if (trySkipId("nont"))
5918       CPolOff = AMDGPU::CPol::NT;
5919     else if (trySkipId("sc1"))
5920       CPolOn = AMDGPU::CPol::SC1;
5921     else if (trySkipId("nosc1"))
5922       CPolOff = AMDGPU::CPol::SC1;
5923     else
5924       return MatchOperand_NoMatch;
  } else if (trySkipId("glc"))
5927     CPolOn = AMDGPU::CPol::GLC;
5928   else if (trySkipId("noglc"))
5929     CPolOff = AMDGPU::CPol::GLC;
5930   else if (trySkipId("slc"))
5931     CPolOn = AMDGPU::CPol::SLC;
5932   else if (trySkipId("noslc"))
5933     CPolOff = AMDGPU::CPol::SLC;
5934   else if (trySkipId("dlc"))
5935     CPolOn = AMDGPU::CPol::DLC;
5936   else if (trySkipId("nodlc"))
5937     CPolOff = AMDGPU::CPol::DLC;
5938   else if (trySkipId("scc"))
5939     CPolOn = AMDGPU::CPol::SCC;
5940   else if (trySkipId("noscc"))
5941     CPolOff = AMDGPU::CPol::SCC;
5942   else
5943     return MatchOperand_NoMatch;
5944 
5945   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5946     Error(S, "dlc modifier is not supported on this GPU");
5947     return MatchOperand_ParseFail;
5948   }
5949 
5950   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5951     Error(S, "scc modifier is not supported on this GPU");
5952     return MatchOperand_ParseFail;
5953   }
5954 
5955   if (CPolSeen & (CPolOn | CPolOff)) {
5956     Error(S, "duplicate cache policy modifier");
5957     return MatchOperand_ParseFail;
5958   }
5959 
5960   CPolSeen |= (CPolOn | CPolOff);
5961 
5962   for (unsigned I = 1; I != Operands.size(); ++I) {
5963     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5964     if (Op.isCPol()) {
5965       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5966       return MatchOperand_Success;
5967     }
5968   }
5969 
5970   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5971                                               AMDGPUOperand::ImmTyCPol));
5972 
5973   return MatchOperand_Success;
5974 }
5975 
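// Append an optional immediate operand to Inst: use the value recorded in
// OptionalIdx if the operand was present in the source, otherwise use
// Default.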
5976 static void addOptionalImmOperand(
5977   MCInst& Inst, const OperandVector& Operands,
5978   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5979   AMDGPUOperand::ImmTy ImmT,
5980   int64_t Default = 0) {
5981   auto i = OptionalIdx.find(ImmT);
5982   if (i != OptionalIdx.end()) {
5983     unsigned Idx = i->second;
5984     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5985   } else {
5986     Inst.addOperand(MCOperand::createImm(Default));
5987   }
5988 }
5989 
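// Parse an operand written as "<Prefix>:<identifier>", returning the
// identifier and its location.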
5990 OperandMatchResultTy
5991 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5992                                        StringRef &Value,
5993                                        SMLoc &StringLoc) {
5994   if (!trySkipId(Prefix, AsmToken::Colon))
5995     return MatchOperand_NoMatch;
5996 
5997   StringLoc = getLoc();
5998   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5999                                                   : MatchOperand_ParseFail;
6000 }
6001 
6002 //===----------------------------------------------------------------------===//
6003 // MTBUF format
6004 //===----------------------------------------------------------------------===//
6005 
6006 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6007                                   int64_t MaxVal,
6008                                   int64_t &Fmt) {
6009   int64_t Val;
6010   SMLoc Loc = getLoc();
6011 
6012   auto Res = parseIntWithPrefix(Pref, Val);
6013   if (Res == MatchOperand_ParseFail)
6014     return false;
6015   if (Res == MatchOperand_NoMatch)
6016     return true;
6017 
6018   if (Val < 0 || Val > MaxVal) {
6019     Error(Loc, Twine("out of range ", StringRef(Pref)));
6020     return false;
6021   }
6022 
6023   Fmt = Val;
6024   return true;
6025 }
6026 
6027 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6028 // values to live in a joint format operand in the MCInst encoding.
6029 OperandMatchResultTy
6030 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6031   using namespace llvm::AMDGPU::MTBUFFormat;
6032 
6033   int64_t Dfmt = DFMT_UNDEF;
6034   int64_t Nfmt = NFMT_UNDEF;
6035 
6036   // dfmt and nfmt can appear in either order, and each is optional.
6037   for (int I = 0; I < 2; ++I) {
6038     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6039       return MatchOperand_ParseFail;
6040 
6041     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
6042       return MatchOperand_ParseFail;
6043     }
6044     // Skip optional comma between dfmt/nfmt
6045     // but guard against 2 commas following each other.
6046     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6047         !peekToken().is(AsmToken::Comma)) {
6048       trySkipToken(AsmToken::Comma);
6049     }
6050   }
6051 
6052   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6053     return MatchOperand_NoMatch;
6054 
6055   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6056   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6057 
6058   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6059   return MatchOperand_Success;
6060 }
6061 
6062 OperandMatchResultTy
6063 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6064   using namespace llvm::AMDGPU::MTBUFFormat;
6065 
6066   int64_t Fmt = UFMT_UNDEF;
6067 
6068   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6069     return MatchOperand_ParseFail;
6070 
6071   if (Fmt == UFMT_UNDEF)
6072     return MatchOperand_NoMatch;
6073 
6074   Format = Fmt;
6075   return MatchOperand_Success;
6076 }
6077 
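// Interpret FormatStr as either a data format (dfmt) or a numeric format
// (nfmt) name and store it in the corresponding output; emits an error for
// unknown format names.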
6078 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6079                                     int64_t &Nfmt,
6080                                     StringRef FormatStr,
6081                                     SMLoc Loc) {
6082   using namespace llvm::AMDGPU::MTBUFFormat;
6083   int64_t Format;
6084 
6085   Format = getDfmt(FormatStr);
6086   if (Format != DFMT_UNDEF) {
6087     Dfmt = Format;
6088     return true;
6089   }
6090 
6091   Format = getNfmt(FormatStr, getSTI());
6092   if (Format != NFMT_UNDEF) {
6093     Nfmt = Format;
6094     return true;
6095   }
6096 
6097   Error(Loc, "unsupported format");
6098   return false;
6099 }
6100 
6101 OperandMatchResultTy
6102 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6103                                           SMLoc FormatLoc,
6104                                           int64_t &Format) {
6105   using namespace llvm::AMDGPU::MTBUFFormat;
6106 
6107   int64_t Dfmt = DFMT_UNDEF;
6108   int64_t Nfmt = NFMT_UNDEF;
6109   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6110     return MatchOperand_ParseFail;
6111 
6112   if (trySkipToken(AsmToken::Comma)) {
6113     StringRef Str;
6114     SMLoc Loc = getLoc();
6115     if (!parseId(Str, "expected a format string") ||
6116         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6117       return MatchOperand_ParseFail;
6118     }
6119     if (Dfmt == DFMT_UNDEF) {
6120       Error(Loc, "duplicate numeric format");
6121       return MatchOperand_ParseFail;
6122     } else if (Nfmt == NFMT_UNDEF) {
6123       Error(Loc, "duplicate data format");
6124       return MatchOperand_ParseFail;
6125     }
6126   }
6127 
6128   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6129   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6130 
6131   if (isGFX10Plus()) {
6132     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6133     if (Ufmt == UFMT_UNDEF) {
6134       Error(FormatLoc, "unsupported format");
6135       return MatchOperand_ParseFail;
6136     }
6137     Format = Ufmt;
6138   } else {
6139     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6140   }
6141 
6142   return MatchOperand_Success;
6143 }
6144 
6145 OperandMatchResultTy
6146 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6147                                             SMLoc Loc,
6148                                             int64_t &Format) {
6149   using namespace llvm::AMDGPU::MTBUFFormat;
6150 
6151   auto Id = getUnifiedFormat(FormatStr, getSTI());
6152   if (Id == UFMT_UNDEF)
6153     return MatchOperand_NoMatch;
6154 
6155   if (!isGFX10Plus()) {
6156     Error(Loc, "unified format is not supported on this GPU");
6157     return MatchOperand_ParseFail;
6158   }
6159 
6160   Format = Id;
6161   return MatchOperand_Success;
6162 }
6163 
6164 OperandMatchResultTy
6165 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6166   using namespace llvm::AMDGPU::MTBUFFormat;
6167   SMLoc Loc = getLoc();
6168 
6169   if (!parseExpr(Format))
6170     return MatchOperand_ParseFail;
6171   if (!isValidFormatEncoding(Format, getSTI())) {
6172     Error(Loc, "out of range format");
6173     return MatchOperand_ParseFail;
6174   }
6175 
6176   return MatchOperand_Success;
6177 }
6178 
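// For reference, the symbolic and numeric forms accepted below look like
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]  (split dfmt/nfmt)
//   format:[BUF_FMT_32_FLOAT]                          (unified, GFX10+)
//   format:22                                          (numeric encoding)
// These are representative examples; the available names depend on the GPU.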
6179 OperandMatchResultTy
6180 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6181   using namespace llvm::AMDGPU::MTBUFFormat;
6182 
6183   if (!trySkipId("format", AsmToken::Colon))
6184     return MatchOperand_NoMatch;
6185 
6186   if (trySkipToken(AsmToken::LBrac)) {
6187     StringRef FormatStr;
6188     SMLoc Loc = getLoc();
6189     if (!parseId(FormatStr, "expected a format string"))
6190       return MatchOperand_ParseFail;
6191 
6192     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6193     if (Res == MatchOperand_NoMatch)
6194       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6195     if (Res != MatchOperand_Success)
6196       return Res;
6197 
6198     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6199       return MatchOperand_ParseFail;
6200 
6201     return MatchOperand_Success;
6202   }
6203 
6204   return parseNumericFormat(Format);
6205 }
6206 
6207 OperandMatchResultTy
6208 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6209   using namespace llvm::AMDGPU::MTBUFFormat;
6210 
6211   int64_t Format = getDefaultFormatEncoding(getSTI());
6212   OperandMatchResultTy Res;
6213   SMLoc Loc = getLoc();
6214 
6215   // Parse legacy format syntax.
6216   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6217   if (Res == MatchOperand_ParseFail)
6218     return Res;
6219 
6220   bool FormatFound = (Res == MatchOperand_Success);
6221 
6222   Operands.push_back(
6223     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6224 
6225   if (FormatFound)
6226     trySkipToken(AsmToken::Comma);
6227 
6228   if (isToken(AsmToken::EndOfStatement)) {
6229     // We are expecting an soffset operand,
6230     // but let the matcher handle the error.
6231     return MatchOperand_Success;
6232   }
6233 
6234   // Parse soffset.
6235   Res = parseRegOrImm(Operands);
6236   if (Res != MatchOperand_Success)
6237     return Res;
6238 
6239   trySkipToken(AsmToken::Comma);
6240 
6241   if (!FormatFound) {
6242     Res = parseSymbolicOrNumericFormat(Format);
6243     if (Res == MatchOperand_ParseFail)
6244       return Res;
6245     if (Res == MatchOperand_Success) {
6246       auto Size = Operands.size();
6247       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6248       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6249       Op.setImm(Format);
6250     }
6251     return MatchOperand_Success;
6252   }
6253 
6254   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6255     Error(getLoc(), "duplicate format");
6256     return MatchOperand_ParseFail;
6257   }
6258   return MatchOperand_Success;
6259 }
6260 
6261 //===----------------------------------------------------------------------===//
6262 // ds
6263 //===----------------------------------------------------------------------===//
6264 
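// For reference, the DS conversions below handle operands such as
// "offset0:4 offset1:8" and "gds", e.g. (a representative example)
//   ds_write2_b32 v1, v2, v3 offset0:4 offset1:8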
6265 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6266                                     const OperandVector &Operands) {
6267   OptionalImmIndexMap OptionalIdx;
6268 
6269   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6270     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6271 
6272     // Add the register arguments
6273     if (Op.isReg()) {
6274       Op.addRegOperands(Inst, 1);
6275       continue;
6276     }
6277 
6278     // Handle optional arguments
6279     OptionalIdx[Op.getImmTy()] = i;
6280   }
6281 
6282   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6283   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6284   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6285 
6286   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6287 }
6288 
6289 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6290                                 bool IsGdsHardcoded) {
6291   OptionalImmIndexMap OptionalIdx;
6292   AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset;
6293 
6294   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6295     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6296 
6297     // Add the register arguments
6298     if (Op.isReg()) {
6299       Op.addRegOperands(Inst, 1);
6300       continue;
6301     }
6302 
6303     if (Op.isToken() && Op.getToken() == "gds") {
6304       IsGdsHardcoded = true;
6305       continue;
6306     }
6307 
6308     // Handle optional arguments
6309     OptionalIdx[Op.getImmTy()] = i;
6310 
6311     if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle)
6312       OffsetType = AMDGPUOperand::ImmTySwizzle;
6313   }
6314 
6315   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6316 
6317   if (!IsGdsHardcoded) {
6318     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6319   }
6320   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6321 }
6322 
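// For reference, a typical export instruction looks like
//   exp mrt0 v0, v1, v2, v3 done vm
// Omitted sources are written as "off"; the enable mask computed below
// records which sources are actually present.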
6323 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6324   OptionalImmIndexMap OptionalIdx;
6325 
6326   unsigned OperandIdx[4];
6327   unsigned EnMask = 0;
6328   int SrcIdx = 0;
6329 
6330   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6331     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6332 
6333     // Add the register arguments
6334     if (Op.isReg()) {
6335       assert(SrcIdx < 4);
6336       OperandIdx[SrcIdx] = Inst.size();
6337       Op.addRegOperands(Inst, 1);
6338       ++SrcIdx;
6339       continue;
6340     }
6341 
6342     if (Op.isOff()) {
6343       assert(SrcIdx < 4);
6344       OperandIdx[SrcIdx] = Inst.size();
6345       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6346       ++SrcIdx;
6347       continue;
6348     }
6349 
6350     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6351       Op.addImmOperands(Inst, 1);
6352       continue;
6353     }
6354 
6355     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6356       continue;
6357 
6358     // Handle optional arguments
6359     OptionalIdx[Op.getImmTy()] = i;
6360   }
6361 
6362   assert(SrcIdx == 4);
6363 
6364   bool Compr = false;
6365   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6366     Compr = true;
6367     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6368     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6369     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6370   }
6371 
6372   for (auto i = 0; i < SrcIdx; ++i) {
6373     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6374       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6375     }
6376   }
6377 
6378   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6379   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6380 
6381   Inst.addOperand(MCOperand::createImm(EnMask));
6382 }
6383 
6384 //===----------------------------------------------------------------------===//
6385 // s_waitcnt
6386 //===----------------------------------------------------------------------===//
6387 
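// For reference, s_waitcnt accepts either a raw immediate or a list of
// counters separated by '&' or ',', e.g.
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
// A "_sat" suffix on a counter name clamps an out-of-range value instead of
// reporting an error.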
6388 static bool
6389 encodeCnt(
6390   const AMDGPU::IsaVersion ISA,
6391   int64_t &IntVal,
6392   int64_t CntVal,
6393   bool Saturate,
6394   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6395   unsigned (*decode)(const IsaVersion &Version, unsigned))
6396 {
6397   bool Failed = false;
6398 
6399   IntVal = encode(ISA, IntVal, CntVal);
6400   if (CntVal != decode(ISA, IntVal)) {
6401     if (Saturate) {
6402       IntVal = encode(ISA, IntVal, -1);
6403     } else {
6404       Failed = true;
6405     }
6406   }
6407   return Failed;
6408 }
6409 
6410 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6411 
6412   SMLoc CntLoc = getLoc();
6413   StringRef CntName = getTokenStr();
6414 
6415   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6416       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6417     return false;
6418 
6419   int64_t CntVal;
6420   SMLoc ValLoc = getLoc();
6421   if (!parseExpr(CntVal))
6422     return false;
6423 
6424   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6425 
6426   bool Failed = true;
6427   bool Sat = CntName.endswith("_sat");
6428 
6429   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6430     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6431   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6432     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6433   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6434     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6435   } else {
6436     Error(CntLoc, "invalid counter name " + CntName);
6437     return false;
6438   }
6439 
6440   if (Failed) {
6441     Error(ValLoc, "too large value for " + CntName);
6442     return false;
6443   }
6444 
6445   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6446     return false;
6447 
6448   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6449     if (isToken(AsmToken::EndOfStatement)) {
6450       Error(getLoc(), "expected a counter name");
6451       return false;
6452     }
6453   }
6454 
6455   return true;
6456 }
6457 
6458 OperandMatchResultTy
6459 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6460   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6461   int64_t Waitcnt = getWaitcntBitMask(ISA);
6462   SMLoc S = getLoc();
6463 
6464   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6465     while (!isToken(AsmToken::EndOfStatement)) {
6466       if (!parseCnt(Waitcnt))
6467         return MatchOperand_ParseFail;
6468     }
6469   } else {
6470     if (!parseExpr(Waitcnt))
6471       return MatchOperand_ParseFail;
6472   }
6473 
6474   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6475   return MatchOperand_Success;
6476 }
6477 
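// For reference, the s_delay_alu operand is either a raw immediate or a list
// of fields separated by '|', e.g.
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)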
6478 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6479   SMLoc FieldLoc = getLoc();
6480   StringRef FieldName = getTokenStr();
6481   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6482       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6483     return false;
6484 
6485   SMLoc ValueLoc = getLoc();
6486   StringRef ValueName = getTokenStr();
6487   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6488       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6489     return false;
6490 
6491   unsigned Shift;
6492   if (FieldName == "instid0") {
6493     Shift = 0;
6494   } else if (FieldName == "instskip") {
6495     Shift = 4;
6496   } else if (FieldName == "instid1") {
6497     Shift = 7;
6498   } else {
6499     Error(FieldLoc, "invalid field name " + FieldName);
6500     return false;
6501   }
6502 
6503   int Value;
6504   if (Shift == 4) {
6505     // Parse values for instskip.
6506     Value = StringSwitch<int>(ValueName)
6507                 .Case("SAME", 0)
6508                 .Case("NEXT", 1)
6509                 .Case("SKIP_1", 2)
6510                 .Case("SKIP_2", 3)
6511                 .Case("SKIP_3", 4)
6512                 .Case("SKIP_4", 5)
6513                 .Default(-1);
6514   } else {
6515     // Parse values for instid0 and instid1.
6516     Value = StringSwitch<int>(ValueName)
6517                 .Case("NO_DEP", 0)
6518                 .Case("VALU_DEP_1", 1)
6519                 .Case("VALU_DEP_2", 2)
6520                 .Case("VALU_DEP_3", 3)
6521                 .Case("VALU_DEP_4", 4)
6522                 .Case("TRANS32_DEP_1", 5)
6523                 .Case("TRANS32_DEP_2", 6)
6524                 .Case("TRANS32_DEP_3", 7)
6525                 .Case("FMA_ACCUM_CYCLE_1", 8)
6526                 .Case("SALU_CYCLE_1", 9)
6527                 .Case("SALU_CYCLE_2", 10)
6528                 .Case("SALU_CYCLE_3", 11)
6529                 .Default(-1);
6530   }
6531   if (Value < 0) {
6532     Error(ValueLoc, "invalid value name " + ValueName);
6533     return false;
6534   }
6535 
6536   Delay |= Value << Shift;
6537   return true;
6538 }
6539 
6540 OperandMatchResultTy
6541 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6542   int64_t Delay = 0;
6543   SMLoc S = getLoc();
6544 
6545   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6546     do {
6547       if (!parseDelay(Delay))
6548         return MatchOperand_ParseFail;
6549     } while (trySkipToken(AsmToken::Pipe));
6550   } else {
6551     if (!parseExpr(Delay))
6552       return MatchOperand_ParseFail;
6553   }
6554 
6555   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6556   return MatchOperand_Success;
6557 }
6558 
6559 bool
6560 AMDGPUOperand::isSWaitCnt() const {
6561   return isImm();
6562 }
6563 
6564 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6565 
6566 //===----------------------------------------------------------------------===//
6567 // DepCtr
6568 //===----------------------------------------------------------------------===//
6569 
6570 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6571                                   StringRef DepCtrName) {
6572   switch (ErrorId) {
6573   case OPR_ID_UNKNOWN:
6574     Error(Loc, Twine("invalid counter name ", DepCtrName));
6575     return;
6576   case OPR_ID_UNSUPPORTED:
6577     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6578     return;
6579   case OPR_ID_DUPLICATE:
6580     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6581     return;
6582   case OPR_VAL_INVALID:
6583     Error(Loc, Twine("invalid value for ", DepCtrName));
6584     return;
6585   default:
6586     assert(false);
6587   }
6588 }
6589 
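// For reference, the depctr operand is either a raw immediate or a list of
// named counters, e.g. (a representative example; the set of counter names
// depends on the GPU)
//   s_waitcnt_depctr depctr_va_vdst(0) depctr_sa_sdst(0)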
6590 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6591 
6592   using namespace llvm::AMDGPU::DepCtr;
6593 
6594   SMLoc DepCtrLoc = getLoc();
6595   StringRef DepCtrName = getTokenStr();
6596 
6597   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6598       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6599     return false;
6600 
6601   int64_t ExprVal;
6602   if (!parseExpr(ExprVal))
6603     return false;
6604 
6605   unsigned PrevOprMask = UsedOprMask;
6606   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6607 
6608   if (CntVal < 0) {
6609     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6610     return false;
6611   }
6612 
6613   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6614     return false;
6615 
6616   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6617     if (isToken(AsmToken::EndOfStatement)) {
6618       Error(getLoc(), "expected a counter name");
6619       return false;
6620     }
6621   }
6622 
6623   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6624   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6625   return true;
6626 }
6627 
6628 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6629   using namespace llvm::AMDGPU::DepCtr;
6630 
6631   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6632   SMLoc Loc = getLoc();
6633 
6634   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6635     unsigned UsedOprMask = 0;
6636     while (!isToken(AsmToken::EndOfStatement)) {
6637       if (!parseDepCtr(DepCtr, UsedOprMask))
6638         return MatchOperand_ParseFail;
6639     }
6640   } else {
6641     if (!parseExpr(DepCtr))
6642       return MatchOperand_ParseFail;
6643   }
6644 
6645   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6646   return MatchOperand_Success;
6647 }
6648 
6649 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6650 
6651 //===----------------------------------------------------------------------===//
6652 // hwreg
6653 //===----------------------------------------------------------------------===//
6654 
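// For reference, the hwreg operand accepts either a raw 16-bit immediate or
// the hwreg macro, e.g. (a representative example)
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
// The bit offset and width are optional; OFFSET_DEFAULT_ and WIDTH_DEFAULT_
// are used when they are omitted.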
6655 bool
6656 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6657                                 OperandInfoTy &Offset,
6658                                 OperandInfoTy &Width) {
6659   using namespace llvm::AMDGPU::Hwreg;
6660 
6661   // The register may be specified by name or using a numeric code
6662   HwReg.Loc = getLoc();
6663   if (isToken(AsmToken::Identifier) &&
6664       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6665     HwReg.IsSymbolic = true;
6666     lex(); // skip register name
6667   } else if (!parseExpr(HwReg.Id, "a register name")) {
6668     return false;
6669   }
6670 
6671   if (trySkipToken(AsmToken::RParen))
6672     return true;
6673 
6674   // parse optional params
6675   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6676     return false;
6677 
6678   Offset.Loc = getLoc();
6679   if (!parseExpr(Offset.Id))
6680     return false;
6681 
6682   if (!skipToken(AsmToken::Comma, "expected a comma"))
6683     return false;
6684 
6685   Width.Loc = getLoc();
6686   return parseExpr(Width.Id) &&
6687          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6688 }
6689 
6690 bool
6691 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6692                                const OperandInfoTy &Offset,
6693                                const OperandInfoTy &Width) {
6694 
6695   using namespace llvm::AMDGPU::Hwreg;
6696 
6697   if (HwReg.IsSymbolic) {
6698     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6699       Error(HwReg.Loc,
6700             "specified hardware register is not supported on this GPU");
6701       return false;
6702     }
6703   } else {
6704     if (!isValidHwreg(HwReg.Id)) {
6705       Error(HwReg.Loc,
6706             "invalid code of hardware register: only 6-bit values are legal");
6707       return false;
6708     }
6709   }
6710   if (!isValidHwregOffset(Offset.Id)) {
6711     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6712     return false;
6713   }
6714   if (!isValidHwregWidth(Width.Id)) {
6715     Error(Width.Loc,
6716           "invalid bitfield width: only values from 1 to 32 are legal");
6717     return false;
6718   }
6719   return true;
6720 }
6721 
6722 OperandMatchResultTy
6723 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6724   using namespace llvm::AMDGPU::Hwreg;
6725 
6726   int64_t ImmVal = 0;
6727   SMLoc Loc = getLoc();
6728 
6729   if (trySkipId("hwreg", AsmToken::LParen)) {
6730     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6731     OperandInfoTy Offset(OFFSET_DEFAULT_);
6732     OperandInfoTy Width(WIDTH_DEFAULT_);
6733     if (parseHwregBody(HwReg, Offset, Width) &&
6734         validateHwreg(HwReg, Offset, Width)) {
6735       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6736     } else {
6737       return MatchOperand_ParseFail;
6738     }
6739   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6740     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6741       Error(Loc, "invalid immediate: only 16-bit values are legal");
6742       return MatchOperand_ParseFail;
6743     }
6744   } else {
6745     return MatchOperand_ParseFail;
6746   }
6747 
6748   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6749   return MatchOperand_Success;
6750 }
6751 
6752 bool AMDGPUOperand::isHwreg() const {
6753   return isImmTy(ImmTyHwreg);
6754 }
6755 
6756 //===----------------------------------------------------------------------===//
6757 // sendmsg
6758 //===----------------------------------------------------------------------===//
6759 
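// For reference, the sendmsg operand accepts either a raw 16-bit immediate or
// the sendmsg macro, e.g. (a representative example)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// The operation and stream id are optional for messages that do not require
// them.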
6760 bool
6761 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6762                                   OperandInfoTy &Op,
6763                                   OperandInfoTy &Stream) {
6764   using namespace llvm::AMDGPU::SendMsg;
6765 
6766   Msg.Loc = getLoc();
6767   if (isToken(AsmToken::Identifier) &&
6768       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6769     Msg.IsSymbolic = true;
6770     lex(); // skip message name
6771   } else if (!parseExpr(Msg.Id, "a message name")) {
6772     return false;
6773   }
6774 
6775   if (trySkipToken(AsmToken::Comma)) {
6776     Op.IsDefined = true;
6777     Op.Loc = getLoc();
6778     if (isToken(AsmToken::Identifier) &&
6779         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6780       lex(); // skip operation name
6781     } else if (!parseExpr(Op.Id, "an operation name")) {
6782       return false;
6783     }
6784 
6785     if (trySkipToken(AsmToken::Comma)) {
6786       Stream.IsDefined = true;
6787       Stream.Loc = getLoc();
6788       if (!parseExpr(Stream.Id))
6789         return false;
6790     }
6791   }
6792 
6793   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6794 }
6795 
6796 bool
6797 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6798                                  const OperandInfoTy &Op,
6799                                  const OperandInfoTy &Stream) {
6800   using namespace llvm::AMDGPU::SendMsg;
6801 
6802   // Validation strictness depends on whether the message is specified
6803   // in symbolic or in numeric form. In the latter case
6804   // only the possibility of encoding is checked.
6805   bool Strict = Msg.IsSymbolic;
6806 
6807   if (Strict) {
6808     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6809       Error(Msg.Loc, "specified message id is not supported on this GPU");
6810       return false;
6811     }
6812   } else {
6813     if (!isValidMsgId(Msg.Id, getSTI())) {
6814       Error(Msg.Loc, "invalid message id");
6815       return false;
6816     }
6817   }
6818   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6819     if (Op.IsDefined) {
6820       Error(Op.Loc, "message does not support operations");
6821     } else {
6822       Error(Msg.Loc, "missing message operation");
6823     }
6824     return false;
6825   }
6826   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6827     Error(Op.Loc, "invalid operation id");
6828     return false;
6829   }
6830   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6831       Stream.IsDefined) {
6832     Error(Stream.Loc, "message operation does not support streams");
6833     return false;
6834   }
6835   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6836     Error(Stream.Loc, "invalid message stream id");
6837     return false;
6838   }
6839   return true;
6840 }
6841 
6842 OperandMatchResultTy
6843 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6844   using namespace llvm::AMDGPU::SendMsg;
6845 
6846   int64_t ImmVal = 0;
6847   SMLoc Loc = getLoc();
6848 
6849   if (trySkipId("sendmsg", AsmToken::LParen)) {
6850     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6851     OperandInfoTy Op(OP_NONE_);
6852     OperandInfoTy Stream(STREAM_ID_NONE_);
6853     if (parseSendMsgBody(Msg, Op, Stream) &&
6854         validateSendMsg(Msg, Op, Stream)) {
6855       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6856     } else {
6857       return MatchOperand_ParseFail;
6858     }
6859   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6860     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6861       Error(Loc, "invalid immediate: only 16-bit values are legal");
6862       return MatchOperand_ParseFail;
6863     }
6864   } else {
6865     return MatchOperand_ParseFail;
6866   }
6867 
6868   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6869   return MatchOperand_Success;
6870 }
6871 
6872 bool AMDGPUOperand::isSendMsg() const {
6873   return isImmTy(ImmTySendMsg);
6874 }
6875 
6876 //===----------------------------------------------------------------------===//
6877 // v_interp
6878 //===----------------------------------------------------------------------===//
6879 
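// For reference, interpolation operands look like
//   v_interp_p1_f32 v0, v1, attr0.x
// where the slot is one of p10, p20 and p0, and the attribute is
// attr<N>.<chan> with N in [0,63] and chan one of x, y, z and w.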
6880 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6881   StringRef Str;
6882   SMLoc S = getLoc();
6883 
6884   if (!parseId(Str))
6885     return MatchOperand_NoMatch;
6886 
6887   int Slot = StringSwitch<int>(Str)
6888     .Case("p10", 0)
6889     .Case("p20", 1)
6890     .Case("p0", 2)
6891     .Default(-1);
6892 
6893   if (Slot == -1) {
6894     Error(S, "invalid interpolation slot");
6895     return MatchOperand_ParseFail;
6896   }
6897 
6898   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6899                                               AMDGPUOperand::ImmTyInterpSlot));
6900   return MatchOperand_Success;
6901 }
6902 
6903 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6904   StringRef Str;
6905   SMLoc S = getLoc();
6906 
6907   if (!parseId(Str))
6908     return MatchOperand_NoMatch;
6909 
6910   if (!Str.startswith("attr")) {
6911     Error(S, "invalid interpolation attribute");
6912     return MatchOperand_ParseFail;
6913   }
6914 
6915   StringRef Chan = Str.take_back(2);
6916   int AttrChan = StringSwitch<int>(Chan)
6917     .Case(".x", 0)
6918     .Case(".y", 1)
6919     .Case(".z", 2)
6920     .Case(".w", 3)
6921     .Default(-1);
6922   if (AttrChan == -1) {
6923     Error(S, "invalid or missing interpolation attribute channel");
6924     return MatchOperand_ParseFail;
6925   }
6926 
6927   Str = Str.drop_back(2).drop_front(4);
6928 
6929   uint8_t Attr;
6930   if (Str.getAsInteger(10, Attr)) {
6931     Error(S, "invalid or missing interpolation attribute number");
6932     return MatchOperand_ParseFail;
6933   }
6934 
6935   if (Attr > 63) {
6936     Error(S, "out of bounds interpolation attribute number");
6937     return MatchOperand_ParseFail;
6938   }
6939 
6940   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6941 
6942   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6943                                               AMDGPUOperand::ImmTyInterpAttr));
6944   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6945                                               AMDGPUOperand::ImmTyAttrChan));
6946   return MatchOperand_Success;
6947 }
6948 
6949 //===----------------------------------------------------------------------===//
6950 // exp
6951 //===----------------------------------------------------------------------===//
6952 
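// For reference, exp targets are symbolic names such as mrt0, pos0 or null
// (representative examples); the set of supported targets depends on the GPU.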
6953 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6954   using namespace llvm::AMDGPU::Exp;
6955 
6956   StringRef Str;
6957   SMLoc S = getLoc();
6958 
6959   if (!parseId(Str))
6960     return MatchOperand_NoMatch;
6961 
6962   unsigned Id = getTgtId(Str);
6963   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6964     Error(S, (Id == ET_INVALID) ?
6965                 "invalid exp target" :
6966                 "exp target is not supported on this GPU");
6967     return MatchOperand_ParseFail;
6968   }
6969 
6970   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6971                                               AMDGPUOperand::ImmTyExpTgt));
6972   return MatchOperand_Success;
6973 }
6974 
6975 //===----------------------------------------------------------------------===//
6976 // parser helpers
6977 //===----------------------------------------------------------------------===//
6978 
6979 bool
6980 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6981   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6982 }
6983 
6984 bool
6985 AMDGPUAsmParser::isId(const StringRef Id) const {
6986   return isId(getToken(), Id);
6987 }
6988 
6989 bool
6990 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6991   return getTokenKind() == Kind;
6992 }
6993 
6994 bool
6995 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6996   if (isId(Id)) {
6997     lex();
6998     return true;
6999   }
7000   return false;
7001 }
7002 
7003 bool
7004 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7005   if (isToken(AsmToken::Identifier)) {
7006     StringRef Tok = getTokenStr();
7007     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
7008       lex();
7009       return true;
7010     }
7011   }
7012   return false;
7013 }
7014 
7015 bool
7016 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7017   if (isId(Id) && peekToken().is(Kind)) {
7018     lex();
7019     lex();
7020     return true;
7021   }
7022   return false;
7023 }
7024 
7025 bool
7026 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7027   if (isToken(Kind)) {
7028     lex();
7029     return true;
7030   }
7031   return false;
7032 }
7033 
7034 bool
7035 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7036                            const StringRef ErrMsg) {
7037   if (!trySkipToken(Kind)) {
7038     Error(getLoc(), ErrMsg);
7039     return false;
7040   }
7041   return true;
7042 }
7043 
7044 bool
7045 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7046   SMLoc S = getLoc();
7047 
7048   const MCExpr *Expr;
7049   if (Parser.parseExpression(Expr))
7050     return false;
7051 
7052   if (Expr->evaluateAsAbsolute(Imm))
7053     return true;
7054 
7055   if (Expected.empty()) {
7056     Error(S, "expected absolute expression");
7057   } else {
7058     Error(S, Twine("expected ", Expected) +
7059              Twine(" or an absolute expression"));
7060   }
7061   return false;
7062 }
7063 
7064 bool
7065 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7066   SMLoc S = getLoc();
7067 
7068   const MCExpr *Expr;
7069   if (Parser.parseExpression(Expr))
7070     return false;
7071 
7072   int64_t IntVal;
7073   if (Expr->evaluateAsAbsolute(IntVal)) {
7074     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7075   } else {
7076     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7077   }
7078   return true;
7079 }
7080 
7081 bool
7082 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7083   if (isToken(AsmToken::String)) {
7084     Val = getToken().getStringContents();
7085     lex();
7086     return true;
7087   } else {
7088     Error(getLoc(), ErrMsg);
7089     return false;
7090   }
7091 }
7092 
7093 bool
7094 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7095   if (isToken(AsmToken::Identifier)) {
7096     Val = getTokenStr();
7097     lex();
7098     return true;
7099   } else {
7100     if (!ErrMsg.empty())
7101       Error(getLoc(), ErrMsg);
7102     return false;
7103   }
7104 }
7105 
7106 AsmToken
7107 AMDGPUAsmParser::getToken() const {
7108   return Parser.getTok();
7109 }
7110 
7111 AsmToken
7112 AMDGPUAsmParser::peekToken() {
7113   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
7114 }
7115 
7116 void
7117 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7118   auto TokCount = getLexer().peekTokens(Tokens);
7119 
7120   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7121     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7122 }
7123 
7124 AsmToken::TokenKind
7125 AMDGPUAsmParser::getTokenKind() const {
7126   return getLexer().getKind();
7127 }
7128 
7129 SMLoc
7130 AMDGPUAsmParser::getLoc() const {
7131   return getToken().getLoc();
7132 }
7133 
7134 StringRef
7135 AMDGPUAsmParser::getTokenStr() const {
7136   return getToken().getString();
7137 }
7138 
7139 void
7140 AMDGPUAsmParser::lex() {
7141   Parser.Lex();
7142 }
7143 
7144 SMLoc
7145 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7146                                const OperandVector &Operands) const {
7147   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7148     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7149     if (Test(Op))
7150       return Op.getStartLoc();
7151   }
7152   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7153 }
7154 
7155 SMLoc
7156 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7157                            const OperandVector &Operands) const {
7158   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7159   return getOperandLoc(Test, Operands);
7160 }
7161 
7162 SMLoc
7163 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7164                            const OperandVector &Operands) const {
7165   auto Test = [=](const AMDGPUOperand& Op) {
7166     return Op.isRegKind() && Op.getReg() == Reg;
7167   };
7168   return getOperandLoc(Test, Operands);
7169 }
7170 
7171 SMLoc
7172 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7173   auto Test = [](const AMDGPUOperand& Op) {
7174     return Op.IsImmKindLiteral() || Op.isExpr();
7175   };
7176   return getOperandLoc(Test, Operands);
7177 }
7178 
7179 SMLoc
7180 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7181   auto Test = [](const AMDGPUOperand& Op) {
7182     return Op.isImmKindConst();
7183   };
7184   return getOperandLoc(Test, Operands);
7185 }
7186 
7187 //===----------------------------------------------------------------------===//
7188 // swizzle
7189 //===----------------------------------------------------------------------===//
7190 
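// For reference, ds_swizzle_b32 accepts either a raw 16-bit offset or a
// swizzle macro, e.g. (representative examples)
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pip")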
7191 LLVM_READNONE
7192 static unsigned
7193 encodeBitmaskPerm(const unsigned AndMask,
7194                   const unsigned OrMask,
7195                   const unsigned XorMask) {
7196   using namespace llvm::AMDGPU::Swizzle;
7197 
7198   return BITMASK_PERM_ENC |
7199          (AndMask << BITMASK_AND_SHIFT) |
7200          (OrMask  << BITMASK_OR_SHIFT)  |
7201          (XorMask << BITMASK_XOR_SHIFT);
7202 }
7203 
7204 bool
7205 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7206                                      const unsigned MinVal,
7207                                      const unsigned MaxVal,
7208                                      const StringRef ErrMsg,
7209                                      SMLoc &Loc) {
7210   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7211     return false;
7212   }
7213   Loc = getLoc();
7214   if (!parseExpr(Op)) {
7215     return false;
7216   }
7217   if (Op < MinVal || Op > MaxVal) {
7218     Error(Loc, ErrMsg);
7219     return false;
7220   }
7221 
7222   return true;
7223 }
7224 
7225 bool
7226 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7227                                       const unsigned MinVal,
7228                                       const unsigned MaxVal,
7229                                       const StringRef ErrMsg) {
7230   SMLoc Loc;
7231   for (unsigned i = 0; i < OpNum; ++i) {
7232     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7233       return false;
7234   }
7235 
7236   return true;
7237 }
7238 
7239 bool
7240 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7241   using namespace llvm::AMDGPU::Swizzle;
7242 
7243   int64_t Lane[LANE_NUM];
7244   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7245                            "expected a 2-bit lane id")) {
7246     Imm = QUAD_PERM_ENC;
7247     for (unsigned I = 0; I < LANE_NUM; ++I) {
7248       Imm |= Lane[I] << (LANE_SHIFT * I);
7249     }
7250     return true;
7251   }
7252   return false;
7253 }
7254 
7255 bool
7256 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7257   using namespace llvm::AMDGPU::Swizzle;
7258 
7259   SMLoc Loc;
7260   int64_t GroupSize;
7261   int64_t LaneIdx;
7262 
7263   if (!parseSwizzleOperand(GroupSize,
7264                            2, 32,
7265                            "group size must be in the interval [2,32]",
7266                            Loc)) {
7267     return false;
7268   }
7269   if (!isPowerOf2_64(GroupSize)) {
7270     Error(Loc, "group size must be a power of two");
7271     return false;
7272   }
7273   if (parseSwizzleOperand(LaneIdx,
7274                           0, GroupSize - 1,
7275                           "lane id must be in the interval [0,group size - 1]",
7276                           Loc)) {
7277     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7278     return true;
7279   }
7280   return false;
7281 }
7282 
7283 bool
7284 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7285   using namespace llvm::AMDGPU::Swizzle;
7286 
7287   SMLoc Loc;
7288   int64_t GroupSize;
7289 
7290   if (!parseSwizzleOperand(GroupSize,
7291                            2, 32,
7292                            "group size must be in the interval [2,32]",
7293                            Loc)) {
7294     return false;
7295   }
7296   if (!isPowerOf2_64(GroupSize)) {
7297     Error(Loc, "group size must be a power of two");
7298     return false;
7299   }
7300 
7301   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7302   return true;
7303 }
7304 
7305 bool
7306 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7307   using namespace llvm::AMDGPU::Swizzle;
7308 
7309   SMLoc Loc;
7310   int64_t GroupSize;
7311 
7312   if (!parseSwizzleOperand(GroupSize,
7313                            1, 16,
7314                            "group size must be in the interval [1,16]",
7315                            Loc)) {
7316     return false;
7317   }
7318   if (!isPowerOf2_64(GroupSize)) {
7319     Error(Loc, "group size must be a power of two");
7320     return false;
7321   }
7322 
7323   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7324   return true;
7325 }
7326 
7327 bool
7328 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7329   using namespace llvm::AMDGPU::Swizzle;
7330 
7331   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7332     return false;
7333   }
7334 
7335   StringRef Ctl;
7336   SMLoc StrLoc = getLoc();
7337   if (!parseString(Ctl)) {
7338     return false;
7339   }
7340   if (Ctl.size() != BITMASK_WIDTH) {
7341     Error(StrLoc, "expected a 5-character mask");
7342     return false;
7343   }
7344 
7345   unsigned AndMask = 0;
7346   unsigned OrMask = 0;
7347   unsigned XorMask = 0;
7348 
7349   for (size_t i = 0; i < Ctl.size(); ++i) {
7350     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7351     switch(Ctl[i]) {
7352     default:
7353       Error(StrLoc, "invalid mask");
7354       return false;
7355     case '0':
7356       break;
7357     case '1':
7358       OrMask |= Mask;
7359       break;
7360     case 'p':
7361       AndMask |= Mask;
7362       break;
7363     case 'i':
7364       AndMask |= Mask;
7365       XorMask |= Mask;
7366       break;
7367     }
7368   }
7369 
7370   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7371   return true;
7372 }
7373 
7374 bool
7375 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7376 
7377   SMLoc OffsetLoc = getLoc();
7378 
7379   if (!parseExpr(Imm, "a swizzle macro")) {
7380     return false;
7381   }
7382   if (!isUInt<16>(Imm)) {
7383     Error(OffsetLoc, "expected a 16-bit offset");
7384     return false;
7385   }
7386   return true;
7387 }
7388 
7389 bool
7390 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7391   using namespace llvm::AMDGPU::Swizzle;
7392 
7393   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7394 
7395     SMLoc ModeLoc = getLoc();
7396     bool Ok = false;
7397 
7398     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7399       Ok = parseSwizzleQuadPerm(Imm);
7400     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7401       Ok = parseSwizzleBitmaskPerm(Imm);
7402     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7403       Ok = parseSwizzleBroadcast(Imm);
7404     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7405       Ok = parseSwizzleSwap(Imm);
7406     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7407       Ok = parseSwizzleReverse(Imm);
7408     } else {
7409       Error(ModeLoc, "expected a swizzle mode");
7410     }
7411 
7412     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7413   }
7414 
7415   return false;
7416 }
7417 
7418 OperandMatchResultTy
7419 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7420   SMLoc S = getLoc();
7421   int64_t Imm = 0;
7422 
7423   if (trySkipId("offset")) {
7424 
7425     bool Ok = false;
7426     if (skipToken(AsmToken::Colon, "expected a colon")) {
7427       if (trySkipId("swizzle")) {
7428         Ok = parseSwizzleMacro(Imm);
7429       } else {
7430         Ok = parseSwizzleOffset(Imm);
7431       }
7432     }
7433 
7434     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7435 
7436     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7437   } else {
7438     // Swizzle "offset" operand is optional.
7439     // If it is omitted, try parsing other optional operands.
7440     return parseOptionalOpr(Operands);
7441   }
7442 }
7443 
7444 bool
7445 AMDGPUOperand::isSwizzle() const {
7446   return isImmTy(ImmTySwizzle);
7447 }
7448 
7449 //===----------------------------------------------------------------------===//
7450 // VGPR Index Mode
7451 //===----------------------------------------------------------------------===//
7452 
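// For reference, the gpr_idx operand is either a 4-bit immediate or a macro
// listing the enabled modes, e.g. (a representative example)
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)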
7453 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7454 
7455   using namespace llvm::AMDGPU::VGPRIndexMode;
7456 
7457   if (trySkipToken(AsmToken::RParen)) {
7458     return OFF;
7459   }
7460 
7461   int64_t Imm = 0;
7462 
7463   while (true) {
7464     unsigned Mode = 0;
7465     SMLoc S = getLoc();
7466 
7467     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7468       if (trySkipId(IdSymbolic[ModeId])) {
7469         Mode = 1 << ModeId;
7470         break;
7471       }
7472     }
7473 
7474     if (Mode == 0) {
7475       Error(S, (Imm == 0)?
7476                "expected a VGPR index mode or a closing parenthesis" :
7477                "expected a VGPR index mode");
7478       return UNDEF;
7479     }
7480 
7481     if (Imm & Mode) {
7482       Error(S, "duplicate VGPR index mode");
7483       return UNDEF;
7484     }
7485     Imm |= Mode;
7486 
7487     if (trySkipToken(AsmToken::RParen))
7488       break;
7489     if (!skipToken(AsmToken::Comma,
7490                    "expected a comma or a closing parenthesis"))
7491       return UNDEF;
7492   }
7493 
7494   return Imm;
7495 }
7496 
7497 OperandMatchResultTy
7498 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7499 
7500   using namespace llvm::AMDGPU::VGPRIndexMode;
7501 
7502   int64_t Imm = 0;
7503   SMLoc S = getLoc();
7504 
7505   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7506     Imm = parseGPRIdxMacro();
7507     if (Imm == UNDEF)
7508       return MatchOperand_ParseFail;
7509   } else {
7510     if (getParser().parseAbsoluteExpression(Imm))
7511       return MatchOperand_ParseFail;
7512     if (Imm < 0 || !isUInt<4>(Imm)) {
7513       Error(S, "invalid immediate: only 4-bit values are legal");
7514       return MatchOperand_ParseFail;
7515     }
7516   }
7517 
7518   Operands.push_back(
7519       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7520   return MatchOperand_Success;
7521 }
7522 
7523 bool AMDGPUOperand::isGPRIdxMode() const {
7524   return isImmTy(ImmTyGprIdxMode);
7525 }
7526 
7527 //===----------------------------------------------------------------------===//
7528 // sopp branch targets
7529 //===----------------------------------------------------------------------===//
7530 
7531 OperandMatchResultTy
7532 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7533 
7534   // Make sure we are not parsing something
7535   // that looks like a label or an expression but is not.
7536   // This will improve error messages.
7537   if (isRegister() || isModifier())
7538     return MatchOperand_NoMatch;
7539 
7540   if (!parseExpr(Operands))
7541     return MatchOperand_ParseFail;
7542 
7543   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7544   assert(Opr.isImm() || Opr.isExpr());
7545   SMLoc Loc = Opr.getStartLoc();
7546 
7547   // Currently we do not support arbitrary expressions as branch targets.
7548   // Only labels and absolute expressions are accepted.
7549   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7550     Error(Loc, "expected an absolute expression or a label");
7551   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7552     Error(Loc, "expected a 16-bit signed jump offset");
7553   }
7554 
7555   return MatchOperand_Success;
7556 }
7557 
7558 //===----------------------------------------------------------------------===//
7559 // Boolean holding registers
7560 //===----------------------------------------------------------------------===//
7561 
7562 OperandMatchResultTy
7563 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7564   return parseReg(Operands);
7565 }
7566 
7567 //===----------------------------------------------------------------------===//
7568 // mubuf
7569 //===----------------------------------------------------------------------===//
7570 
7571 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7572   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7573 }
7574 
7575 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7576                                    const OperandVector &Operands,
7577                                    bool IsAtomic,
7578                                    bool IsLds) {
7579   OptionalImmIndexMap OptionalIdx;
7580   unsigned FirstOperandIdx = 1;
7581   bool IsAtomicReturn = false;
7582 
7583   if (IsAtomic) {
7584     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7585       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7586       if (!Op.isCPol())
7587         continue;
7588       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7589       break;
7590     }
7591 
7592     if (!IsAtomicReturn) {
7593       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7594       if (NewOpc != -1)
7595         Inst.setOpcode(NewOpc);
7596     }
7597 
7598     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7599                       SIInstrFlags::IsAtomicRet;
7600   }
7601 
7602   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7603     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7604 
7605     // Add the register arguments
7606     if (Op.isReg()) {
7607       Op.addRegOperands(Inst, 1);
7608       // Insert a tied src for atomic return dst.
7609       // This cannot be postponed as subsequent calls to
7610       // addImmOperands rely on correct number of MC operands.
7611       if (IsAtomicReturn && i == FirstOperandIdx)
7612         Op.addRegOperands(Inst, 1);
7613       continue;
7614     }
7615 
7616     // Handle the case where soffset is an immediate
7617     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7618       Op.addImmOperands(Inst, 1);
7619       continue;
7620     }
7621 
7622     // Handle tokens like 'offen' which are sometimes hard-coded into the
7623     // asm string.  There are no MCInst operands for these.
7624     if (Op.isToken()) {
7625       continue;
7626     }
7627     assert(Op.isImm());
7628 
7629     // Handle optional arguments
7630     OptionalIdx[Op.getImmTy()] = i;
7631   }
7632 
7633   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7634   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7635 
7636   if (!IsLds) { // tfe is not legal with lds opcodes
7637     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7638   }
7639   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7640 }
7641 
7642 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7643   OptionalImmIndexMap OptionalIdx;
7644 
7645   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7646     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7647 
7648     // Add the register arguments
7649     if (Op.isReg()) {
7650       Op.addRegOperands(Inst, 1);
7651       continue;
7652     }
7653 
7654     // Handle the case where soffset is an immediate
7655     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7656       Op.addImmOperands(Inst, 1);
7657       continue;
7658     }
7659 
7660     // Handle tokens like 'offen' which are sometimes hard-coded into the
7661     // asm string.  There are no MCInst operands for these.
7662     if (Op.isToken()) {
7663       continue;
7664     }
7665     assert(Op.isImm());
7666 
7667     // Handle optional arguments
7668     OptionalIdx[Op.getImmTy()] = i;
7669   }
7670 
7671   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7672                         AMDGPUOperand::ImmTyOffset);
7673   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7674   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7675   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7676   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7677 }
7678 
7679 //===----------------------------------------------------------------------===//
7680 // mimg
7681 //===----------------------------------------------------------------------===//
7682 
7683 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7684                               bool IsAtomic) {
7685   unsigned I = 1;
7686   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7687   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7688     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7689   }
7690 
7691   if (IsAtomic) {
7692     // Add src, same as dst
7693     assert(Desc.getNumDefs() == 1);
7694     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7695   }
7696 
7697   OptionalImmIndexMap OptionalIdx;
7698 
7699   for (unsigned E = Operands.size(); I != E; ++I) {
7700     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7701 
7702     // Add the register arguments
7703     if (Op.isReg()) {
7704       Op.addRegOperands(Inst, 1);
7705     } else if (Op.isImmModifier()) {
7706       OptionalIdx[Op.getImmTy()] = I;
7707     } else if (!Op.isToken()) {
7708       llvm_unreachable("unexpected operand type");
7709     }
7710   }
7711 
7712   bool IsGFX10Plus = isGFX10Plus();
7713 
7714   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7715   if (IsGFX10Plus)
7716     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7717   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7718   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7719   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7720   if (IsGFX10Plus)
7721     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7722   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7723     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7724   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7725   if (!IsGFX10Plus)
7726     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7727   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7728 }
7729 
7730 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7731   cvtMIMG(Inst, Operands, true);
7732 }
7733 
7734 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7735   OptionalImmIndexMap OptionalIdx;
7736   bool IsAtomicReturn = false;
7737 
7738   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7739     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7740     if (!Op.isCPol())
7741       continue;
7742     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7743     break;
7744   }
7745 
7746   if (!IsAtomicReturn) {
7747     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7748     if (NewOpc != -1)
7749       Inst.setOpcode(NewOpc);
7750   }
7751 
7752   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7753                     SIInstrFlags::IsAtomicRet;
7754 
7755   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7756     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7757 
7758     // Add the register arguments
7759     if (Op.isReg()) {
7760       Op.addRegOperands(Inst, 1);
7761       if (IsAtomicReturn && i == 1)
7762         Op.addRegOperands(Inst, 1);
7763       continue;
7764     }
7765 
7766     // Handle the case where soffset is an immediate
7767     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7768       Op.addImmOperands(Inst, 1);
7769       continue;
7770     }
7771 
7772     // Handle tokens like 'offen' which are sometimes hard-coded into the
7773     // asm string.  There are no MCInst operands for these.
7774     if (Op.isToken()) {
7775       continue;
7776     }
7777     assert(Op.isImm());
7778 
7779     // Handle optional arguments
7780     OptionalIdx[Op.getImmTy()] = i;
7781   }
7782 
7783   if ((int)Inst.getNumOperands() <=
7784       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7785     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7786   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7787 }
7788 
7789 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7790                                       const OperandVector &Operands) {
7791   for (unsigned I = 1; I < Operands.size(); ++I) {
7792     auto &Operand = (AMDGPUOperand &)*Operands[I];
7793     if (Operand.isReg())
7794       Operand.addRegOperands(Inst, 1);
7795   }
7796 
7797   Inst.addOperand(MCOperand::createImm(1)); // a16
7798 }
7799 
7800 //===----------------------------------------------------------------------===//
7801 // smrd
7802 //===----------------------------------------------------------------------===//
7803 
7804 bool AMDGPUOperand::isSMRDOffset8() const {
7805   return isImm() && isUInt<8>(getImm());
7806 }
7807 
7808 bool AMDGPUOperand::isSMEMOffset() const {
7809   return isImmTy(ImmTyNone) ||
7810          isImmTy(ImmTyOffset); // Offset range is checked later by validator.
7811 }
7812 
7813 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7814   // 32-bit literals are only supported on CI and we only want to use them
7815   // when the offset does not fit in 8 bits.
7816   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7817 }
7818 
7819 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7820   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7821 }
7822 
7823 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7824   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7825 }
7826 
7827 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7828   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7829 }
7830 
7831 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7832   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7833 }
7834 
7835 //===----------------------------------------------------------------------===//
7836 // vop3
7837 //===----------------------------------------------------------------------===//
7838 
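// Convert a parsed 'omod' multiplier to its encoded value: mul:1 -> 0 (no
// output modifier), mul:2 -> 1, mul:4 -> 2, e.g. the trailing "mul:2" in
// "v_add_f32_e64 v0, v1, v2 mul:2".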
7839 static bool ConvertOmodMul(int64_t &Mul) {
7840   if (Mul != 1 && Mul != 2 && Mul != 4)
7841     return false;
7842 
7843   Mul >>= 1;
7844   return true;
7845 }
7846 
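// Convert a parsed 'omod' divisor to its encoded value: div:1 -> 0 (no
// output modifier), div:2 -> 3.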
7847 static bool ConvertOmodDiv(int64_t &Div) {
7848   if (Div == 1) {
7849     Div = 0;
7850     return true;
7851   }
7852 
7853   if (Div == 2) {
7854     Div = 3;
7855     return true;
7856   }
7857 
7858   return false;
7859 }
7860 
7861 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7862 // This is intentional and ensures compatibility with sp3.
7863 // See bug 35397 for details.
7864 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7865   if (BoundCtrl == 0 || BoundCtrl == 1) {
7866     BoundCtrl = 1;
7867     return true;
7868   }
7869   return false;
7870 }
7871 
7872 // Note: the order in this table matches the order of operands in AsmString.
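// Each entry gives the operand's asm name, its immediate type, whether it is
// parsed as a bare named bit (IsBit), and an optional value-conversion
// callback.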
7873 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7874   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7875   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7876   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7877   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7878   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7879   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7880   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7881   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7882   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7883   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7884   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7885   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7886   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7887   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7888   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7889   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7890   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7891   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7892   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7893   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7894   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7895   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7896   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7897   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7898   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7899   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7900   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7901   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7902   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7903   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7904   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7905   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7906   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7907   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7908   {"dpp8",     AMDGPUOperand::ImmTyDPP8, false, nullptr},
7909   {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr},
7910   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7911   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7912   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7913   {"fi",   AMDGPUOperand::ImmTyDppFi, false, nullptr},
7914   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7915   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7916   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7917   {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
7918   {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
7919 };
7920 
7921 void AMDGPUAsmParser::onBeginOfFile() {
7922   if (!getParser().getStreamer().getTargetStreamer() ||
7923       getSTI().getTargetTriple().getArch() == Triple::r600)
7924     return;
7925 
7926   if (!getTargetStreamer().getTargetID())
7927     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7928 
7929   if (isHsaAbiVersion3AndAbove(&getSTI()))
7930     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7931 }
7932 
7933 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7934 
7935   OperandMatchResultTy res = parseOptionalOpr(Operands);
7936 
7937   // This is a hack to enable hardcoded mandatory operands which follow
7938   // optional operands.
7939   //
7940   // The current design assumes that all operands after the first optional
7941   // operand are also optional. However, the implementation of some
7942   // instructions violates this rule (e.g. flat/global atomics, which have
7943   // hardcoded 'glc' operands).
7944   //
7945   // To alleviate this problem, we have to (implicitly) parse extra operands so
7946   // the autogenerated custom-operand parser never hits a mandatory operand.
7947 
7948   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7949     if (res != MatchOperand_Success ||
7950         isToken(AsmToken::EndOfStatement))
7951       break;
7952 
7953     trySkipToken(AsmToken::Comma);
7954     res = parseOptionalOpr(Operands);
7955   }
7956 
7957   return res;
7958 }
7959 
7960 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7961   OperandMatchResultTy res;
7962   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7963     // try to parse any optional operand here
7964     if (Op.IsBit) {
7965       res = parseNamedBit(Op.Name, Operands, Op.Type);
7966     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7967       res = parseOModOperand(Operands);
7968     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7969                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7970                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7971       res = parseSDWASel(Operands, Op.Name, Op.Type);
7972     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7973       res = parseSDWADstUnused(Operands);
7974     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7975                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7976                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7977                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7978       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7979                                         Op.ConvertResult);
7980     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7981       res = parseDim(Operands);
7982     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7983       res = parseCPol(Operands);
7984     } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) {
7985       res = parseDPP8(Operands);
7986     } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) {
7987       res = parseDPPCtrl(Operands);
7988     } else {
7989       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7990       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7991         res = parseOperandArrayWithPrefix("neg", Operands,
7992                                           AMDGPUOperand::ImmTyBLGP,
7993                                           nullptr);
7994       }
7995     }
7996     if (res != MatchOperand_NoMatch) {
7997       return res;
7998     }
7999   }
8000   return MatchOperand_NoMatch;
8001 }
8002 
8003 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
8004   StringRef Name = getTokenStr();
8005   if (Name == "mul") {
8006     return parseIntWithPrefix("mul", Operands,
8007                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8008   }
8009 
8010   if (Name == "div") {
8011     return parseIntWithPrefix("div", Operands,
8012                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8013   }
8014 
8015   return MatchOperand_NoMatch;
8016 }
8017 
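// For VOP3 op_sel opcodes, the op_sel bit immediately after the last source
// operand applies to the destination; after the regular VOP3P conversion it
// is folded into src0_modifiers as DST_OP_SEL.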
8018 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
8019   cvtVOP3P(Inst, Operands);
8020 
8021   int Opc = Inst.getOpcode();
8022 
8023   int SrcNum;
8024   const int Ops[] = { AMDGPU::OpName::src0,
8025                       AMDGPU::OpName::src1,
8026                       AMDGPU::OpName::src2 };
8027   for (SrcNum = 0;
8028        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
8029        ++SrcNum);
8030   assert(SrcNum > 0);
8031 
8032   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8033   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8034 
8035   if ((OpSel & (1 << SrcNum)) != 0) {
8036     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8037     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8038     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8039   }
8040 }
8041 
8042 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8043       // 1. This operand is an input-modifiers operand
8044   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8045       // 2. This is not the last operand
8046       && Desc.NumOperands > (OpNum + 1)
8047       // 3. The next operand has a register class
8048       && Desc.OpInfo[OpNum + 1].RegClass != -1
8049       // 4. The next operand is not tied to any other operand
8050       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
8051 }
8052 
8053 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8054 {
8055   OptionalImmIndexMap OptionalIdx;
8056   unsigned Opc = Inst.getOpcode();
8057 
8058   unsigned I = 1;
8059   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8060   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8061     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8062   }
8063 
8064   for (unsigned E = Operands.size(); I != E; ++I) {
8065     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8066     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8067       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8068     } else if (Op.isInterpSlot() ||
8069                Op.isInterpAttr() ||
8070                Op.isAttrChan()) {
8071       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8072     } else if (Op.isImmModifier()) {
8073       OptionalIdx[Op.getImmTy()] = I;
8074     } else {
8075       llvm_unreachable("unhandled operand type");
8076     }
8077   }
8078 
8079   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
8080     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
8081   }
8082 
8083   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8084     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8085   }
8086 
8087   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8088     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8089   }
8090 }
8091 
8092 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8093 {
8094   OptionalImmIndexMap OptionalIdx;
8095   unsigned Opc = Inst.getOpcode();
8096 
8097   unsigned I = 1;
8098   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8099   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8100     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8101   }
8102 
8103   for (unsigned E = Operands.size(); I != E; ++I) {
8104     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8105     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8106       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8107     } else if (Op.isImmModifier()) {
8108       OptionalIdx[Op.getImmTy()] = I;
8109     } else {
8110       llvm_unreachable("unhandled operand type");
8111     }
8112   }
8113 
8114   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8115 
8116   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8117   if (OpSelIdx != -1)
8118     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8119 
8120   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8121 
8122   if (OpSelIdx == -1)
8123     return;
8124 
8125   const int Ops[] = { AMDGPU::OpName::src0,
8126                       AMDGPU::OpName::src1,
8127                       AMDGPU::OpName::src2 };
8128   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8129                          AMDGPU::OpName::src1_modifiers,
8130                          AMDGPU::OpName::src2_modifiers };
8131 
8132   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8133 
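  // Distribute the parsed op_sel bits into the per-source modifier operands:
  // bit J sets OP_SEL_0 on srcJ_modifiers, and bit 3 sets DST_OP_SEL on
  // src0_modifiers.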
8134   for (int J = 0; J < 3; ++J) {
8135     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8136     if (OpIdx == -1)
8137       break;
8138 
8139     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8140     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8141 
8142     if ((OpSel & (1 << J)) != 0)
8143       ModVal |= SISrcMods::OP_SEL_0;
8144     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8145         (OpSel & (1 << 3)) != 0)
8146       ModVal |= SISrcMods::DST_OP_SEL;
8147 
8148     Inst.getOperand(ModIdx).setImm(ModVal);
8149   }
8150 }
8151 
8152 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8153                               OptionalImmIndexMap &OptionalIdx) {
8154   unsigned Opc = Inst.getOpcode();
8155 
8156   unsigned I = 1;
8157   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8158   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8159     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8160   }
8161 
8162   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8163     // This instruction has src modifiers
8164     for (unsigned E = Operands.size(); I != E; ++I) {
8165       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8166       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8167         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8168       } else if (Op.isImmModifier()) {
8169         OptionalIdx[Op.getImmTy()] = I;
8170       } else if (Op.isRegOrImm()) {
8171         Op.addRegOrImmOperands(Inst, 1);
8172       } else {
8173         llvm_unreachable("unhandled operand type");
8174       }
8175     }
8176   } else {
8177     // No src modifiers
8178     for (unsigned E = Operands.size(); I != E; ++I) {
8179       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8180       if (Op.isMod()) {
8181         OptionalIdx[Op.getImmTy()] = I;
8182       } else {
8183         Op.addRegOrImmOperands(Inst, 1);
8184       }
8185     }
8186   }
8187 
8188   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8189     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8190   }
8191 
8192   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8193     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8194   }
8195 
8196   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8197   // these opcodes have a src2 register operand that is tied to the dst operand.
8198   // We don't allow modifiers for this operand in the assembler, so
8199   // src2_modifiers should be 0.
8200   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8201       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8202       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8203       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8204       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8205       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8206       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8207       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8208       Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
8209       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8210       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8211       Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
8212       Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
8213       Opc == AMDGPU::V_FMAC_F16_e64_gfx11) {
8214     auto it = Inst.begin();
8215     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8216     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8217     ++it;
8218     // Copy the operand to ensure it's not invalidated when Inst grows.
8219     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8220   }
8221 }
8222 
8223 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8224   OptionalImmIndexMap OptionalIdx;
8225   cvtVOP3(Inst, Operands, OptionalIdx);
8226 }
8227 
8228 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8229                                OptionalImmIndexMap &OptIdx) {
8230   const int Opc = Inst.getOpcode();
8231   const MCInstrDesc &Desc = MII.get(Opc);
8232 
8233   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8234 
8235   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8236     assert(!IsPacked);
8237     Inst.addOperand(Inst.getOperand(0));
8238   }
8239 
8240   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
8241   // instruction, and then figure out where to actually put the modifiers.
8242 
8243   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8244   if (OpSelIdx != -1) {
8245     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8246   }
8247 
8248   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8249   if (OpSelHiIdx != -1) {
8250     int DefaultVal = IsPacked ? -1 : 0;
8251     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8252                           DefaultVal);
8253   }
8254 
8255   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8256   if (NegLoIdx != -1) {
8257     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8258     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8259   }
8260 
8261   const int Ops[] = { AMDGPU::OpName::src0,
8262                       AMDGPU::OpName::src1,
8263                       AMDGPU::OpName::src2 };
8264   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8265                          AMDGPU::OpName::src1_modifiers,
8266                          AMDGPU::OpName::src2_modifiers };
8267 
8268   unsigned OpSel = 0;
8269   unsigned OpSelHi = 0;
8270   unsigned NegLo = 0;
8271   unsigned NegHi = 0;
8272 
8273   if (OpSelIdx != -1)
8274     OpSel = Inst.getOperand(OpSelIdx).getImm();
8275 
8276   if (OpSelHiIdx != -1)
8277     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8278 
8279   if (NegLoIdx != -1) {
8280     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8281     NegLo = Inst.getOperand(NegLoIdx).getImm();
8282     NegHi = Inst.getOperand(NegHiIdx).getImm();
8283   }
8284 
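  // Transfer the parsed op_sel/op_sel_hi/neg_lo/neg_hi bit for each source
  // into the corresponding srcN_modifiers operand.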
8285   for (int J = 0; J < 3; ++J) {
8286     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8287     if (OpIdx == -1)
8288       break;
8289 
8290     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8291 
8292     if (ModIdx == -1)
8293       continue;
8294 
8295     uint32_t ModVal = 0;
8296 
8297     if ((OpSel & (1 << J)) != 0)
8298       ModVal |= SISrcMods::OP_SEL_0;
8299 
8300     if ((OpSelHi & (1 << J)) != 0)
8301       ModVal |= SISrcMods::OP_SEL_1;
8302 
8303     if ((NegLo & (1 << J)) != 0)
8304       ModVal |= SISrcMods::NEG;
8305 
8306     if ((NegHi & (1 << J)) != 0)
8307       ModVal |= SISrcMods::NEG_HI;
8308 
8309     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8310   }
8311 }
8312 
8313 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8314   OptionalImmIndexMap OptIdx;
8315   cvtVOP3(Inst, Operands, OptIdx);
8316   cvtVOP3P(Inst, Operands, OptIdx);
8317 }
8318 
8319 //===----------------------------------------------------------------------===//
8320 // dpp
8321 //===----------------------------------------------------------------------===//
8322 
8323 bool AMDGPUOperand::isDPP8() const {
8324   return isImmTy(ImmTyDPP8);
8325 }
8326 
8327 bool AMDGPUOperand::isDPPCtrl() const {
8328   using namespace AMDGPU::DPP;
8329 
8330   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8331   if (result) {
8332     int64_t Imm = getImm();
8333     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8334            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8335            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8336            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8337            (Imm == DppCtrl::WAVE_SHL1) ||
8338            (Imm == DppCtrl::WAVE_ROL1) ||
8339            (Imm == DppCtrl::WAVE_SHR1) ||
8340            (Imm == DppCtrl::WAVE_ROR1) ||
8341            (Imm == DppCtrl::ROW_MIRROR) ||
8342            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8343            (Imm == DppCtrl::BCAST15) ||
8344            (Imm == DppCtrl::BCAST31) ||
8345            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8346            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8347   }
8348   return false;
8349 }
8350 
8351 //===----------------------------------------------------------------------===//
8352 // mAI
8353 //===----------------------------------------------------------------------===//
8354 
8355 bool AMDGPUOperand::isBLGP() const {
8356   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8357 }
8358 
8359 bool AMDGPUOperand::isCBSZ() const {
8360   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8361 }
8362 
8363 bool AMDGPUOperand::isABID() const {
8364   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8365 }
8366 
8367 bool AMDGPUOperand::isS16Imm() const {
8368   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8369 }
8370 
8371 bool AMDGPUOperand::isU16Imm() const {
8372   return isImm() && isUInt<16>(getImm());
8373 }
8374 
8375 //===----------------------------------------------------------------------===//
8376 // dim
8377 //===----------------------------------------------------------------------===//
8378 
8379 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8380   // We want to allow "dim:1D" etc.,
8381   // but the initial 1 is tokenized as an integer.
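  // e.g. "dim:2D" or "dim:SQ_RSRC_IMG_2D_ARRAY" (the SQ_RSRC_IMG_ prefix is
  // optional).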
8382   std::string Token;
8383   if (isToken(AsmToken::Integer)) {
8384     SMLoc Loc = getToken().getEndLoc();
8385     Token = std::string(getTokenStr());
8386     lex();
8387     if (getLoc() != Loc)
8388       return false;
8389   }
8390 
8391   StringRef Suffix;
8392   if (!parseId(Suffix))
8393     return false;
8394   Token += Suffix;
8395 
8396   StringRef DimId = Token;
8397   if (DimId.startswith("SQ_RSRC_IMG_"))
8398     DimId = DimId.drop_front(12);
8399 
8400   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8401   if (!DimInfo)
8402     return false;
8403 
8404   Encoding = DimInfo->Encoding;
8405   return true;
8406 }
8407 
8408 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8409   if (!isGFX10Plus())
8410     return MatchOperand_NoMatch;
8411 
8412   SMLoc S = getLoc();
8413 
8414   if (!trySkipId("dim", AsmToken::Colon))
8415     return MatchOperand_NoMatch;
8416 
8417   unsigned Encoding;
8418   SMLoc Loc = getLoc();
8419   if (!parseDimId(Encoding)) {
8420     Error(Loc, "invalid dim value");
8421     return MatchOperand_ParseFail;
8422   }
8423 
8424   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8425                                               AMDGPUOperand::ImmTyDim));
8426   return MatchOperand_Success;
8427 }
8428 
8429 //===----------------------------------------------------------------------===//
8430 // dpp
8431 //===----------------------------------------------------------------------===//
8432 
8433 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8434   SMLoc S = getLoc();
8435 
8436   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8437     return MatchOperand_NoMatch;
8438 
8439   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
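  // e.g. dpp8:[7,6,5,4,3,2,1,0] selects the lanes in reverse order within
  // each group of eight.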
8440 
8441   int64_t Sels[8];
8442 
8443   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8444     return MatchOperand_ParseFail;
8445 
8446   for (size_t i = 0; i < 8; ++i) {
8447     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8448       return MatchOperand_ParseFail;
8449 
8450     SMLoc Loc = getLoc();
8451     if (getParser().parseAbsoluteExpression(Sels[i]))
8452       return MatchOperand_ParseFail;
8453     if (0 > Sels[i] || 7 < Sels[i]) {
8454       Error(Loc, "expected a 3-bit value");
8455       return MatchOperand_ParseFail;
8456     }
8457   }
8458 
8459   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8460     return MatchOperand_ParseFail;
8461 
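  // Pack the eight 3-bit lane selects into a single immediate, lane i in
  // bits [3*i+2 : 3*i].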
8462   unsigned DPP8 = 0;
8463   for (size_t i = 0; i < 8; ++i)
8464     DPP8 |= (Sels[i] << (i * 3));
8465 
8466   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8467   return MatchOperand_Success;
8468 }
8469 
8470 bool
8471 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8472                                     const OperandVector &Operands) {
8473   if (Ctrl == "row_newbcast")
8474     return isGFX90A();
8475 
8476   if (Ctrl == "row_share" ||
8477       Ctrl == "row_xmask")
8478     return isGFX10Plus();
8479 
8480   if (Ctrl == "wave_shl" ||
8481       Ctrl == "wave_shr" ||
8482       Ctrl == "wave_rol" ||
8483       Ctrl == "wave_ror" ||
8484       Ctrl == "row_bcast")
8485     return isVI() || isGFX9();
8486 
8487   return Ctrl == "row_mirror" ||
8488          Ctrl == "row_half_mirror" ||
8489          Ctrl == "quad_perm" ||
8490          Ctrl == "row_shl" ||
8491          Ctrl == "row_shr" ||
8492          Ctrl == "row_ror";
8493 }
8494 
8495 int64_t
8496 AMDGPUAsmParser::parseDPPCtrlPerm() {
8497   // quad_perm:[%d,%d,%d,%d]
8498 
8499   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8500     return -1;
8501 
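  // Each 2-bit lane select i is packed into bits [2*i+1 : 2*i]; the identity
  // permutation quad_perm:[0,1,2,3] therefore encodes as 0xe4.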
8502   int64_t Val = 0;
8503   for (int i = 0; i < 4; ++i) {
8504     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8505       return -1;
8506 
8507     int64_t Temp;
8508     SMLoc Loc = getLoc();
8509     if (getParser().parseAbsoluteExpression(Temp))
8510       return -1;
8511     if (Temp < 0 || Temp > 3) {
8512       Error(Loc, "expected a 2-bit value");
8513       return -1;
8514     }
8515 
8516     Val += (Temp << i * 2);
8517   }
8518 
8519   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8520     return -1;
8521 
8522   return Val;
8523 }
8524 
8525 int64_t
8526 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8527   using namespace AMDGPU::DPP;
8528 
8529   // sel:%d
8530 
8531   int64_t Val;
8532   SMLoc Loc = getLoc();
8533 
8534   if (getParser().parseAbsoluteExpression(Val))
8535     return -1;
8536 
8537   struct DppCtrlCheck {
8538     int64_t Ctrl;
8539     int Lo;
8540     int Hi;
8541   };
8542 
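  // Lo/Hi give the accepted range for the value after the colon; the final
  // control is Check.Ctrl alone when the range is a single value, otherwise
  // Check.Ctrl OR'ed with the parsed value.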
8543   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8544     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8545     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8546     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8547     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8548     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8549     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8550     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8551     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8552     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8553     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8554     .Default({-1, 0, 0});
8555 
8556   bool Valid;
8557   if (Check.Ctrl == -1) {
8558     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8559     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8560   } else {
8561     Valid = Check.Lo <= Val && Val <= Check.Hi;
8562     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8563   }
8564 
8565   if (!Valid) {
8566     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8567     return -1;
8568   }
8569 
8570   return Val;
8571 }
8572 
8573 OperandMatchResultTy
8574 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8575   using namespace AMDGPU::DPP;
8576 
8577   if (!isToken(AsmToken::Identifier) ||
8578       !isSupportedDPPCtrl(getTokenStr(), Operands))
8579     return MatchOperand_NoMatch;
8580 
8581   SMLoc S = getLoc();
8582   int64_t Val = -1;
8583   StringRef Ctrl;
8584 
8585   parseId(Ctrl);
8586 
8587   if (Ctrl == "row_mirror") {
8588     Val = DppCtrl::ROW_MIRROR;
8589   } else if (Ctrl == "row_half_mirror") {
8590     Val = DppCtrl::ROW_HALF_MIRROR;
8591   } else {
8592     if (skipToken(AsmToken::Colon, "expected a colon")) {
8593       if (Ctrl == "quad_perm") {
8594         Val = parseDPPCtrlPerm();
8595       } else {
8596         Val = parseDPPCtrlSel(Ctrl);
8597       }
8598     }
8599   }
8600 
8601   if (Val == -1)
8602     return MatchOperand_ParseFail;
8603 
8604   Operands.push_back(
8605     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8606   return MatchOperand_Success;
8607 }
8608 
8609 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8610   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8611 }
8612 
8613 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8614   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8615 }
8616 
8617 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8618   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8619 }
8620 
8621 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8622   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8623 }
8624 
8625 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8626   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8627 }
8628 
8629 // Add dummy $old operand
8630 void AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst,
8631                                         const OperandVector &Operands,
8632                                         bool IsDPP8) {
8633   Inst.addOperand(MCOperand::createReg(0));
8634   cvtVOP3DPP(Inst, Operands, IsDPP8);
8635 }
8636 
8637 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8638   OptionalImmIndexMap OptionalIdx;
8639   unsigned Opc = Inst.getOpcode();
8640   bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8641   unsigned I = 1;
8642   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8643   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8644     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8645   }
8646 
8647   int Fi = 0;
8648   for (unsigned E = Operands.size(); I != E; ++I) {
8649     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8650                                             MCOI::TIED_TO);
8651     if (TiedTo != -1) {
8652       assert((unsigned)TiedTo < Inst.getNumOperands());
8653       // handle tied old or src2 for MAC instructions
8654       Inst.addOperand(Inst.getOperand(TiedTo));
8655     }
8656     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8657     // Add the register arguments
8658     if (IsDPP8 && Op.isFI()) {
8659       Fi = Op.getImm();
8660     } else if (HasModifiers &&
8661                isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8662       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8663     } else if (Op.isReg()) {
8664       Op.addRegOperands(Inst, 1);
8665     } else if (Op.isImm() &&
8666                Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) {
8667       assert(!HasModifiers && "Case should be unreachable with modifiers");
8668       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8669       Op.addImmOperands(Inst, 1);
8670     } else if (Op.isImm()) {
8671       OptionalIdx[Op.getImmTy()] = I;
8672     } else {
8673       llvm_unreachable("unhandled operand type");
8674     }
8675   }
8676   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8677     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8678   }
8679   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8680     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8681   }
8682   if (Desc.TSFlags & SIInstrFlags::VOP3P)
8683     cvtVOP3P(Inst, Operands, OptionalIdx);
8684   else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
8685     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8686   }
8687 
8688   if (IsDPP8) {
8689     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8690     using namespace llvm::AMDGPU::DPP;
8691     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8692   } else {
8693     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8694     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8695     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8696     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8697     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8698       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8699     }
8700   }
8701 }
8702 
8703 // Add dummy $old operand
8704 void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst,
8705                                       const OperandVector &Operands,
8706                                       bool IsDPP8) {
8707   Inst.addOperand(MCOperand::createReg(0));
8708   cvtDPP(Inst, Operands, IsDPP8);
8709 }
8710 
8711 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8712   OptionalImmIndexMap OptionalIdx;
8713 
8714   unsigned Opc = Inst.getOpcode();
8715   bool HasModifiers =
8716       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8717   unsigned I = 1;
8718   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8719   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8720     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8721   }
8722 
8723   int Fi = 0;
8724   for (unsigned E = Operands.size(); I != E; ++I) {
8725     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8726                                             MCOI::TIED_TO);
8727     if (TiedTo != -1) {
8728       assert((unsigned)TiedTo < Inst.getNumOperands());
8729       // handle tied old or src2 for MAC instructions
8730       Inst.addOperand(Inst.getOperand(TiedTo));
8731     }
8732     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8733     // Add the register arguments
8734     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8735       // VOP2b (v_add_u32, v_sub_u32 ...) DPP uses the "vcc" token.
8736       // Skip it.
8737       continue;
8738     }
8739 
8740     if (IsDPP8) {
8741       if (Op.isDPP8()) {
8742         Op.addImmOperands(Inst, 1);
8743       } else if (HasModifiers &&
8744                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8745         Op.addRegWithFPInputModsOperands(Inst, 2);
8746       } else if (Op.isFI()) {
8747         Fi = Op.getImm();
8748       } else if (Op.isReg()) {
8749         Op.addRegOperands(Inst, 1);
8750       } else {
8751         llvm_unreachable("Invalid operand type");
8752       }
8753     } else {
8754       if (HasModifiers &&
8755           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8756         Op.addRegWithFPInputModsOperands(Inst, 2);
8757       } else if (Op.isReg()) {
8758         Op.addRegOperands(Inst, 1);
8759       } else if (Op.isDPPCtrl()) {
8760         Op.addImmOperands(Inst, 1);
8761       } else if (Op.isImm()) {
8762         // Handle optional arguments
8763         OptionalIdx[Op.getImmTy()] = I;
8764       } else {
8765         llvm_unreachable("Invalid operand type");
8766       }
8767     }
8768   }
8769 
8770   if (IsDPP8) {
8771     using namespace llvm::AMDGPU::DPP;
8772     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8773   } else {
8774     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8775     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8776     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8777     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8778       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8779     }
8780   }
8781 }
8782 
8783 //===----------------------------------------------------------------------===//
8784 // sdwa
8785 //===----------------------------------------------------------------------===//
8786 
8787 OperandMatchResultTy
8788 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8789                               AMDGPUOperand::ImmTy Type) {
8790   using namespace llvm::AMDGPU::SDWA;
8791 
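  // Parses "<prefix>:<sel>" where <sel> names a byte or word lane,
  // e.g. "dst_sel:BYTE_0" or "src0_sel:WORD_1".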
8792   SMLoc S = getLoc();
8793   StringRef Value;
8794   OperandMatchResultTy res;
8795 
8796   SMLoc StringLoc;
8797   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8798   if (res != MatchOperand_Success) {
8799     return res;
8800   }
8801 
8802   int64_t Int;
8803   Int = StringSwitch<int64_t>(Value)
8804         .Case("BYTE_0", SdwaSel::BYTE_0)
8805         .Case("BYTE_1", SdwaSel::BYTE_1)
8806         .Case("BYTE_2", SdwaSel::BYTE_2)
8807         .Case("BYTE_3", SdwaSel::BYTE_3)
8808         .Case("WORD_0", SdwaSel::WORD_0)
8809         .Case("WORD_1", SdwaSel::WORD_1)
8810         .Case("DWORD", SdwaSel::DWORD)
8811         .Default(0xffffffff);
8812 
8813   if (Int == 0xffffffff) {
8814     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8815     return MatchOperand_ParseFail;
8816   }
8817 
8818   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8819   return MatchOperand_Success;
8820 }
8821 
8822 OperandMatchResultTy
8823 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8824   using namespace llvm::AMDGPU::SDWA;
8825 
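  // Parses "dst_unused:<mode>", e.g. "dst_unused:UNUSED_PRESERVE".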
8826   SMLoc S = getLoc();
8827   StringRef Value;
8828   OperandMatchResultTy res;
8829 
8830   SMLoc StringLoc;
8831   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8832   if (res != MatchOperand_Success) {
8833     return res;
8834   }
8835 
8836   int64_t Int;
8837   Int = StringSwitch<int64_t>(Value)
8838         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8839         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8840         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8841         .Default(0xffffffff);
8842 
8843   if (Int == 0xffffffff) {
8844     Error(StringLoc, "invalid dst_unused value");
8845     return MatchOperand_ParseFail;
8846   }
8847 
8848   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8849   return MatchOperand_Success;
8850 }
8851 
8852 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8853   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8854 }
8855 
8856 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8857   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8858 }
8859 
8860 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8861   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8862 }
8863 
8864 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8865   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8866 }
8867 
8868 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8869   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8870 }
8871 
8872 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8873                               uint64_t BasicInstType,
8874                               bool SkipDstVcc,
8875                               bool SkipSrcVcc) {
8876   using namespace llvm::AMDGPU::SDWA;
8877 
8878   OptionalImmIndexMap OptionalIdx;
8879   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8880   bool SkippedVcc = false;
8881 
8882   unsigned I = 1;
8883   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8884   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8885     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8886   }
8887 
8888   for (unsigned E = Operands.size(); I != E; ++I) {
8889     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8890     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8891         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8892       // VOP2b (v_add_u32, v_sub_u32 ...) SDWA uses the "vcc" token as dst.
8893       // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8894       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8895       // Skip VCC only if we didn't skip it on previous iteration.
8896       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8897       if (BasicInstType == SIInstrFlags::VOP2 &&
8898           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8899            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8900         SkippedVcc = true;
8901         continue;
8902       } else if (BasicInstType == SIInstrFlags::VOPC &&
8903                  Inst.getNumOperands() == 0) {
8904         SkippedVcc = true;
8905         continue;
8906       }
8907     }
8908     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8909       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8910     } else if (Op.isImm()) {
8911       // Handle optional arguments
8912       OptionalIdx[Op.getImmTy()] = I;
8913     } else {
8914       llvm_unreachable("Invalid operand type");
8915     }
8916     SkippedVcc = false;
8917   }
8918 
8919   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8920       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8921       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8922     // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
8923     switch (BasicInstType) {
8924     case SIInstrFlags::VOP1:
8925       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8926       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8927         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8928       }
8929       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8930       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8931       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8932       break;
8933 
8934     case SIInstrFlags::VOP2:
8935       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8936       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8937         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8938       }
8939       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8940       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8941       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8942       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8943       break;
8944 
8945     case SIInstrFlags::VOPC:
8946       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8947         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8948       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8949       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8950       break;
8951 
8952     default:
8953       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8954     }
8955   }
8956 
8957   // Special case v_mac_{f16, f32}:
8958   // these opcodes have a src2 register operand that is tied to the dst operand.
8959   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8960       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8961     auto it = Inst.begin();
8962     std::advance(
8963       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8964     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8965   }
8966 }
8967 
8968 //===----------------------------------------------------------------------===//
8969 // mAI
8970 //===----------------------------------------------------------------------===//
8971 
8972 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8973   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8974 }
8975 
8976 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8977   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8978 }
8979 
8980 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8981   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8982 }
8983 
8984 /// Force static initialization.
8985 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8986   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8987   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8988 }
8989 
8990 #define GET_REGISTER_MATCHER
8991 #define GET_MATCHER_IMPLEMENTATION
8992 #define GET_MNEMONIC_SPELL_CHECKER
8993 #define GET_MNEMONIC_CHECKER
8994 #include "AMDGPUGenAsmMatcher.inc"
8995 
8996 // This function should be defined after the auto-generated include so that the
8997 // MatchClassKind enum is defined.
8998 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8999                                                      unsigned Kind) {
9000   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9001   // But MatchInstructionImpl() expects to see a token and fails to validate the
9002   // operand. This method checks whether we were given an immediate operand when
9003   // the matcher expects the corresponding token.
9004   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9005   switch (Kind) {
9006   case MCK_addr64:
9007     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9008   case MCK_gds:
9009     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9010   case MCK_lds:
9011     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9012   case MCK_idxen:
9013     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9014   case MCK_offen:
9015     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9016   case MCK_SSrcB32:
9017     // When operands have expression values, they will return true for isToken,
9018     // because it is not possible to distinguish between a token and an
9019     // expression at parse time. MatchInstructionImpl() will always try to
9020     // match an operand as a token, when isToken returns true, and when the
9021     // name of the expression is not a valid token, the match will fail,
9022     // so we need to handle it here.
9023     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9024   case MCK_SSrcF32:
9025     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9026   case MCK_SoppBrTarget:
9027     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
9028   case MCK_VReg32OrOff:
9029     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9030   case MCK_InterpSlot:
9031     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9032   case MCK_Attr:
9033     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9034   case MCK_AttrChan:
9035     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
9036   case MCK_ImmSMEMOffset:
9037     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
9038   case MCK_SReg_64:
9039   case MCK_SReg_64_XEXEC:
9040     // Null is defined as a 32-bit register but
9041     // it should also be enabled with 64-bit operands.
9042     // The following code enables it for SReg_64 operands
9043     // used as source and destination. Remaining source
9044     // operands are handled in isInlinableImm.
9045     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9046   default:
9047     return Match_InvalidOperand;
9048   }
9049 }
9050 
9051 //===----------------------------------------------------------------------===//
9052 // endpgm
9053 //===----------------------------------------------------------------------===//
9054 
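// The endpgm instructions take an optional 16-bit immediate that defaults to
// 0, e.g. "s_endpgm 0".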
9055 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
9056   SMLoc S = getLoc();
9057   int64_t Imm = 0;
9058 
9059   if (!parseExpr(Imm)) {
9060     // The operand is optional, if not present default to 0
9061     Imm = 0;
9062   }
9063 
9064   if (!isUInt<16>(Imm)) {
9065     Error(S, "expected a 16-bit value");
9066     return MatchOperand_ParseFail;
9067   }
9068 
9069   Operands.push_back(
9070       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9071   return MatchOperand_Success;
9072 }
9073 
9074 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9075 
9076 //===----------------------------------------------------------------------===//
9077 // LDSDIR
9078 //===----------------------------------------------------------------------===//
9079 
9080 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
9081   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
9082 }
9083 
9084 bool AMDGPUOperand::isWaitVDST() const {
9085   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9086 }
9087 
9088 //===----------------------------------------------------------------------===//
9089 // VINTERP
9090 //===----------------------------------------------------------------------===//
9091 
9092 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
9093   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
9094 }
9095 
9096 bool AMDGPUOperand::isWaitEXP() const {
9097   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9098 }
9099