1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCInstrDesc.h"
29 #include "llvm/MC/MCParser/MCAsmLexer.h"
30 #include "llvm/MC/MCParser/MCAsmParser.h"
31 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
32 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/AMDGPUMetadata.h"
36 #include "llvm/Support/AMDHSAKernelDescriptor.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/MachineValueType.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/TargetParser.h"
41 
42 using namespace llvm;
43 using namespace llvm::AMDGPU;
44 using namespace llvm::amdhsa;
45 
46 namespace {
47 
48 class AMDGPUAsmParser;
49 
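// Coarse register categories recognized by the parser, derived from the
// register name prefix in the source, e.g. v0, s[0:1], a2, ttmp3, or special
// names such as vcc and exec.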
50 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
51 
52 //===----------------------------------------------------------------------===//
53 // Operand
54 //===----------------------------------------------------------------------===//
55 
56 class AMDGPUOperand : public MCParsedAsmOperand {
57   enum KindTy {
58     Token,
59     Immediate,
60     Register,
61     Expression
62   } Kind;
63 
64   SMLoc StartLoc, EndLoc;
65   const AMDGPUAsmParser *AsmParser;
66 
67 public:
68   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
69       : Kind(Kind_), AsmParser(AsmParser_) {}
70 
71   using Ptr = std::unique_ptr<AMDGPUOperand>;
72 
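  // Source operand modifiers as written in the assembly, e.g. "-v0", "|v1|",
  // "-|v2|" or "sext(v3)". FP (abs/neg) and integer (sext) modifiers are
  // mutually exclusive on a single operand; getModifiersOperand() asserts this.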
73   struct Modifiers {
74     bool Abs = false;
75     bool Neg = false;
76     bool Sext = false;
77 
78     bool hasFPModifiers() const { return Abs || Neg; }
79     bool hasIntModifiers() const { return Sext; }
80     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
81 
82     int64_t getFPModifiersOperand() const {
83       int64_t Operand = 0;
84       Operand |= Abs ? SISrcMods::ABS : 0u;
85       Operand |= Neg ? SISrcMods::NEG : 0u;
86       return Operand;
87     }
88 
89     int64_t getIntModifiersOperand() const {
90       int64_t Operand = 0;
91       Operand |= Sext ? SISrcMods::SEXT : 0u;
92       return Operand;
93     }
94 
95     int64_t getModifiersOperand() const {
96       assert(!(hasFPModifiers() && hasIntModifiers())
97            && "fp and int modifiers should not be used simultaneously");
98       if (hasFPModifiers()) {
99         return getFPModifiersOperand();
100       } else if (hasIntModifiers()) {
101         return getIntModifiersOperand();
102       } else {
103         return 0;
104       }
105     }
106 
107     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
108   };
109 
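  // Named immediate operand kinds, one per optional or special operand the
  // parser understands (e.g. offset:, dmask:, row_mask:). ImmTyNone denotes a
  // plain immediate with no associated keyword.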
110   enum ImmTy {
111     ImmTyNone,
112     ImmTyGDS,
113     ImmTyLDS,
114     ImmTyOffen,
115     ImmTyIdxen,
116     ImmTyAddr64,
117     ImmTyOffset,
118     ImmTyInstOffset,
119     ImmTyOffset0,
120     ImmTyOffset1,
121     ImmTyCPol,
122     ImmTySWZ,
123     ImmTyTFE,
124     ImmTyD16,
125     ImmTyClampSI,
126     ImmTyOModSI,
127     ImmTySdwaDstSel,
128     ImmTySdwaSrc0Sel,
129     ImmTySdwaSrc1Sel,
130     ImmTySdwaDstUnused,
131     ImmTyDMask,
132     ImmTyDim,
133     ImmTyUNorm,
134     ImmTyDA,
135     ImmTyR128A16,
136     ImmTyA16,
137     ImmTyLWE,
138     ImmTyExpTgt,
139     ImmTyExpCompr,
140     ImmTyExpVM,
141     ImmTyFORMAT,
142     ImmTyHwreg,
143     ImmTyOff,
144     ImmTySendMsg,
145     ImmTyInterpSlot,
146     ImmTyInterpAttr,
147     ImmTyAttrChan,
148     ImmTyOpSel,
149     ImmTyOpSelHi,
150     ImmTyNegLo,
151     ImmTyNegHi,
152     ImmTyDPP8,
153     ImmTyDppCtrl,
154     ImmTyDppRowMask,
155     ImmTyDppBankMask,
156     ImmTyDppBoundCtrl,
157     ImmTyDppFi,
158     ImmTySwizzle,
159     ImmTyGprIdxMode,
160     ImmTyHigh,
161     ImmTyBLGP,
162     ImmTyCBSZ,
163     ImmTyABID,
164     ImmTyEndpgm,
165     ImmTyWaitVDST,
166     ImmTyWaitEXP,
167   };
168 
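  // How an immediate is expected to be encoded: as a literal constant in the
  // instruction stream or as an inline constant. ImmKindTyNone means the
  // encoding has not been decided yet.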
169   enum ImmKindTy {
170     ImmKindTyNone,
171     ImmKindTyLiteral,
172     ImmKindTyConst,
173   };
174 
175 private:
176   struct TokOp {
177     const char *Data;
178     unsigned Length;
179   };
180 
181   struct ImmOp {
182     int64_t Val;
183     ImmTy Type;
184     bool IsFPImm;
185     mutable ImmKindTy Kind;
186     Modifiers Mods;
187   };
188 
189   struct RegOp {
190     unsigned RegNo;
191     Modifiers Mods;
192   };
193 
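  // Operand payload; the active member is selected by Kind.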
194   union {
195     TokOp Tok;
196     ImmOp Imm;
197     RegOp Reg;
198     const MCExpr *Expr;
199   };
200 
201 public:
202   bool isToken() const override {
203     if (Kind == Token)
204       return true;
205 
206     // When parsing operands, we can't always tell if something was meant to be
207     // a token, like 'gds', or an expression that references a global variable.
208     // In this case, we assume the string is an expression, and if we need to
209     // interpret it as a token, then we treat the symbol name as the token.
210     return isSymbolRefExpr();
211   }
212 
213   bool isSymbolRefExpr() const {
214     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
215   }
216 
217   bool isImm() const override {
218     return Kind == Immediate;
219   }
220 
221   void setImmKindNone() const {
222     assert(isImm());
223     Imm.Kind = ImmKindTyNone;
224   }
225 
226   void setImmKindLiteral() const {
227     assert(isImm());
228     Imm.Kind = ImmKindTyLiteral;
229   }
230 
231   void setImmKindConst() const {
232     assert(isImm());
233     Imm.Kind = ImmKindTyConst;
234   }
235 
236   bool IsImmKindLiteral() const {
237     return isImm() && Imm.Kind == ImmKindTyLiteral;
238   }
239 
240   bool isImmKindConst() const {
241     return isImm() && Imm.Kind == ImmKindTyConst;
242   }
243 
244   bool isInlinableImm(MVT type) const;
245   bool isLiteralImm(MVT type) const;
246 
247   bool isRegKind() const {
248     return Kind == Register;
249   }
250 
251   bool isReg() const override {
252     return isRegKind() && !hasModifiers();
253   }
254 
255   bool isRegOrInline(unsigned RCID, MVT type) const {
256     return isRegClass(RCID) || isInlinableImm(type);
257   }
258 
259   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
260     return isRegOrInline(RCID, type) || isLiteralImm(type);
261   }
262 
263   bool isRegOrImmWithInt16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
265   }
266 
267   bool isRegOrImmWithInt32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
269   }
270 
271   bool isRegOrInlineImmWithInt16InputMods() const {
272     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
273   }
274 
275   bool isRegOrInlineImmWithInt32InputMods() const {
276     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
277   }
278 
279   bool isRegOrImmWithInt64InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
281   }
282 
283   bool isRegOrImmWithFP16InputMods() const {
284     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
285   }
286 
287   bool isRegOrImmWithFP32InputMods() const {
288     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289   }
290 
291   bool isRegOrImmWithFP64InputMods() const {
292     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293   }
294 
295   bool isRegOrInlineImmWithFP16InputMods() const {
296     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
297   }
298 
299   bool isRegOrInlineImmWithFP32InputMods() const {
300     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
301   }
302 
303 
304   bool isVReg() const {
305     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
306            isRegClass(AMDGPU::VReg_64RegClassID) ||
307            isRegClass(AMDGPU::VReg_96RegClassID) ||
308            isRegClass(AMDGPU::VReg_128RegClassID) ||
309            isRegClass(AMDGPU::VReg_160RegClassID) ||
310            isRegClass(AMDGPU::VReg_192RegClassID) ||
311            isRegClass(AMDGPU::VReg_256RegClassID) ||
312            isRegClass(AMDGPU::VReg_512RegClassID) ||
313            isRegClass(AMDGPU::VReg_1024RegClassID);
314   }
315 
316   bool isVReg32() const {
317     return isRegClass(AMDGPU::VGPR_32RegClassID);
318   }
319 
320   bool isVReg32OrOff() const {
321     return isOff() || isVReg32();
322   }
323 
324   bool isNull() const {
325     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
326   }
327 
328   bool isVRegWithInputMods() const;
329 
330   bool isSDWAOperand(MVT type) const;
331   bool isSDWAFP16Operand() const;
332   bool isSDWAFP32Operand() const;
333   bool isSDWAInt16Operand() const;
334   bool isSDWAInt32Operand() const;
335 
336   bool isImmTy(ImmTy ImmT) const {
337     return isImm() && Imm.Type == ImmT;
338   }
339 
340   bool isImmModifier() const {
341     return isImm() && Imm.Type != ImmTyNone;
342   }
343 
344   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
345   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
346   bool isDMask() const { return isImmTy(ImmTyDMask); }
347   bool isDim() const { return isImmTy(ImmTyDim); }
348   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
349   bool isDA() const { return isImmTy(ImmTyDA); }
350   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
351   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
352   bool isLWE() const { return isImmTy(ImmTyLWE); }
353   bool isOff() const { return isImmTy(ImmTyOff); }
354   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
355   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
356   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
357   bool isOffen() const { return isImmTy(ImmTyOffen); }
358   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
359   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
360   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
361   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
362   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
363 
364   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
365   bool isGDS() const { return isImmTy(ImmTyGDS); }
366   bool isLDS() const { return isImmTy(ImmTyLDS); }
367   bool isCPol() const { return isImmTy(ImmTyCPol); }
368   bool isSWZ() const { return isImmTy(ImmTySWZ); }
369   bool isTFE() const { return isImmTy(ImmTyTFE); }
370   bool isD16() const { return isImmTy(ImmTyD16); }
371   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
372   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
373   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
374   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
375   bool isFI() const { return isImmTy(ImmTyDppFi); }
376   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
377   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
378   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
379   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
380   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
381   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
382   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
383   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
384   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
385   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
386   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
387   bool isHigh() const { return isImmTy(ImmTyHigh); }
388 
389   bool isMod() const {
390     return isClampSI() || isOModSI();
391   }
392 
393   bool isRegOrImm() const {
394     return isReg() || isImm();
395   }
396 
397   bool isRegClass(unsigned RCID) const;
398 
399   bool isInlineValue() const;
400 
401   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
402     return isRegOrInline(RCID, type) && !hasModifiers();
403   }
404 
405   bool isSCSrcB16() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
407   }
408 
409   bool isSCSrcV2B16() const {
410     return isSCSrcB16();
411   }
412 
413   bool isSCSrcB32() const {
414     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
415   }
416 
417   bool isSCSrcB64() const {
418     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
419   }
420 
421   bool isBoolReg() const;
422 
423   bool isSCSrcF16() const {
424     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
425   }
426 
427   bool isSCSrcV2F16() const {
428     return isSCSrcF16();
429   }
430 
431   bool isSCSrcF32() const {
432     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
433   }
434 
435   bool isSCSrcF64() const {
436     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
437   }
438 
439   bool isSSrcB32() const {
440     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
441   }
442 
443   bool isSSrcB16() const {
444     return isSCSrcB16() || isLiteralImm(MVT::i16);
445   }
446 
447   bool isSSrcV2B16() const {
448     llvm_unreachable("cannot happen");
449     return isSSrcB16();
450   }
451 
452   bool isSSrcB64() const {
453     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
454     // See isVSrc64().
455     return isSCSrcB64() || isLiteralImm(MVT::i64);
456   }
457 
458   bool isSSrcF32() const {
459     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
460   }
461 
462   bool isSSrcF64() const {
463     return isSCSrcB64() || isLiteralImm(MVT::f64);
464   }
465 
466   bool isSSrcF16() const {
467     return isSCSrcB16() || isLiteralImm(MVT::f16);
468   }
469 
470   bool isSSrcV2F16() const {
471     llvm_unreachable("cannot happen");
472     return isSSrcF16();
473   }
474 
475   bool isSSrcV2FP32() const {
476     llvm_unreachable("cannot happen");
477     return isSSrcF32();
478   }
479 
480   bool isSCSrcV2FP32() const {
481     llvm_unreachable("cannot happen");
482     return isSCSrcF32();
483   }
484 
485   bool isSSrcV2INT32() const {
486     llvm_unreachable("cannot happen");
487     return isSSrcB32();
488   }
489 
490   bool isSCSrcV2INT32() const {
491     llvm_unreachable("cannot happen");
492     return isSCSrcB32();
493   }
494 
495   bool isSSrcOrLdsB32() const {
496     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
497            isLiteralImm(MVT::i32) || isExpr();
498   }
499 
500   bool isVCSrcB32() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
502   }
503 
504   bool isVCSrcB64() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
506   }
507 
508   bool isVCSrcB16() const {
509     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
510   }
511 
512   bool isVCSrcV2B16() const {
513     return isVCSrcB16();
514   }
515 
516   bool isVCSrcF32() const {
517     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
518   }
519 
520   bool isVCSrcF64() const {
521     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
522   }
523 
524   bool isVCSrcF16() const {
525     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
526   }
527 
528   bool isVCSrcV2F16() const {
529     return isVCSrcF16();
530   }
531 
532   bool isVSrcB32() const {
533     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
534   }
535 
536   bool isVSrcB64() const {
537     return isVCSrcF64() || isLiteralImm(MVT::i64);
538   }
539 
540   bool isVSrcB16() const {
541     return isVCSrcB16() || isLiteralImm(MVT::i16);
542   }
543 
544   bool isVSrcV2B16() const {
545     return isVSrcB16() || isLiteralImm(MVT::v2i16);
546   }
547 
548   bool isVCSrcV2FP32() const {
549     return isVCSrcF64();
550   }
551 
552   bool isVSrcV2FP32() const {
553     return isVSrcF64() || isLiteralImm(MVT::v2f32);
554   }
555 
556   bool isVCSrcV2INT32() const {
557     return isVCSrcB64();
558   }
559 
560   bool isVSrcV2INT32() const {
561     return isVSrcB64() || isLiteralImm(MVT::v2i32);
562   }
563 
564   bool isVSrcF32() const {
565     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
566   }
567 
568   bool isVSrcF64() const {
569     return isVCSrcF64() || isLiteralImm(MVT::f64);
570   }
571 
572   bool isVSrcF16() const {
573     return isVCSrcF16() || isLiteralImm(MVT::f16);
574   }
575 
576   bool isVSrcV2F16() const {
577     return isVSrcF16() || isLiteralImm(MVT::v2f16);
578   }
579 
580   bool isVISrcB32() const {
581     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
582   }
583 
584   bool isVISrcB16() const {
585     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
586   }
587 
588   bool isVISrcV2B16() const {
589     return isVISrcB16();
590   }
591 
592   bool isVISrcF32() const {
593     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
594   }
595 
596   bool isVISrcF16() const {
597     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
598   }
599 
600   bool isVISrcV2F16() const {
601     return isVISrcF16() || isVISrcB32();
602   }
603 
604   bool isVISrc_64B64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
606   }
607 
608   bool isVISrc_64F64() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
610   }
611 
612   bool isVISrc_64V2FP32() const {
613     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
614   }
615 
616   bool isVISrc_64V2INT32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_256B64() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
622   }
623 
624   bool isVISrc_256F64() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
626   }
627 
628   bool isVISrc_128B16() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
630   }
631 
632   bool isVISrc_128V2B16() const {
633     return isVISrc_128B16();
634   }
635 
636   bool isVISrc_128B32() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
638   }
639 
640   bool isVISrc_128F32() const {
641     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
642   }
643 
644   bool isVISrc_256V2FP32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_256V2INT32() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
650   }
651 
652   bool isVISrc_512B32() const {
653     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
654   }
655 
656   bool isVISrc_512B16() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
658   }
659 
660   bool isVISrc_512V2B16() const {
661     return isVISrc_512B16();
662   }
663 
664   bool isVISrc_512F32() const {
665     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
666   }
667 
668   bool isVISrc_512F16() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
670   }
671 
672   bool isVISrc_512V2F16() const {
673     return isVISrc_512F16() || isVISrc_512B32();
674   }
675 
676   bool isVISrc_1024B32() const {
677     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
678   }
679 
680   bool isVISrc_1024B16() const {
681     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
682   }
683 
684   bool isVISrc_1024V2B16() const {
685     return isVISrc_1024B16();
686   }
687 
688   bool isVISrc_1024F32() const {
689     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
690   }
691 
692   bool isVISrc_1024F16() const {
693     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
694   }
695 
696   bool isVISrc_1024V2F16() const {
697     return isVISrc_1024F16() || isVISrc_1024B32();
698   }
699 
700   bool isAISrcB32() const {
701     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
702   }
703 
704   bool isAISrcB16() const {
705     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
706   }
707 
708   bool isAISrcV2B16() const {
709     return isAISrcB16();
710   }
711 
712   bool isAISrcF32() const {
713     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
714   }
715 
716   bool isAISrcF16() const {
717     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
718   }
719 
720   bool isAISrcV2F16() const {
721     return isAISrcF16() || isAISrcB32();
722   }
723 
724   bool isAISrc_64B64() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
726   }
727 
728   bool isAISrc_64F64() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
730   }
731 
732   bool isAISrc_128B32() const {
733     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
734   }
735 
736   bool isAISrc_128B16() const {
737     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
738   }
739 
740   bool isAISrc_128V2B16() const {
741     return isAISrc_128B16();
742   }
743 
744   bool isAISrc_128F32() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
746   }
747 
748   bool isAISrc_128F16() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
750   }
751 
752   bool isAISrc_128V2F16() const {
753     return isAISrc_128F16() || isAISrc_128B32();
754   }
755 
756   bool isVISrc_128F16() const {
757     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
758   }
759 
760   bool isVISrc_128V2F16() const {
761     return isVISrc_128F16() || isVISrc_128B32();
762   }
763 
764   bool isAISrc_256B64() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
766   }
767 
768   bool isAISrc_256F64() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
770   }
771 
772   bool isAISrc_512B32() const {
773     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
774   }
775 
776   bool isAISrc_512B16() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
778   }
779 
780   bool isAISrc_512V2B16() const {
781     return isAISrc_512B16();
782   }
783 
784   bool isAISrc_512F32() const {
785     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
786   }
787 
788   bool isAISrc_512F16() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
790   }
791 
792   bool isAISrc_512V2F16() const {
793     return isAISrc_512F16() || isAISrc_512B32();
794   }
795 
796   bool isAISrc_1024B32() const {
797     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
798   }
799 
800   bool isAISrc_1024B16() const {
801     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
802   }
803 
804   bool isAISrc_1024V2B16() const {
805     return isAISrc_1024B16();
806   }
807 
808   bool isAISrc_1024F32() const {
809     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
810   }
811 
812   bool isAISrc_1024F16() const {
813     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
814   }
815 
816   bool isAISrc_1024V2F16() const {
817     return isAISrc_1024F16() || isAISrc_1024B32();
818   }
819 
820   bool isKImmFP32() const {
821     return isLiteralImm(MVT::f32);
822   }
823 
824   bool isKImmFP16() const {
825     return isLiteralImm(MVT::f16);
826   }
827 
828   bool isMem() const override {
829     return false;
830   }
831 
832   bool isExpr() const {
833     return Kind == Expression;
834   }
835 
836   bool isSoppBrTarget() const {
837     return isExpr() || isImm();
838   }
839 
840   bool isSWaitCnt() const;
841   bool isDepCtr() const;
842   bool isSDelayAlu() const;
843   bool isHwreg() const;
844   bool isSendMsg() const;
845   bool isSwizzle() const;
846   bool isSMRDOffset8() const;
847   bool isSMEMOffset() const;
848   bool isSMRDLiteralOffset() const;
849   bool isDPP8() const;
850   bool isDPPCtrl() const;
851   bool isBLGP() const;
852   bool isCBSZ() const;
853   bool isABID() const;
854   bool isGPRIdxMode() const;
855   bool isS16Imm() const;
856   bool isU16Imm() const;
857   bool isEndpgm() const;
858   bool isWaitVDST() const;
859   bool isWaitEXP() const;
860 
861   StringRef getExpressionAsToken() const {
862     assert(isExpr());
863     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
864     return S->getSymbol().getName();
865   }
866 
867   StringRef getToken() const {
868     assert(isToken());
869 
870     if (Kind == Expression)
871       return getExpressionAsToken();
872 
873     return StringRef(Tok.Data, Tok.Length);
874   }
875 
876   int64_t getImm() const {
877     assert(isImm());
878     return Imm.Val;
879   }
880 
881   void setImm(int64_t Val) {
882     assert(isImm());
883     Imm.Val = Val;
884   }
885 
886   ImmTy getImmTy() const {
887     assert(isImm());
888     return Imm.Type;
889   }
890 
891   unsigned getReg() const override {
892     assert(isRegKind());
893     return Reg.RegNo;
894   }
895 
896   SMLoc getStartLoc() const override {
897     return StartLoc;
898   }
899 
900   SMLoc getEndLoc() const override {
901     return EndLoc;
902   }
903 
904   SMRange getLocRange() const {
905     return SMRange(StartLoc, EndLoc);
906   }
907 
908   Modifiers getModifiers() const {
909     assert(isRegKind() || isImmTy(ImmTyNone));
910     return isRegKind() ? Reg.Mods : Imm.Mods;
911   }
912 
913   void setModifiers(Modifiers Mods) {
914     assert(isRegKind() || isImmTy(ImmTyNone));
915     if (isRegKind())
916       Reg.Mods = Mods;
917     else
918       Imm.Mods = Mods;
919   }
920 
921   bool hasModifiers() const {
922     return getModifiers().hasModifiers();
923   }
924 
925   bool hasFPModifiers() const {
926     return getModifiers().hasFPModifiers();
927   }
928 
929   bool hasIntModifiers() const {
930     return getModifiers().hasIntModifiers();
931   }
932 
933   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
934 
935   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
936 
937   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
938 
939   template <unsigned Bitwidth>
940   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
941 
942   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
943     addKImmFPOperands<16>(Inst, N);
944   }
945 
946   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
947     addKImmFPOperands<32>(Inst, N);
948   }
949 
950   void addRegOperands(MCInst &Inst, unsigned N) const;
951 
952   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
953     addRegOperands(Inst, N);
954   }
955 
956   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
957     if (isRegKind())
958       addRegOperands(Inst, N);
959     else if (isExpr())
960       Inst.addOperand(MCOperand::createExpr(Expr));
961     else
962       addImmOperands(Inst, N);
963   }
964 
965   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
966     Modifiers Mods = getModifiers();
967     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
968     if (isRegKind()) {
969       addRegOperands(Inst, N);
970     } else {
971       addImmOperands(Inst, N, false);
972     }
973   }
974 
975   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
976     assert(!hasIntModifiers());
977     addRegOrImmWithInputModsOperands(Inst, N);
978   }
979 
980   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
981     assert(!hasFPModifiers());
982     addRegOrImmWithInputModsOperands(Inst, N);
983   }
984 
985   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
986     Modifiers Mods = getModifiers();
987     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
988     assert(isRegKind());
989     addRegOperands(Inst, N);
990   }
991 
992   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
993     assert(!hasIntModifiers());
994     addRegWithInputModsOperands(Inst, N);
995   }
996 
997   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
998     assert(!hasFPModifiers());
999     addRegWithInputModsOperands(Inst, N);
1000   }
1001 
1002   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
1003     if (isImm())
1004       addImmOperands(Inst, N);
1005     else {
1006       assert(isExpr());
1007       Inst.addOperand(MCOperand::createExpr(Expr));
1008     }
1009   }
1010 
1011   static void printImmTy(raw_ostream& OS, ImmTy Type) {
1012     switch (Type) {
1013     case ImmTyNone: OS << "None"; break;
1014     case ImmTyGDS: OS << "GDS"; break;
1015     case ImmTyLDS: OS << "LDS"; break;
1016     case ImmTyOffen: OS << "Offen"; break;
1017     case ImmTyIdxen: OS << "Idxen"; break;
1018     case ImmTyAddr64: OS << "Addr64"; break;
1019     case ImmTyOffset: OS << "Offset"; break;
1020     case ImmTyInstOffset: OS << "InstOffset"; break;
1021     case ImmTyOffset0: OS << "Offset0"; break;
1022     case ImmTyOffset1: OS << "Offset1"; break;
1023     case ImmTyCPol: OS << "CPol"; break;
1024     case ImmTySWZ: OS << "SWZ"; break;
1025     case ImmTyTFE: OS << "TFE"; break;
1026     case ImmTyD16: OS << "D16"; break;
1027     case ImmTyFORMAT: OS << "FORMAT"; break;
1028     case ImmTyClampSI: OS << "ClampSI"; break;
1029     case ImmTyOModSI: OS << "OModSI"; break;
1030     case ImmTyDPP8: OS << "DPP8"; break;
1031     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1032     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1033     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1034     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1035     case ImmTyDppFi: OS << "FI"; break;
1036     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1037     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1038     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1039     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1040     case ImmTyDMask: OS << "DMask"; break;
1041     case ImmTyDim: OS << "Dim"; break;
1042     case ImmTyUNorm: OS << "UNorm"; break;
1043     case ImmTyDA: OS << "DA"; break;
1044     case ImmTyR128A16: OS << "R128A16"; break;
1045     case ImmTyA16: OS << "A16"; break;
1046     case ImmTyLWE: OS << "LWE"; break;
1047     case ImmTyOff: OS << "Off"; break;
1048     case ImmTyExpTgt: OS << "ExpTgt"; break;
1049     case ImmTyExpCompr: OS << "ExpCompr"; break;
1050     case ImmTyExpVM: OS << "ExpVM"; break;
1051     case ImmTyHwreg: OS << "Hwreg"; break;
1052     case ImmTySendMsg: OS << "SendMsg"; break;
1053     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1054     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1055     case ImmTyAttrChan: OS << "AttrChan"; break;
1056     case ImmTyOpSel: OS << "OpSel"; break;
1057     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1058     case ImmTyNegLo: OS << "NegLo"; break;
1059     case ImmTyNegHi: OS << "NegHi"; break;
1060     case ImmTySwizzle: OS << "Swizzle"; break;
1061     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1062     case ImmTyHigh: OS << "High"; break;
1063     case ImmTyBLGP: OS << "BLGP"; break;
1064     case ImmTyCBSZ: OS << "CBSZ"; break;
1065     case ImmTyABID: OS << "ABID"; break;
1066     case ImmTyEndpgm: OS << "Endpgm"; break;
1067     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1068     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1069     }
1070   }
1071 
1072   void print(raw_ostream &OS) const override {
1073     switch (Kind) {
1074     case Register:
1075       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1076       break;
1077     case Immediate:
1078       OS << '<' << getImm();
1079       if (getImmTy() != ImmTyNone) {
1080         OS << " type: "; printImmTy(OS, getImmTy());
1081       }
1082       OS << " mods: " << Imm.Mods << '>';
1083       break;
1084     case Token:
1085       OS << '\'' << getToken() << '\'';
1086       break;
1087     case Expression:
1088       OS << "<expr " << *Expr << '>';
1089       break;
1090     }
1091   }
1092 
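  // Factory helpers used by the parser to construct operands, e.g. a parsed
  // literal becomes a CreateImm(...) operand and a register reference becomes
  // a CreateReg(...) operand.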
1093   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1094                                       int64_t Val, SMLoc Loc,
1095                                       ImmTy Type = ImmTyNone,
1096                                       bool IsFPImm = false) {
1097     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1098     Op->Imm.Val = Val;
1099     Op->Imm.IsFPImm = IsFPImm;
1100     Op->Imm.Kind = ImmKindTyNone;
1101     Op->Imm.Type = Type;
1102     Op->Imm.Mods = Modifiers();
1103     Op->StartLoc = Loc;
1104     Op->EndLoc = Loc;
1105     return Op;
1106   }
1107 
1108   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1109                                         StringRef Str, SMLoc Loc,
1110                                         bool HasExplicitEncodingSize = true) {
1111     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1112     Res->Tok.Data = Str.data();
1113     Res->Tok.Length = Str.size();
1114     Res->StartLoc = Loc;
1115     Res->EndLoc = Loc;
1116     return Res;
1117   }
1118 
1119   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1120                                       unsigned RegNo, SMLoc S,
1121                                       SMLoc E) {
1122     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1123     Op->Reg.RegNo = RegNo;
1124     Op->Reg.Mods = Modifiers();
1125     Op->StartLoc = S;
1126     Op->EndLoc = E;
1127     return Op;
1128   }
1129 
1130   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1131                                        const class MCExpr *Expr, SMLoc S) {
1132     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1133     Op->Expr = Expr;
1134     Op->StartLoc = S;
1135     Op->EndLoc = S;
1136     return Op;
1137   }
1138 };
1139 
1140 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1141   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1142   return OS;
1143 }
1144 
1145 //===----------------------------------------------------------------------===//
1146 // AsmParser
1147 //===----------------------------------------------------------------------===//
1148 
1149 // Holds information about the current kernel, e.g. the count of SGPRs used.
1150 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
1151 // next .amdgpu_hsa_kernel directive or at EOF.
1152 class KernelScopeInfo {
1153   int SgprIndexUnusedMin = -1;
1154   int VgprIndexUnusedMin = -1;
1155   int AgprIndexUnusedMin = -1;
1156   MCContext *Ctx = nullptr;
1157   MCSubtargetInfo const *MSTI = nullptr;
1158 
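  // usesSgprAt/usesVgprAt/usesAgprAt record that register index i was used and
  // update the corresponding .kernel.*_count symbol to one past the highest
  // index seen so far.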
1159   void usesSgprAt(int i) {
1160     if (i >= SgprIndexUnusedMin) {
1161       SgprIndexUnusedMin = ++i;
1162       if (Ctx) {
1163         MCSymbol* const Sym =
1164           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1165         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1166       }
1167     }
1168   }
1169 
1170   void usesVgprAt(int i) {
1171     if (i >= VgprIndexUnusedMin) {
1172       VgprIndexUnusedMin = ++i;
1173       if (Ctx) {
1174         MCSymbol* const Sym =
1175           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1176         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1177                                          VgprIndexUnusedMin);
1178         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1179       }
1180     }
1181   }
1182 
1183   void usesAgprAt(int i) {
1184     // AGPR uses are rejected in AMDGPUAsmParser::MatchAndEmitInstruction.
1185     if (!hasMAIInsts(*MSTI))
1186       return;
1187 
1188     if (i >= AgprIndexUnusedMin) {
1189       AgprIndexUnusedMin = ++i;
1190       if (Ctx) {
1191         MCSymbol* const Sym =
1192           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1193         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1194 
1195         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1196         MCSymbol* const vSym =
1197           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1198         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1199                                          VgprIndexUnusedMin);
1200         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1201       }
1202     }
1203   }
1204 
1205 public:
1206   KernelScopeInfo() = default;
1207 
1208   void initialize(MCContext &Context) {
1209     Ctx = &Context;
1210     MSTI = Ctx->getSubtargetInfo();
1211 
1212     usesSgprAt(SgprIndexUnusedMin = -1);
1213     usesVgprAt(VgprIndexUnusedMin = -1);
1214     if (hasMAIInsts(*MSTI)) {
1215       usesAgprAt(AgprIndexUnusedMin = -1);
1216     }
1217   }
1218 
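  // Record a register use. DwordRegIndex is the index of the first 32-bit
  // register of the range and RegWidth is its width in bits, so e.g. s[4:5]
  // is reported as (IS_SGPR, 4, 64) and raises .kernel.sgpr_count to 6.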
1219   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1220                     unsigned RegWidth) {
1221     switch (RegKind) {
1222     case IS_SGPR:
1223       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1224       break;
1225     case IS_AGPR:
1226       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1227       break;
1228     case IS_VGPR:
1229       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1230       break;
1231     default:
1232       break;
1233     }
1234   }
1235 };
1236 
1237 class AMDGPUAsmParser : public MCTargetAsmParser {
1238   MCAsmParser &Parser;
1239 
1240   // Maximum number of extra operands to parse after the first optional one.
1241   // This may be necessary to skip hardcoded mandatory operands.
1242   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1243 
1244   unsigned ForcedEncodingSize = 0;
1245   bool ForcedDPP = false;
1246   bool ForcedSDWA = false;
1247   KernelScopeInfo KernelScope;
1248   unsigned CPolSeen;
1249 
1250   /// @name Auto-generated Match Functions
1251   /// {
1252 
1253 #define GET_ASSEMBLER_HEADER
1254 #include "AMDGPUGenAsmMatcher.inc"
1255 
1256   /// }
1257 
1258 private:
1259   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1260   bool OutOfRangeError(SMRange Range);
1261   /// Calculate VGPR/SGPR blocks required for the given target, reserved
1262   /// registers, and user-specified NextFreeXGPR values.
1263   ///
1264   /// \param Features [in] Target features, used for bug corrections.
1265   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1266   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1267   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1268   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1269   /// descriptor field, if valid.
1270   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1271   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1272   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1273   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1274   /// \param VGPRBlocks [out] Result VGPR block count.
1275   /// \param SGPRBlocks [out] Result SGPR block count.
1276   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1277                           bool FlatScrUsed, bool XNACKUsed,
1278                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1279                           SMRange VGPRRange, unsigned NextFreeSGPR,
1280                           SMRange SGPRRange, unsigned &VGPRBlocks,
1281                           unsigned &SGPRBlocks);
1282   bool ParseDirectiveAMDGCNTarget();
1283   bool ParseDirectiveAMDHSAKernel();
1284   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1285   bool ParseDirectiveHSACodeObjectVersion();
1286   bool ParseDirectiveHSACodeObjectISA();
1287   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1288   bool ParseDirectiveAMDKernelCodeT();
1289   // TODO: Possibly make subtargetHasRegister const.
1290   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1291   bool ParseDirectiveAMDGPUHsaKernel();
1292 
1293   bool ParseDirectiveISAVersion();
1294   bool ParseDirectiveHSAMetadata();
1295   bool ParseDirectivePALMetadataBegin();
1296   bool ParseDirectivePALMetadata();
1297   bool ParseDirectiveAMDGPULDS();
1298 
1299   /// Common code to parse out a block of text (typically YAML) between start and
1300   /// end directives.
1301   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1302                            const char *AssemblerDirectiveEnd,
1303                            std::string &CollectString);
1304 
1305   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1306                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1307   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1308                            unsigned &RegNum, unsigned &RegWidth,
1309                            bool RestoreOnFailure = false);
1310   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1311                            unsigned &RegNum, unsigned &RegWidth,
1312                            SmallVectorImpl<AsmToken> &Tokens);
1313   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1314                            unsigned &RegWidth,
1315                            SmallVectorImpl<AsmToken> &Tokens);
1316   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1317                            unsigned &RegWidth,
1318                            SmallVectorImpl<AsmToken> &Tokens);
1319   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1320                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1321   bool ParseRegRange(unsigned& Num, unsigned& Width);
1322   unsigned getRegularReg(RegisterKind RegKind,
1323                          unsigned RegNum,
1324                          unsigned RegWidth,
1325                          SMLoc Loc);
1326 
1327   bool isRegister();
1328   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1329   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1330   void initializeGprCountSymbol(RegisterKind RegKind);
1331   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1332                              unsigned RegWidth);
1333   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1334                     bool IsAtomic, bool IsLds = false);
1335   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1336                  bool IsGdsHardcoded);
1337 
1338 public:
1339   enum AMDGPUMatchResultTy {
1340     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1341   };
1342   enum OperandMode {
1343     OperandMode_Default,
1344     OperandMode_NSA,
1345   };
1346 
1347   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1348 
1349   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1350                const MCInstrInfo &MII,
1351                const MCTargetOptions &Options)
1352       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1353     MCAsmParserExtension::Initialize(Parser);
1354 
1355     if (getFeatureBits().none()) {
1356       // Set default features.
1357       copySTI().ToggleFeature("southern-islands");
1358     }
1359 
1360     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1361 
1362     {
1363       // TODO: make these pre-defined variables read-only.
1364       // Currently there is no suitable machinery in core llvm-mc for this.
1365       // MCSymbol::isRedefinable is intended for another purpose, and
1366       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1367       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1368       MCContext &Ctx = getContext();
1369       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1370         MCSymbol *Sym =
1371             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1372         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1373         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1374         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1375         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1376         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1377       } else {
1378         MCSymbol *Sym =
1379             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1380         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1381         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1382         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1383         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1384         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1385       }
1386       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1387         initializeGprCountSymbol(IS_VGPR);
1388         initializeGprCountSymbol(IS_SGPR);
1389       } else
1390         KernelScope.initialize(getContext());
1391     }
1392   }
1393 
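  // Convenience queries over the current subtarget's features and generation.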
1394   bool hasMIMG_R128() const {
1395     return AMDGPU::hasMIMG_R128(getSTI());
1396   }
1397 
1398   bool hasPackedD16() const {
1399     return AMDGPU::hasPackedD16(getSTI());
1400   }
1401 
1402   bool hasGFX10A16() const {
1403     return AMDGPU::hasGFX10A16(getSTI());
1404   }
1405 
1406   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1407 
1408   bool isSI() const {
1409     return AMDGPU::isSI(getSTI());
1410   }
1411 
1412   bool isCI() const {
1413     return AMDGPU::isCI(getSTI());
1414   }
1415 
1416   bool isVI() const {
1417     return AMDGPU::isVI(getSTI());
1418   }
1419 
1420   bool isGFX9() const {
1421     return AMDGPU::isGFX9(getSTI());
1422   }
1423 
1424   // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1425   bool isGFX90A() const {
1426     return AMDGPU::isGFX90A(getSTI());
1427   }
1428 
1429   bool isGFX940() const {
1430     return AMDGPU::isGFX940(getSTI());
1431   }
1432 
1433   bool isGFX9Plus() const {
1434     return AMDGPU::isGFX9Plus(getSTI());
1435   }
1436 
1437   bool isGFX10() const {
1438     return AMDGPU::isGFX10(getSTI());
1439   }
1440 
1441   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1442 
1443   bool isGFX11() const {
1444     return AMDGPU::isGFX11(getSTI());
1445   }
1446 
1447   bool isGFX11Plus() const {
1448     return AMDGPU::isGFX11Plus(getSTI());
1449   }
1450 
1451   bool isGFX10_BEncoding() const {
1452     return AMDGPU::isGFX10_BEncoding(getSTI());
1453   }
1454 
1455   bool hasInv2PiInlineImm() const {
1456     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1457   }
1458 
1459   bool hasFlatOffsets() const {
1460     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1461   }
1462 
1463   bool hasArchitectedFlatScratch() const {
1464     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1465   }
1466 
1467   bool hasSGPR102_SGPR103() const {
1468     return !isVI() && !isGFX9();
1469   }
1470 
1471   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1472 
1473   bool hasIntClamp() const {
1474     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1475   }
1476 
1477   AMDGPUTargetStreamer &getTargetStreamer() {
1478     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1479     return static_cast<AMDGPUTargetStreamer &>(TS);
1480   }
1481 
1482   const MCRegisterInfo *getMRI() const {
1483     // We need this const_cast because for some reason getContext() is not const
1484     // in MCAsmParser.
1485     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1486   }
1487 
1488   const MCInstrInfo *getMII() const {
1489     return &MII;
1490   }
1491 
1492   const FeatureBitset &getFeatureBits() const {
1493     return getSTI().getFeatureBits();
1494   }
1495 
1496   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1497   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1498   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1499 
1500   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1501   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1502   bool isForcedDPP() const { return ForcedDPP; }
1503   bool isForcedSDWA() const { return ForcedSDWA; }
1504   ArrayRef<unsigned> getMatchedVariants() const;
1505   StringRef getMatchedVariantName() const;
1506 
1507   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1508   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1509                      bool RestoreOnFailure);
1510   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1511   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1512                                         SMLoc &EndLoc) override;
1513   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1514   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1515                                       unsigned Kind) override;
1516   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1517                                OperandVector &Operands, MCStreamer &Out,
1518                                uint64_t &ErrorInfo,
1519                                bool MatchingInlineAsm) override;
1520   bool ParseDirective(AsmToken DirectiveID) override;
1521   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1522                                     OperandMode Mode = OperandMode_Default);
1523   StringRef parseMnemonicSuffix(StringRef Name);
1524   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1525                         SMLoc NameLoc, OperandVector &Operands) override;
1526   //bool ProcessInstruction(MCInst &Inst);
1527 
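  // Parsers for "<prefix>:<value>" style operands such as offset:4095. The
  // optional ConvertResult callback validates and/or normalizes the parsed
  // value before an operand is created from it.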
1528   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1529 
1530   OperandMatchResultTy
1531   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1532                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1533                      bool (*ConvertResult)(int64_t &) = nullptr);
1534 
1535   OperandMatchResultTy
1536   parseOperandArrayWithPrefix(const char *Prefix,
1537                               OperandVector &Operands,
1538                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1539                               bool (*ConvertResult)(int64_t&) = nullptr);
1540 
1541   OperandMatchResultTy
1542   parseNamedBit(StringRef Name, OperandVector &Operands,
1543                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1544   OperandMatchResultTy parseCPol(OperandVector &Operands);
1545   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1546                                              StringRef &Value,
1547                                              SMLoc &StringLoc);
1548 
1549   bool isModifier();
1550   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1551   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1552   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1553   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1554   bool parseSP3NegModifier();
1555   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1556   OperandMatchResultTy parseReg(OperandVector &Operands);
1557   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1558   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1559   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1560   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1561   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1562   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1563   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1564   OperandMatchResultTy parseUfmt(int64_t &Format);
1565   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1566   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1567   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1568   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1569   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1570   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1571   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1572 
1573   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1574   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1575   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1576   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1577 
1578   bool parseCnt(int64_t &IntVal);
1579   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1580 
1581   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1582   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1583   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1584 
1585   bool parseDelay(int64_t &Delay);
1586   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1587 
1588   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1589 
1590 private:
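  // A parsed field of a composite operand such as hwreg(...) or sendmsg(...),
  // together with its source location for diagnostics.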
1591   struct OperandInfoTy {
1592     SMLoc Loc;
1593     int64_t Id;
1594     bool IsSymbolic = false;
1595     bool IsDefined = false;
1596 
1597     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1598   };
1599 
1600   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1601   bool validateSendMsg(const OperandInfoTy &Msg,
1602                        const OperandInfoTy &Op,
1603                        const OperandInfoTy &Stream);
1604 
1605   bool parseHwregBody(OperandInfoTy &HwReg,
1606                       OperandInfoTy &Offset,
1607                       OperandInfoTy &Width);
1608   bool validateHwreg(const OperandInfoTy &HwReg,
1609                      const OperandInfoTy &Offset,
1610                      const OperandInfoTy &Width);
1611 
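  // Helpers that locate specific operands so diagnostics can point at them.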
1612   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1613   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1614   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1615 
1616   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1617                       const OperandVector &Operands) const;
1618   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1619   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1620   SMLoc getLitLoc(const OperandVector &Operands) const;
1621   SMLoc getConstLoc(const OperandVector &Operands) const;
1622 
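  // Post-match semantic checks; validateInstruction() is the entry point and
  // dispatches to the specific validate* helpers below.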
1623   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1624   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1625   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1626   bool validateSOPLiteral(const MCInst &Inst) const;
1627   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1628   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1629   bool validateIntClampSupported(const MCInst &Inst);
1630   bool validateMIMGAtomicDMask(const MCInst &Inst);
1631   bool validateMIMGGatherDMask(const MCInst &Inst);
1632   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1633   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1634   bool validateMIMGAddrSize(const MCInst &Inst);
1635   bool validateMIMGD16(const MCInst &Inst);
1636   bool validateMIMGDim(const MCInst &Inst);
1637   bool validateMIMGMSAA(const MCInst &Inst);
1638   bool validateOpSel(const MCInst &Inst);
1639   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1640   bool validateVccOperand(unsigned Reg) const;
1641   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1642   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1643   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1644   bool validateAGPRLdSt(const MCInst &Inst) const;
1645   bool validateVGPRAlign(const MCInst &Inst) const;
1646   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1647   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1648   bool validateDivScale(const MCInst &Inst);
1649   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1650                              const SMLoc &IDLoc);
1651   bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1652                           const SMLoc &IDLoc);
1653   bool validateExeczVcczOperands(const OperandVector &Operands);
1654   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1655   unsigned getConstantBusLimit(unsigned Opcode) const;
1656   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1657   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1658   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1659 
1660   bool isSupportedMnemo(StringRef Mnemo,
1661                         const FeatureBitset &FBS);
1662   bool isSupportedMnemo(StringRef Mnemo,
1663                         const FeatureBitset &FBS,
1664                         ArrayRef<unsigned> Variants);
1665   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1666 
1667   bool isId(const StringRef Id) const;
1668   bool isId(const AsmToken &Token, const StringRef Id) const;
1669   bool isToken(const AsmToken::TokenKind Kind) const;
1670   bool trySkipId(const StringRef Id);
1671   bool trySkipId(const StringRef Pref, const StringRef Id);
1672   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1673   bool trySkipToken(const AsmToken::TokenKind Kind);
1674   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1675   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1676   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1677 
1678   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1679   AsmToken::TokenKind getTokenKind() const;
1680   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1681   bool parseExpr(OperandVector &Operands);
1682   StringRef getTokenStr() const;
1683   AsmToken peekToken();
1684   AsmToken getToken() const;
1685   SMLoc getLoc() const;
1686   void lex();
1687 
1688 public:
1689   void onBeginOfFile() override;
1690 
1691   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1692   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1693 
1694   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1695   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1696   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1697   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1698   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1699   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1700 
1701   bool parseSwizzleOperand(int64_t &Op,
1702                            const unsigned MinVal,
1703                            const unsigned MaxVal,
1704                            const StringRef ErrMsg,
1705                            SMLoc &Loc);
1706   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1707                             const unsigned MinVal,
1708                             const unsigned MaxVal,
1709                             const StringRef ErrMsg);
1710   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1711   bool parseSwizzleOffset(int64_t &Imm);
1712   bool parseSwizzleMacro(int64_t &Imm);
1713   bool parseSwizzleQuadPerm(int64_t &Imm);
1714   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1715   bool parseSwizzleBroadcast(int64_t &Imm);
1716   bool parseSwizzleSwap(int64_t &Imm);
1717   bool parseSwizzleReverse(int64_t &Imm);
1718 
1719   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1720   int64_t parseGPRIdxMacro();
1721 
1722   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1723   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1724   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1725   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1726 
1727   AMDGPUOperand::Ptr defaultCPol() const;
1728 
1729   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1730   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1731   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1732   AMDGPUOperand::Ptr defaultFlatOffset() const;
1733 
1734   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1735 
1736   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1737                OptionalImmIndexMap &OptionalIdx);
1738   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1739   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1740   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1741   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1742                 OptionalImmIndexMap &OptionalIdx);
1743 
1744   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1745   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1746 
1747   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1748                bool IsAtomic = false);
1749   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1750   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1751 
1752   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1753 
1754   bool parseDimId(unsigned &Encoding);
1755   OperandMatchResultTy parseDim(OperandVector &Operands);
1756   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1757   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1758   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1759   int64_t parseDPPCtrlSel(StringRef Ctrl);
1760   int64_t parseDPPCtrlPerm();
1761   AMDGPUOperand::Ptr defaultRowMask() const;
1762   AMDGPUOperand::Ptr defaultBankMask() const;
1763   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1764   AMDGPUOperand::Ptr defaultFI() const;
1765   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1766   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1767     cvtDPP(Inst, Operands, true);
1768   }
1769   void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands,
1770                        bool IsDPP8 = false);
1771   void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
1772     cvtVOPCNoDstDPP(Inst, Operands, true);
1773   }
1774   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1775                   bool IsDPP8 = false);
1776   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1777     cvtVOP3DPP(Inst, Operands, true);
1778   }
1779   void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands,
1780                          bool IsDPP8 = false);
1781   void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
1782     cvtVOPC64NoDstDPP(Inst, Operands, true);
1783   }
1784 
1785   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1786                                     AMDGPUOperand::ImmTy Type);
1787   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1788   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1789   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1790   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1791   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1792   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1793   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1794                uint64_t BasicInstType,
1795                bool SkipDstVcc = false,
1796                bool SkipSrcVcc = false);
1797 
1798   AMDGPUOperand::Ptr defaultBLGP() const;
1799   AMDGPUOperand::Ptr defaultCBSZ() const;
1800   AMDGPUOperand::Ptr defaultABID() const;
1801 
1802   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1803   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1804 
1805   AMDGPUOperand::Ptr defaultWaitVDST() const;
1806   AMDGPUOperand::Ptr defaultWaitEXP() const;
1807 };
1808 
1809 struct OptionalOperand {
1810   const char *Name;
1811   AMDGPUOperand::ImmTy Type;
1812   bool IsBit;
1813   bool (*ConvertResult)(int64_t&);
1814 };
1815 
1816 } // end anonymous namespace
1817 
1818 // May be called with an integer type of equivalent bitwidth.
1819 static const fltSemantics *getFltSemantics(unsigned Size) {
1820   switch (Size) {
1821   case 4:
1822     return &APFloat::IEEEsingle();
1823   case 8:
1824     return &APFloat::IEEEdouble();
1825   case 2:
1826     return &APFloat::IEEEhalf();
1827   default:
1828     llvm_unreachable("unsupported fp type");
1829   }
1830 }
1831 
1832 static const fltSemantics *getFltSemantics(MVT VT) {
1833   return getFltSemantics(VT.getSizeInBits() / 8);
1834 }
1835 
1836 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1837   switch (OperandType) {
1838   case AMDGPU::OPERAND_REG_IMM_INT32:
1839   case AMDGPU::OPERAND_REG_IMM_FP32:
1840   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1841   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1842   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1843   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1844   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1845   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1846   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1847   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1848   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1849   case AMDGPU::OPERAND_KIMM32:
1850     return &APFloat::IEEEsingle();
1851   case AMDGPU::OPERAND_REG_IMM_INT64:
1852   case AMDGPU::OPERAND_REG_IMM_FP64:
1853   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1854   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1855   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1856     return &APFloat::IEEEdouble();
1857   case AMDGPU::OPERAND_REG_IMM_INT16:
1858   case AMDGPU::OPERAND_REG_IMM_FP16:
1859   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1860   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1861   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1862   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1863   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1864   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1865   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1866   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1867   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1868   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1869   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1870   case AMDGPU::OPERAND_KIMM16:
1871     return &APFloat::IEEEhalf();
1872   default:
1873     llvm_unreachable("unsupported fp type");
1874   }
1875 }
1876 
1877 //===----------------------------------------------------------------------===//
1878 // Operand
1879 //===----------------------------------------------------------------------===//
1880 
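// Returns true if FPLiteral can be converted to the floating-point type
// corresponding to VT without overflow or underflow; precision loss alone is
// tolerated. FPLiteral is converted in place.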
1881 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1882   bool Lost;
1883 
1884   // Convert the literal to the floating-point semantics of the operand type.
1885   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1886                                                APFloat::rmNearestTiesToEven,
1887                                                &Lost);
1888   // We allow precision loss but not overflow or underflow.
1889   if (Status != APFloat::opOK &&
1890       Lost &&
1891       ((Status & APFloat::opOverflow)  != 0 ||
1892        (Status & APFloat::opUnderflow) != 0)) {
1893     return false;
1894   }
1895 
1896   return true;
1897 }
1898 
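// Returns true if Val fits into Size bits as either a signed or an unsigned
// integer, i.e. it can be truncated to Size bits without losing information.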
1899 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1900   return isUIntN(Size, Val) || isIntN(Size, Val);
1901 }
1902 
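// Check whether a 16-bit (scalar or packed) literal is inlinable. Integer i16
// operands only accept inline integer values (fp inline immediates are broken
// for them), while f16/v2f16 operands accept all inline constants.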
1903 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1904   if (VT.getScalarType() == MVT::i16) {
1905     // FP immediate values are broken.
1906     return isInlinableIntLiteral(Val);
1907   }
1908 
1909   // f16/v2f16 operands work correctly for all values.
1910   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1911 }
1912 
1913 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1914 
1915   // This is a hack to enable named inline values like
1916   // shared_base with both 32-bit and 64-bit operands.
1917   // Note that these values are defined as
1918   // 32-bit operands only.
1919   if (isInlineValue()) {
1920     return true;
1921   }
1922 
1923   if (!isImmTy(ImmTyNone)) {
1924     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1925     return false;
1926   }
1927   // TODO: We should avoid using host float here. It would be better to
1928   // check the float bit values, which is what a few other places do.
1929   // We've had bot failures before due to weird NaN support on mips hosts.
1930 
1931   APInt Literal(64, Imm.Val);
1932 
1933   if (Imm.IsFPImm) { // We got fp literal token
1934     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1935       return AMDGPU::isInlinableLiteral64(Imm.Val,
1936                                           AsmParser->hasInv2PiInlineImm());
1937     }
1938 
1939     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1940     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1941       return false;
1942 
1943     if (type.getScalarSizeInBits() == 16) {
1944       return isInlineableLiteralOp16(
1945         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1946         type, AsmParser->hasInv2PiInlineImm());
1947     }
1948 
1949     // Check if single precision literal is inlinable
1950     return AMDGPU::isInlinableLiteral32(
1951       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1952       AsmParser->hasInv2PiInlineImm());
1953   }
1954 
1955   // We got int literal token.
1956   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1957     return AMDGPU::isInlinableLiteral64(Imm.Val,
1958                                         AsmParser->hasInv2PiInlineImm());
1959   }
1960 
1961   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1962     return false;
1963   }
1964 
1965   if (type.getScalarSizeInBits() == 16) {
1966     return isInlineableLiteralOp16(
1967       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1968       type, AsmParser->hasInv2PiInlineImm());
1969   }
1970 
1971   return AMDGPU::isInlinableLiteral32(
1972     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1973     AsmParser->hasInv2PiInlineImm());
1974 }
1975 
1976 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1977   // Check that this immediate can be added as literal
1978   if (!isImmTy(ImmTyNone)) {
1979     return false;
1980   }
1981 
1982   if (!Imm.IsFPImm) {
1983     // We got int literal token.
1984 
1985     if (type == MVT::f64 && hasFPModifiers()) {
1986       // Cannot apply fp modifiers to int literals preserving the same semantics
1987       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1988       // disable these cases.
1989       return false;
1990     }
1991 
1992     unsigned Size = type.getSizeInBits();
1993     if (Size == 64)
1994       Size = 32;
1995 
1996     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1997     // types.
1998     return isSafeTruncation(Imm.Val, Size);
1999   }
2000 
2001   // We got fp literal token
2002   if (type == MVT::f64) { // Expected 64-bit fp operand
2003     // The low 32 bits of the literal will be zeroed out, but we accept such literals.
2004     return true;
2005   }
2006 
2007   if (type == MVT::i64) { // Expected 64-bit int operand
2008     // We don't allow fp literals in 64-bit integer instructions. It is
2009     // unclear how we should encode them.
2010     return false;
2011   }
2012 
2013   // We allow fp literals with f16x2 operands assuming that the specified
2014   // literal goes into the lower half and the upper half is zero. We also
2015   // require that the literal may be losslessly converted to f16.
2016   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2017                      (type == MVT::v2i16)? MVT::i16 :
2018                      (type == MVT::v2f32)? MVT::f32 : type;
2019 
2020   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2021   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2022 }
2023 
2024 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2025   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2026 }
2027 
2028 bool AMDGPUOperand::isVRegWithInputMods() const {
2029   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2030          // GFX90A allows DPP on 64-bit operands.
2031          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2032           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2033 }
2034 
2035 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2036   if (AsmParser->isVI())
2037     return isVReg32();
2038   else if (AsmParser->isGFX9Plus())
2039     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2040   else
2041     return false;
2042 }
2043 
2044 bool AMDGPUOperand::isSDWAFP16Operand() const {
2045   return isSDWAOperand(MVT::f16);
2046 }
2047 
2048 bool AMDGPUOperand::isSDWAFP32Operand() const {
2049   return isSDWAOperand(MVT::f32);
2050 }
2051 
2052 bool AMDGPUOperand::isSDWAInt16Operand() const {
2053   return isSDWAOperand(MVT::i16);
2054 }
2055 
2056 bool AMDGPUOperand::isSDWAInt32Operand() const {
2057   return isSDWAOperand(MVT::i32);
2058 }
2059 
2060 bool AMDGPUOperand::isBoolReg() const {
2061   auto FB = AsmParser->getFeatureBits();
2062   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2063                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2064 }
2065 
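// Apply the 'abs' and 'neg' floating-point source modifiers to the raw bit
// pattern of an immediate. Size is the operand size in bytes and determines
// the position of the sign bit.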
2066 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2067 {
2068   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2069   assert(Size == 2 || Size == 4 || Size == 8);
2070 
2071   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2072 
2073   if (Imm.Mods.Abs) {
2074     Val &= ~FpSignMask;
2075   }
2076   if (Imm.Mods.Neg) {
2077     Val ^= FpSignMask;
2078   }
2079 
2080   return Val;
2081 }
2082 
2083 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2084   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2085                              Inst.getNumOperands())) {
2086     addLiteralImmOperand(Inst, Imm.Val,
2087                          ApplyModifiers &
2088                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2089   } else {
2090     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2091     Inst.addOperand(MCOperand::createImm(Imm.Val));
2092     setImmKindNone();
2093   }
2094 }
2095 
2096 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2097   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2098   auto OpNum = Inst.getNumOperands();
2099   // Check that this operand accepts literals
2100   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2101 
2102   if (ApplyModifiers) {
2103     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2104     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2105     Val = applyInputFPModifiers(Val, Size);
2106   }
2107 
2108   APInt Literal(64, Val);
2109   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2110 
2111   if (Imm.IsFPImm) { // We got fp literal token
2112     switch (OpTy) {
2113     case AMDGPU::OPERAND_REG_IMM_INT64:
2114     case AMDGPU::OPERAND_REG_IMM_FP64:
2115     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2116     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2117     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2118       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2119                                        AsmParser->hasInv2PiInlineImm())) {
2120         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2121         setImmKindConst();
2122         return;
2123       }
2124 
2125       // Non-inlineable
2126       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2127         // For fp operands we check if low 32 bits are zeros
2128         if (Literal.getLoBits(32) != 0) {
2129           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2130           "Can't encode literal as exact 64-bit floating-point operand. "
2131           "Low 32-bits will be set to zero");
2132         }
2133 
2134         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2135         setImmKindLiteral();
2136         return;
2137       }
2138 
2139       // We don't allow fp literals in 64-bit integer instructions. It is
2140       // unclear how we should encode them. This case should be checked earlier
2141       // in predicate methods (isLiteralImm())
2142       llvm_unreachable("fp literal in 64-bit integer instruction.");
2143 
2144     case AMDGPU::OPERAND_REG_IMM_INT32:
2145     case AMDGPU::OPERAND_REG_IMM_FP32:
2146     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2147     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2148     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2149     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2150     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2151     case AMDGPU::OPERAND_REG_IMM_INT16:
2152     case AMDGPU::OPERAND_REG_IMM_FP16:
2153     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2154     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2155     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2156     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2157     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2158     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2159     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2160     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2161     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2162     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2163     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2164     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2165     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2166     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2167     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2168     case AMDGPU::OPERAND_KIMM32:
2169     case AMDGPU::OPERAND_KIMM16: {
2170       bool lost;
2171       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2172       // Convert the literal to the fp type of the operand (single or half precision).
2173       FPLiteral.convert(*getOpFltSemantics(OpTy),
2174                         APFloat::rmNearestTiesToEven, &lost);
2175       // We allow precision loss but not overflow or underflow. This should be
2176       // checked earlier in isLiteralImm().
2177 
2178       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2179       Inst.addOperand(MCOperand::createImm(ImmVal));
2180       setImmKindLiteral();
2181       return;
2182     }
2183     default:
2184       llvm_unreachable("invalid operand size");
2185     }
2186 
2187     return;
2188   }
2189 
2190   // We got int literal token.
2191   // Only sign extend inline immediates.
2192   switch (OpTy) {
2193   case AMDGPU::OPERAND_REG_IMM_INT32:
2194   case AMDGPU::OPERAND_REG_IMM_FP32:
2195   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2196   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2197   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2198   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2199   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2200   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2201   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2202   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2203   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2204   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2205   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2206     if (isSafeTruncation(Val, 32) &&
2207         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2208                                      AsmParser->hasInv2PiInlineImm())) {
2209       Inst.addOperand(MCOperand::createImm(Val));
2210       setImmKindConst();
2211       return;
2212     }
2213 
2214     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2215     setImmKindLiteral();
2216     return;
2217 
2218   case AMDGPU::OPERAND_REG_IMM_INT64:
2219   case AMDGPU::OPERAND_REG_IMM_FP64:
2220   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2221   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2222   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2223     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2224       Inst.addOperand(MCOperand::createImm(Val));
2225       setImmKindConst();
2226       return;
2227     }
2228 
2229     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2230     setImmKindLiteral();
2231     return;
2232 
2233   case AMDGPU::OPERAND_REG_IMM_INT16:
2234   case AMDGPU::OPERAND_REG_IMM_FP16:
2235   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2236   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2237   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2238   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2239   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2240     if (isSafeTruncation(Val, 16) &&
2241         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2242                                      AsmParser->hasInv2PiInlineImm())) {
2243       Inst.addOperand(MCOperand::createImm(Val));
2244       setImmKindConst();
2245       return;
2246     }
2247 
2248     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2249     setImmKindLiteral();
2250     return;
2251 
2252   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2253   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2254   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2255   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2256     assert(isSafeTruncation(Val, 16));
2257     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2258                                         AsmParser->hasInv2PiInlineImm()));
2259 
2260     Inst.addOperand(MCOperand::createImm(Val));
2261     return;
2262   }
2263   case AMDGPU::OPERAND_KIMM32:
2264     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2265     setImmKindNone();
2266     return;
2267   case AMDGPU::OPERAND_KIMM16:
2268     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2269     setImmKindNone();
2270     return;
2271   default:
2272     llvm_unreachable("invalid operand size");
2273   }
2274 }
2275 
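// Add a KIMM operand of the given bit width. Integer literals are truncated
// to Bitwidth bits; fp literals are first converted to the fp type of that
// width and then encoded bit-for-bit.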
2276 template <unsigned Bitwidth>
2277 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2278   APInt Literal(64, Imm.Val);
2279   setImmKindNone();
2280 
2281   if (!Imm.IsFPImm) {
2282     // We got int literal token.
2283     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2284     return;
2285   }
2286 
2287   bool Lost;
2288   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2289   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2290                     APFloat::rmNearestTiesToEven, &Lost);
2291   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2292 }
2293 
2294 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2295   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2296 }
2297 
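// Check whether Reg is one of the special registers that may be used as a
// named inline constant (shared_base, vccz, execz, scc, null, etc.).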
2298 static bool isInlineValue(unsigned Reg) {
2299   switch (Reg) {
2300   case AMDGPU::SRC_SHARED_BASE:
2301   case AMDGPU::SRC_SHARED_LIMIT:
2302   case AMDGPU::SRC_PRIVATE_BASE:
2303   case AMDGPU::SRC_PRIVATE_LIMIT:
2304   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2305     return true;
2306   case AMDGPU::SRC_VCCZ:
2307   case AMDGPU::SRC_EXECZ:
2308   case AMDGPU::SRC_SCC:
2309     return true;
2310   case AMDGPU::SGPR_NULL:
2311     return true;
2312   default:
2313     return false;
2314   }
2315 }
2316 
2317 bool AMDGPUOperand::isInlineValue() const {
2318   return isRegKind() && ::isInlineValue(getReg());
2319 }
2320 
2321 //===----------------------------------------------------------------------===//
2322 // AsmParser
2323 //===----------------------------------------------------------------------===//
2324 
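// Map a register kind and a width in bits to the corresponding register class
// ID. Returns -1 if there is no register class of the requested width.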
2325 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2326   if (Is == IS_VGPR) {
2327     switch (RegWidth) {
2328       default: return -1;
2329       case 32:
2330         return AMDGPU::VGPR_32RegClassID;
2331       case 64:
2332         return AMDGPU::VReg_64RegClassID;
2333       case 96:
2334         return AMDGPU::VReg_96RegClassID;
2335       case 128:
2336         return AMDGPU::VReg_128RegClassID;
2337       case 160:
2338         return AMDGPU::VReg_160RegClassID;
2339       case 192:
2340         return AMDGPU::VReg_192RegClassID;
2341       case 224:
2342         return AMDGPU::VReg_224RegClassID;
2343       case 256:
2344         return AMDGPU::VReg_256RegClassID;
2345       case 512:
2346         return AMDGPU::VReg_512RegClassID;
2347       case 1024:
2348         return AMDGPU::VReg_1024RegClassID;
2349     }
2350   } else if (Is == IS_TTMP) {
2351     switch (RegWidth) {
2352       default: return -1;
2353       case 32:
2354         return AMDGPU::TTMP_32RegClassID;
2355       case 64:
2356         return AMDGPU::TTMP_64RegClassID;
2357       case 128:
2358         return AMDGPU::TTMP_128RegClassID;
2359       case 256:
2360         return AMDGPU::TTMP_256RegClassID;
2361       case 512:
2362         return AMDGPU::TTMP_512RegClassID;
2363     }
2364   } else if (Is == IS_SGPR) {
2365     switch (RegWidth) {
2366       default: return -1;
2367       case 32:
2368         return AMDGPU::SGPR_32RegClassID;
2369       case 64:
2370         return AMDGPU::SGPR_64RegClassID;
2371       case 96:
2372         return AMDGPU::SGPR_96RegClassID;
2373       case 128:
2374         return AMDGPU::SGPR_128RegClassID;
2375       case 160:
2376         return AMDGPU::SGPR_160RegClassID;
2377       case 192:
2378         return AMDGPU::SGPR_192RegClassID;
2379       case 224:
2380         return AMDGPU::SGPR_224RegClassID;
2381       case 256:
2382         return AMDGPU::SGPR_256RegClassID;
2383       case 512:
2384         return AMDGPU::SGPR_512RegClassID;
2385     }
2386   } else if (Is == IS_AGPR) {
2387     switch (RegWidth) {
2388       default: return -1;
2389       case 32:
2390         return AMDGPU::AGPR_32RegClassID;
2391       case 64:
2392         return AMDGPU::AReg_64RegClassID;
2393       case 96:
2394         return AMDGPU::AReg_96RegClassID;
2395       case 128:
2396         return AMDGPU::AReg_128RegClassID;
2397       case 160:
2398         return AMDGPU::AReg_160RegClassID;
2399       case 192:
2400         return AMDGPU::AReg_192RegClassID;
2401       case 224:
2402         return AMDGPU::AReg_224RegClassID;
2403       case 256:
2404         return AMDGPU::AReg_256RegClassID;
2405       case 512:
2406         return AMDGPU::AReg_512RegClassID;
2407       case 1024:
2408         return AMDGPU::AReg_1024RegClassID;
2409     }
2410   }
2411   return -1;
2412 }
2413 
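// Translate a special register name (exec, vcc, m0, flat_scratch, ...) to the
// corresponding register. Returns AMDGPU::NoRegister for unknown names.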
2414 static unsigned getSpecialRegForName(StringRef RegName) {
2415   return StringSwitch<unsigned>(RegName)
2416     .Case("exec", AMDGPU::EXEC)
2417     .Case("vcc", AMDGPU::VCC)
2418     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2419     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2420     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2421     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2422     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2423     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2424     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2425     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2426     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2427     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2428     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2429     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2430     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2431     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2432     .Case("m0", AMDGPU::M0)
2433     .Case("vccz", AMDGPU::SRC_VCCZ)
2434     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2435     .Case("execz", AMDGPU::SRC_EXECZ)
2436     .Case("src_execz", AMDGPU::SRC_EXECZ)
2437     .Case("scc", AMDGPU::SRC_SCC)
2438     .Case("src_scc", AMDGPU::SRC_SCC)
2439     .Case("tba", AMDGPU::TBA)
2440     .Case("tma", AMDGPU::TMA)
2441     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2442     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2443     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2444     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2445     .Case("vcc_lo", AMDGPU::VCC_LO)
2446     .Case("vcc_hi", AMDGPU::VCC_HI)
2447     .Case("exec_lo", AMDGPU::EXEC_LO)
2448     .Case("exec_hi", AMDGPU::EXEC_HI)
2449     .Case("tma_lo", AMDGPU::TMA_LO)
2450     .Case("tma_hi", AMDGPU::TMA_HI)
2451     .Case("tba_lo", AMDGPU::TBA_LO)
2452     .Case("tba_hi", AMDGPU::TBA_HI)
2453     .Case("pc", AMDGPU::PC_REG)
2454     .Case("null", AMDGPU::SGPR_NULL)
2455     .Default(AMDGPU::NoRegister);
2456 }
2457 
2458 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2459                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2460   auto R = parseRegister();
2461   if (!R) return true;
2462   assert(R->isReg());
2463   RegNo = R->getReg();
2464   StartLoc = R->getStartLoc();
2465   EndLoc = R->getEndLoc();
2466   return false;
2467 }
2468 
2469 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2470                                     SMLoc &EndLoc) {
2471   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2472 }
2473 
2474 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2475                                                        SMLoc &StartLoc,
2476                                                        SMLoc &EndLoc) {
2477   bool Result =
2478       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2479   bool PendingErrors = getParser().hasPendingError();
2480   getParser().clearPendingErrors();
2481   if (PendingErrors)
2482     return MatchOperand_ParseFail;
2483   if (Result)
2484     return MatchOperand_NoMatch;
2485   return MatchOperand_Success;
2486 }
2487 
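// Fold the next register parsed from a register list (Reg1) into the register
// tuple being built in Reg/RegWidth. Special registers may only be combined
// into their known 64-bit pairs; regular registers must have consecutive
// indices. Emits a diagnostic and returns false if they cannot be combined.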
2488 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2489                                             RegisterKind RegKind, unsigned Reg1,
2490                                             SMLoc Loc) {
2491   switch (RegKind) {
2492   case IS_SPECIAL:
2493     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2494       Reg = AMDGPU::EXEC;
2495       RegWidth = 64;
2496       return true;
2497     }
2498     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2499       Reg = AMDGPU::FLAT_SCR;
2500       RegWidth = 64;
2501       return true;
2502     }
2503     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2504       Reg = AMDGPU::XNACK_MASK;
2505       RegWidth = 64;
2506       return true;
2507     }
2508     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2509       Reg = AMDGPU::VCC;
2510       RegWidth = 64;
2511       return true;
2512     }
2513     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2514       Reg = AMDGPU::TBA;
2515       RegWidth = 64;
2516       return true;
2517     }
2518     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2519       Reg = AMDGPU::TMA;
2520       RegWidth = 64;
2521       return true;
2522     }
2523     Error(Loc, "register does not fit in the list");
2524     return false;
2525   case IS_VGPR:
2526   case IS_SGPR:
2527   case IS_AGPR:
2528   case IS_TTMP:
2529     if (Reg1 != Reg + RegWidth / 32) {
2530       Error(Loc, "registers in a list must have consecutive indices");
2531       return false;
2532     }
2533     RegWidth += 32;
2534     return true;
2535   default:
2536     llvm_unreachable("unexpected register kind");
2537   }
2538 }
2539 
2540 struct RegInfo {
2541   StringLiteral Name;
2542   RegisterKind Kind;
2543 };
2544 
2545 static constexpr RegInfo RegularRegisters[] = {
2546   {{"v"},    IS_VGPR},
2547   {{"s"},    IS_SGPR},
2548   {{"ttmp"}, IS_TTMP},
2549   {{"acc"},  IS_AGPR},
2550   {{"a"},    IS_AGPR},
2551 };
2552 
2553 static bool isRegularReg(RegisterKind Kind) {
2554   return Kind == IS_VGPR ||
2555          Kind == IS_SGPR ||
2556          Kind == IS_TTMP ||
2557          Kind == IS_AGPR;
2558 }
2559 
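// Find the regular register prefix ('v', 's', 'ttmp', 'acc' or 'a') that Str
// starts with; returns nullptr if there is no match.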
2560 static const RegInfo* getRegularRegInfo(StringRef Str) {
2561   for (const RegInfo &Reg : RegularRegisters)
2562     if (Str.startswith(Reg.Name))
2563       return &Reg;
2564   return nullptr;
2565 }
2566 
2567 static bool getRegNum(StringRef Str, unsigned& Num) {
2568   return !Str.getAsInteger(10, Num);
2569 }
2570 
2571 bool
2572 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2573                             const AsmToken &NextToken) const {
2574 
2575   // A list of consecutive registers: [s0,s1,s2,s3]
2576   if (Token.is(AsmToken::LBrac))
2577     return true;
2578 
2579   if (!Token.is(AsmToken::Identifier))
2580     return false;
2581 
2582   // A single register like s0 or a range of registers like s[0:1]
2583 
2584   StringRef Str = Token.getString();
2585   const RegInfo *Reg = getRegularRegInfo(Str);
2586   if (Reg) {
2587     StringRef RegName = Reg->Name;
2588     StringRef RegSuffix = Str.substr(RegName.size());
2589     if (!RegSuffix.empty()) {
2590       unsigned Num;
2591       // A single register with an index: rXX
2592       if (getRegNum(RegSuffix, Num))
2593         return true;
2594     } else {
2595       // A range of registers: r[XX:YY].
2596       if (NextToken.is(AsmToken::LBrac))
2597         return true;
2598     }
2599   }
2600 
2601   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2602 }
2603 
2604 bool
2605 AMDGPUAsmParser::isRegister()
2606 {
2607   return isRegister(getToken(), peekToken());
2608 }
2609 
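// Compute the register for a regular (v/s/ttmp/a) register given its kind,
// starting index and width in bits. Enforces the SGPR/TTMP alignment rules
// and reports an error if the index or width is invalid.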
2610 unsigned
2611 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2612                                unsigned RegNum,
2613                                unsigned RegWidth,
2614                                SMLoc Loc) {
2615 
2616   assert(isRegularReg(RegKind));
2617 
2618   unsigned AlignSize = 1;
2619   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2620     // SGPR and TTMP registers must be aligned.
2621     // Max required alignment is 4 dwords.
2622     AlignSize = std::min(RegWidth / 32, 4u);
2623   }
2624 
2625   if (RegNum % AlignSize != 0) {
2626     Error(Loc, "invalid register alignment");
2627     return AMDGPU::NoRegister;
2628   }
2629 
2630   unsigned RegIdx = RegNum / AlignSize;
2631   int RCID = getRegClass(RegKind, RegWidth);
2632   if (RCID == -1) {
2633     Error(Loc, "invalid or unsupported register size");
2634     return AMDGPU::NoRegister;
2635   }
2636 
2637   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2638   const MCRegisterClass RC = TRI->getRegClass(RCID);
2639   if (RegIdx >= RC.getNumRegs()) {
2640     Error(Loc, "register index is out of range");
2641     return AMDGPU::NoRegister;
2642   }
2643 
2644   return RC.getRegister(RegIdx);
2645 }
2646 
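// Parse a register index or an index range in square brackets, e.g. '[0]' or
// '[0:3]', returning the starting index and the total width in bits.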
2647 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2648   int64_t RegLo, RegHi;
2649   if (!skipToken(AsmToken::LBrac, "missing register index"))
2650     return false;
2651 
2652   SMLoc FirstIdxLoc = getLoc();
2653   SMLoc SecondIdxLoc;
2654 
2655   if (!parseExpr(RegLo))
2656     return false;
2657 
2658   if (trySkipToken(AsmToken::Colon)) {
2659     SecondIdxLoc = getLoc();
2660     if (!parseExpr(RegHi))
2661       return false;
2662   } else {
2663     RegHi = RegLo;
2664   }
2665 
2666   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2667     return false;
2668 
2669   if (!isUInt<32>(RegLo)) {
2670     Error(FirstIdxLoc, "invalid register index");
2671     return false;
2672   }
2673 
2674   if (!isUInt<32>(RegHi)) {
2675     Error(SecondIdxLoc, "invalid register index");
2676     return false;
2677   }
2678 
2679   if (RegLo > RegHi) {
2680     Error(FirstIdxLoc, "first register index should not exceed second index");
2681     return false;
2682   }
2683 
2684   Num = static_cast<unsigned>(RegLo);
2685   RegWidth = 32 * ((RegHi - RegLo) + 1);
2686   return true;
2687 }
2688 
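// Try to parse the current identifier as a special register name. On success
// the token is consumed and recorded in Tokens; otherwise AMDGPU::NoRegister
// is returned and nothing is consumed.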
2689 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2690                                           unsigned &RegNum, unsigned &RegWidth,
2691                                           SmallVectorImpl<AsmToken> &Tokens) {
2692   assert(isToken(AsmToken::Identifier));
2693   unsigned Reg = getSpecialRegForName(getTokenStr());
2694   if (Reg) {
2695     RegNum = 0;
2696     RegWidth = 32;
2697     RegKind = IS_SPECIAL;
2698     Tokens.push_back(getToken());
2699     lex(); // skip register name
2700   }
2701   return Reg;
2702 }
2703 
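// Parse a regular register: either a single register like 'v0' or a range
// like 'v[0:3]'. Emits a diagnostic and returns AMDGPU::NoRegister on error.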
2704 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2705                                           unsigned &RegNum, unsigned &RegWidth,
2706                                           SmallVectorImpl<AsmToken> &Tokens) {
2707   assert(isToken(AsmToken::Identifier));
2708   StringRef RegName = getTokenStr();
2709   auto Loc = getLoc();
2710 
2711   const RegInfo *RI = getRegularRegInfo(RegName);
2712   if (!RI) {
2713     Error(Loc, "invalid register name");
2714     return AMDGPU::NoRegister;
2715   }
2716 
2717   Tokens.push_back(getToken());
2718   lex(); // skip register name
2719 
2720   RegKind = RI->Kind;
2721   StringRef RegSuffix = RegName.substr(RI->Name.size());
2722   if (!RegSuffix.empty()) {
2723     // Single 32-bit register: vXX.
2724     if (!getRegNum(RegSuffix, RegNum)) {
2725       Error(Loc, "invalid register index");
2726       return AMDGPU::NoRegister;
2727     }
2728     RegWidth = 32;
2729   } else {
2730     // Range of registers: v[XX:YY]. ":YY" is optional.
2731     if (!ParseRegRange(RegNum, RegWidth))
2732       return AMDGPU::NoRegister;
2733   }
2734 
2735   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2736 }
2737 
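// Parse a list of consecutive 32-bit registers in square brackets, e.g.
// '[s0,s1,s2,s3]', and combine them into a single register tuple.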
2738 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2739                                        unsigned &RegWidth,
2740                                        SmallVectorImpl<AsmToken> &Tokens) {
2741   unsigned Reg = AMDGPU::NoRegister;
2742   auto ListLoc = getLoc();
2743 
2744   if (!skipToken(AsmToken::LBrac,
2745                  "expected a register or a list of registers")) {
2746     return AMDGPU::NoRegister;
2747   }
2748 
2749   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2750 
2751   auto Loc = getLoc();
2752   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2753     return AMDGPU::NoRegister;
2754   if (RegWidth != 32) {
2755     Error(Loc, "expected a single 32-bit register");
2756     return AMDGPU::NoRegister;
2757   }
2758 
2759   for (; trySkipToken(AsmToken::Comma); ) {
2760     RegisterKind NextRegKind;
2761     unsigned NextReg, NextRegNum, NextRegWidth;
2762     Loc = getLoc();
2763 
2764     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2765                              NextRegNum, NextRegWidth,
2766                              Tokens)) {
2767       return AMDGPU::NoRegister;
2768     }
2769     if (NextRegWidth != 32) {
2770       Error(Loc, "expected a single 32-bit register");
2771       return AMDGPU::NoRegister;
2772     }
2773     if (NextRegKind != RegKind) {
2774       Error(Loc, "registers in a list must be of the same kind");
2775       return AMDGPU::NoRegister;
2776     }
2777     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2778       return AMDGPU::NoRegister;
2779   }
2780 
2781   if (!skipToken(AsmToken::RBrac,
2782                  "expected a comma or a closing square bracket")) {
2783     return AMDGPU::NoRegister;
2784   }
2785 
2786   if (isRegularReg(RegKind))
2787     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2788 
2789   return Reg;
2790 }
2791 
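// Parse any form of register operand (special register, regular register,
// register range or register list) and verify that the resulting register is
// available on the current subtarget.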
2792 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2793                                           unsigned &RegNum, unsigned &RegWidth,
2794                                           SmallVectorImpl<AsmToken> &Tokens) {
2795   auto Loc = getLoc();
2796   Reg = AMDGPU::NoRegister;
2797 
2798   if (isToken(AsmToken::Identifier)) {
2799     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2800     if (Reg == AMDGPU::NoRegister)
2801       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2802   } else {
2803     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2804   }
2805 
2806   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2807   if (Reg == AMDGPU::NoRegister) {
2808     assert(Parser.hasPendingError());
2809     return false;
2810   }
2811 
2812   if (!subtargetHasRegister(*TRI, Reg)) {
2813     if (Reg == AMDGPU::SGPR_NULL) {
2814       Error(Loc, "'null' operand is not supported on this GPU");
2815     } else {
2816       Error(Loc, "register not available on this GPU");
2817     }
2818     return false;
2819   }
2820 
2821   return true;
2822 }
2823 
2824 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2825                                           unsigned &RegNum, unsigned &RegWidth,
2826                                           bool RestoreOnFailure /*=false*/) {
2827   Reg = AMDGPU::NoRegister;
2828 
2829   SmallVector<AsmToken, 1> Tokens;
2830   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2831     if (RestoreOnFailure) {
2832       while (!Tokens.empty()) {
2833         getLexer().UnLex(Tokens.pop_back_val());
2834       }
2835     }
2836     return true;
2837   }
2838   return false;
2839 }
2840 
2841 Optional<StringRef>
2842 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2843   switch (RegKind) {
2844   case IS_VGPR:
2845     return StringRef(".amdgcn.next_free_vgpr");
2846   case IS_SGPR:
2847     return StringRef(".amdgcn.next_free_sgpr");
2848   default:
2849     return None;
2850   }
2851 }
2852 
2853 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2854   auto SymbolName = getGprCountSymbolName(RegKind);
2855   assert(SymbolName && "initializing invalid register kind");
2856   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2857   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2858 }
2859 
2860 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2861                                             unsigned DwordRegIndex,
2862                                             unsigned RegWidth) {
2863   // Symbols are only defined for GCN targets
2864   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2865     return true;
2866 
2867   auto SymbolName = getGprCountSymbolName(RegKind);
2868   if (!SymbolName)
2869     return true;
2870   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2871 
2872   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2873   int64_t OldCount;
2874 
2875   if (!Sym->isVariable())
2876     return !Error(getLoc(),
2877                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2878   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2879     return !Error(
2880         getLoc(),
2881         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2882 
2883   if (OldCount <= NewMax)
2884     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2885 
2886   return true;
2887 }
2888 
2889 std::unique_ptr<AMDGPUOperand>
2890 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2891   const auto &Tok = getToken();
2892   SMLoc StartLoc = Tok.getLoc();
2893   SMLoc EndLoc = Tok.getEndLoc();
2894   RegisterKind RegKind;
2895   unsigned Reg, RegNum, RegWidth;
2896 
2897   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2898     return nullptr;
2899   }
2900   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2901     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2902       return nullptr;
2903   } else
2904     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2905   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2906 }
2907 
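// Parse an immediate operand: either a floating-point literal with an
// optional leading '-', or an integer expression. Floating-point expressions
// are not supported.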
2908 OperandMatchResultTy
2909 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2910   // TODO: add syntactic sugar for 1/(2*PI)
2911 
2912   assert(!isRegister());
2913   assert(!isModifier());
2914 
2915   const auto& Tok = getToken();
2916   const auto& NextTok = peekToken();
2917   bool IsReal = Tok.is(AsmToken::Real);
2918   SMLoc S = getLoc();
2919   bool Negate = false;
2920 
2921   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2922     lex();
2923     IsReal = true;
2924     Negate = true;
2925   }
2926 
2927   if (IsReal) {
2928     // Floating-point expressions are not supported.
2929     // Can only allow floating-point literals with an
2930     // optional sign.
2931 
2932     StringRef Num = getTokenStr();
2933     lex();
2934 
2935     APFloat RealVal(APFloat::IEEEdouble());
2936     auto roundMode = APFloat::rmNearestTiesToEven;
2937     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2938       return MatchOperand_ParseFail;
2939     }
2940     if (Negate)
2941       RealVal.changeSign();
2942 
2943     Operands.push_back(
2944       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2945                                AMDGPUOperand::ImmTyNone, true));
2946 
2947     return MatchOperand_Success;
2948 
2949   } else {
2950     int64_t IntVal;
2951     const MCExpr *Expr;
2952     SMLoc S = getLoc();
2953 
2954     if (HasSP3AbsModifier) {
2955       // This is a workaround for handling expressions
2956       // as arguments of SP3 'abs' modifier, for example:
2957       //     |1.0|
2958       //     |-1|
2959       //     |1+x|
2960       // This syntax is not compatible with the syntax of standard
2961       // MC expressions (due to the trailing '|').
2962       SMLoc EndLoc;
2963       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2964         return MatchOperand_ParseFail;
2965     } else {
2966       if (Parser.parseExpression(Expr))
2967         return MatchOperand_ParseFail;
2968     }
2969 
2970     if (Expr->evaluateAsAbsolute(IntVal)) {
2971       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2972     } else {
2973       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2974     }
2975 
2976     return MatchOperand_Success;
2977   }
2978 
2979   return MatchOperand_NoMatch;
2980 }
2981 
2982 OperandMatchResultTy
2983 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2984   if (!isRegister())
2985     return MatchOperand_NoMatch;
2986 
2987   if (auto R = parseRegister()) {
2988     assert(R->isReg());
2989     Operands.push_back(std::move(R));
2990     return MatchOperand_Success;
2991   }
2992   return MatchOperand_ParseFail;
2993 }
2994 
2995 OperandMatchResultTy
2996 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2997   auto res = parseReg(Operands);
2998   if (res != MatchOperand_NoMatch) {
2999     return res;
3000   } else if (isModifier()) {
3001     return MatchOperand_NoMatch;
3002   } else {
3003     return parseImm(Operands, HasSP3AbsMod);
3004   }
3005 }
3006 
3007 bool
3008 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3009   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3010     const auto &str = Token.getString();
3011     return str == "abs" || str == "neg" || str == "sext";
3012   }
3013   return false;
3014 }
3015 
3016 bool
3017 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3018   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3019 }
3020 
3021 bool
3022 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3023   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3024 }
3025 
3026 bool
3027 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3028   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3029 }
3030 
3031 // Check if this is an operand modifier or an opcode modifier
3032 // which may look like an expression but is not. We should
3033 // avoid parsing these modifiers as expressions. Currently
3034 // recognized sequences are:
3035 //   |...|
3036 //   abs(...)
3037 //   neg(...)
3038 //   sext(...)
3039 //   -reg
3040 //   -|...|
3041 //   -abs(...)
3042 //   name:...
3043 // Note that simple opcode modifiers like 'gds' may be parsed as
3044 // expressions; this is a special case. See getExpressionAsToken.
3045 //
3046 bool
3047 AMDGPUAsmParser::isModifier() {
3048 
3049   AsmToken Tok = getToken();
3050   AsmToken NextToken[2];
3051   peekTokens(NextToken);
3052 
3053   return isOperandModifier(Tok, NextToken[0]) ||
3054          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3055          isOpcodeModifierWithVal(Tok, NextToken[0]);
3056 }
3057 
3058 // Check if the current token is an SP3 'neg' modifier.
3059 // Currently this modifier is allowed in the following context:
3060 //
3061 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3062 // 2. Before an 'abs' modifier: -abs(...)
3063 // 3. Before an SP3 'abs' modifier: -|...|
3064 //
3065 // In all other cases "-" is handled as part
3066 // of an expression that follows the sign.
3067 //
3068 // Note: When "-" is followed by an integer literal,
3069 // this is interpreted as integer negation rather
3070 // than a floating-point NEG modifier applied to N.
3071 // Besides being counter-intuitive, such use of a floating-point
3072 // NEG modifier would result in different meanings
3073 // of integer literals used with VOP1/2/C and VOP3,
3074 // for example:
3075 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3076 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3077 // Negative fp literals with a preceding "-" are
3078 // handled likewise for uniformity.
3079 //
3080 bool
3081 AMDGPUAsmParser::parseSP3NegModifier() {
3082 
3083   AsmToken NextToken[2];
3084   peekTokens(NextToken);
3085 
3086   if (isToken(AsmToken::Minus) &&
3087       (isRegister(NextToken[0], NextToken[1]) ||
3088        NextToken[0].is(AsmToken::Pipe) ||
3089        isId(NextToken[0], "abs"))) {
3090     lex();
3091     return true;
3092   }
3093 
3094   return false;
3095 }
3096 
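// Parse a register or immediate operand with optional floating-point input
// modifiers: 'neg(...)', 'abs(...)', and their SP3 forms '-' and '|...|'.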
3097 OperandMatchResultTy
3098 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3099                                               bool AllowImm) {
3100   bool Neg, SP3Neg;
3101   bool Abs, SP3Abs;
3102   SMLoc Loc;
3103 
3104   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3105   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3106     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3107     return MatchOperand_ParseFail;
3108   }
3109 
3110   SP3Neg = parseSP3NegModifier();
3111 
3112   Loc = getLoc();
3113   Neg = trySkipId("neg");
3114   if (Neg && SP3Neg) {
3115     Error(Loc, "expected register or immediate");
3116     return MatchOperand_ParseFail;
3117   }
3118   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3119     return MatchOperand_ParseFail;
3120 
3121   Abs = trySkipId("abs");
3122   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3123     return MatchOperand_ParseFail;
3124 
3125   Loc = getLoc();
3126   SP3Abs = trySkipToken(AsmToken::Pipe);
3127   if (Abs && SP3Abs) {
3128     Error(Loc, "expected register or immediate");
3129     return MatchOperand_ParseFail;
3130   }
3131 
3132   OperandMatchResultTy Res;
3133   if (AllowImm) {
3134     Res = parseRegOrImm(Operands, SP3Abs);
3135   } else {
3136     Res = parseReg(Operands);
3137   }
3138   if (Res != MatchOperand_Success) {
3139     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3140   }
3141 
3142   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3143     return MatchOperand_ParseFail;
3144   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3145     return MatchOperand_ParseFail;
3146   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3147     return MatchOperand_ParseFail;
3148 
3149   AMDGPUOperand::Modifiers Mods;
3150   Mods.Abs = Abs || SP3Abs;
3151   Mods.Neg = Neg || SP3Neg;
3152 
3153   if (Mods.hasFPModifiers()) {
3154     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3155     if (Op.isExpr()) {
3156       Error(Op.getStartLoc(), "expected an absolute expression");
3157       return MatchOperand_ParseFail;
3158     }
3159     Op.setModifiers(Mods);
3160   }
3161   return MatchOperand_Success;
3162 }
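
// Illustrative examples of FP input modifier syntax accepted by
// parseRegOrImmWithFPInputMods above; the surrounding mnemonic is chosen
// only for illustration:
//   v_add_f32 v0, -v1, |v2|          // SP3-style neg and abs
//   v_add_f32 v0, neg(v1), abs(v2)   // functional-style neg and abs
//   v_add_f32 v0, -|v1|, v2          // SP3 neg applied to an SP3 abs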
3163 
3164 OperandMatchResultTy
3165 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3166                                                bool AllowImm) {
3167   bool Sext = trySkipId("sext");
3168   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3169     return MatchOperand_ParseFail;
3170 
3171   OperandMatchResultTy Res;
3172   if (AllowImm) {
3173     Res = parseRegOrImm(Operands);
3174   } else {
3175     Res = parseReg(Operands);
3176   }
3177   if (Res != MatchOperand_Success) {
3178     return Sext? MatchOperand_ParseFail : Res;
3179   }
3180 
3181   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3182     return MatchOperand_ParseFail;
3183 
3184   AMDGPUOperand::Modifiers Mods;
3185   Mods.Sext = Sext;
3186 
3187   if (Mods.hasIntModifiers()) {
3188     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3189     if (Op.isExpr()) {
3190       Error(Op.getStartLoc(), "expected an absolute expression");
3191       return MatchOperand_ParseFail;
3192     }
3193     Op.setModifiers(Mods);
3194   }
3195 
3196   return MatchOperand_Success;
3197 }
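
// Illustrative syntax for the integer input modifier parsed above: sext()
// may wrap a register or, when immediates are allowed, an immediate,
// e.g. sext(v1) or sext(-2). Whether a given instruction actually accepts
// the modifier is decided later by the matcher and the validators.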
3198 
3199 OperandMatchResultTy
3200 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3201   return parseRegOrImmWithFPInputMods(Operands, false);
3202 }
3203 
3204 OperandMatchResultTy
3205 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3206   return parseRegOrImmWithIntInputMods(Operands, false);
3207 }
3208 
3209 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3210   auto Loc = getLoc();
3211   if (trySkipId("off")) {
3212     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3213                                                 AMDGPUOperand::ImmTyOff, false));
3214     return MatchOperand_Success;
3215   }
3216 
3217   if (!isRegister())
3218     return MatchOperand_NoMatch;
3219 
3220   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3221   if (Reg) {
3222     Operands.push_back(std::move(Reg));
3223     return MatchOperand_Success;
3224   }
3225 
3226   return MatchOperand_ParseFail;
3227 
3228 }
3229 
3230 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3231   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3232 
3233   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3234       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3235       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3236       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3237     return Match_InvalidOperand;
3238 
3239   if ((TSFlags & SIInstrFlags::VOP3) &&
3240       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3241       getForcedEncodingSize() != 64)
3242     return Match_PreferE32;
3243 
3244   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3245       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3246     // v_mac_f32/16 allow only dst_sel == DWORD;
3247     auto OpNum =
3248         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3249     const auto &Op = Inst.getOperand(OpNum);
3250     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3251       return Match_InvalidOperand;
3252     }
3253   }
3254 
3255   return Match_Success;
3256 }
3257 
3258 static ArrayRef<unsigned> getAllVariants() {
3259   static const unsigned Variants[] = {
3260     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3261     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3262     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3263   };
3264 
3265   return makeArrayRef(Variants);
3266 }
3267 
3268 // Which asm variants we should check.
3269 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3270   if (isForcedDPP() && isForcedVOP3()) {
3271     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3272     return makeArrayRef(Variants);
3273   }
3274   if (getForcedEncodingSize() == 32) {
3275     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3276     return makeArrayRef(Variants);
3277   }
3278 
3279   if (isForcedVOP3()) {
3280     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3281     return makeArrayRef(Variants);
3282   }
3283 
3284   if (isForcedSDWA()) {
3285     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3286                                         AMDGPUAsmVariants::SDWA9};
3287     return makeArrayRef(Variants);
3288   }
3289 
3290   if (isForcedDPP()) {
3291     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3292     return makeArrayRef(Variants);
3293   }
3294 
3295   return getAllVariants();
3296 }
3297 
3298 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3299   if (isForcedDPP() && isForcedVOP3())
3300     return "e64_dpp";
3301 
3302   if (getForcedEncodingSize() == 32)
3303     return "e32";
3304 
3305   if (isForcedVOP3())
3306     return "e64";
3307 
3308   if (isForcedSDWA())
3309     return "sdwa";
3310 
3311   if (isForcedDPP())
3312     return "dpp";
3313 
3314   return "";
3315 }
3316 
3317 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3318   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3319   const unsigned Num = Desc.getNumImplicitUses();
3320   for (unsigned i = 0; i < Num; ++i) {
3321     unsigned Reg = Desc.ImplicitUses[i];
3322     switch (Reg) {
3323     case AMDGPU::FLAT_SCR:
3324     case AMDGPU::VCC:
3325     case AMDGPU::VCC_LO:
3326     case AMDGPU::VCC_HI:
3327     case AMDGPU::M0:
3328       return Reg;
3329     default:
3330       break;
3331     }
3332   }
3333   return AMDGPU::NoRegister;
3334 }
3335 
3336 // NB: This code is correct only when used to check constant
3337 // bus limitations because GFX7 supports no f16 inline constants.
3338 // Note that there are no cases when a GFX7 opcode violates
3339 // constant bus limitations due to the use of an f16 constant.
3340 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3341                                        unsigned OpIdx) const {
3342   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3343 
3344   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3345     return false;
3346   }
3347 
3348   const MCOperand &MO = Inst.getOperand(OpIdx);
3349 
3350   int64_t Val = MO.getImm();
3351   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3352 
3353   switch (OpSize) { // expected operand size
3354   case 8:
3355     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3356   case 4:
3357     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3358   case 2: {
3359     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3360     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3361         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3362         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3363       return AMDGPU::isInlinableIntLiteral(Val);
3364 
3365     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3366         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3367         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3368       return AMDGPU::isInlinableIntLiteralV216(Val);
3369 
3370     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3371         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3372         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3373       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3374 
3375     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3376   }
3377   default:
3378     llvm_unreachable("invalid operand size");
3379   }
3380 }
3381 
3382 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3383   if (!isGFX10Plus())
3384     return 1;
3385 
3386   switch (Opcode) {
3387   // 64-bit shift instructions can use only one scalar value input
3388   case AMDGPU::V_LSHLREV_B64_e64:
3389   case AMDGPU::V_LSHLREV_B64_gfx10:
3390   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3391   case AMDGPU::V_LSHRREV_B64_e64:
3392   case AMDGPU::V_LSHRREV_B64_gfx10:
3393   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3394   case AMDGPU::V_ASHRREV_I64_e64:
3395   case AMDGPU::V_ASHRREV_I64_gfx10:
3396   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3397   case AMDGPU::V_LSHL_B64_e64:
3398   case AMDGPU::V_LSHR_B64_e64:
3399   case AMDGPU::V_ASHR_I64_e64:
3400     return 1;
3401   default:
3402     return 2;
3403   }
3404 }
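
// Worked example (illustrative): on GFX10+ most VALU instructions may read
// two scalar values over the constant bus, but the 64-bit shifts listed
// above are limited to one. For instance
//   v_lshlrev_b64 v[0:1], s0, s[2:3]
// uses two distinct SGPR sources and exceeds that limit, so it is rejected
// by validateConstantBusLimitations below.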
3405 
3406 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3407   const MCOperand &MO = Inst.getOperand(OpIdx);
3408   if (MO.isImm()) {
3409     return !isInlineConstant(Inst, OpIdx);
3410   } else if (MO.isReg()) {
3411     auto Reg = MO.getReg();
3412     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3413     auto PReg = mc2PseudoReg(Reg);
3414     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3415   } else {
3416     return true;
3417   }
3418 }
3419 
3420 bool
3421 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3422                                                 const OperandVector &Operands) {
3423   const unsigned Opcode = Inst.getOpcode();
3424   const MCInstrDesc &Desc = MII.get(Opcode);
3425   unsigned LastSGPR = AMDGPU::NoRegister;
3426   unsigned ConstantBusUseCount = 0;
3427   unsigned NumLiterals = 0;
3428   unsigned LiteralSize;
3429 
3430   if (Desc.TSFlags &
3431       (SIInstrFlags::VOPC |
3432        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3433        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3434        SIInstrFlags::SDWA)) {
3435     // Check special imm operands (used by madmk, etc)
3436     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3437       ++NumLiterals;
3438       LiteralSize = 4;
3439     }
3440 
3441     SmallDenseSet<unsigned> SGPRsUsed;
3442     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3443     if (SGPRUsed != AMDGPU::NoRegister) {
3444       SGPRsUsed.insert(SGPRUsed);
3445       ++ConstantBusUseCount;
3446     }
3447 
3448     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3449     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3450     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3451 
3452     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3453 
3454     for (int OpIdx : OpIndices) {
3455       if (OpIdx == -1) break;
3456 
3457       const MCOperand &MO = Inst.getOperand(OpIdx);
3458       if (usesConstantBus(Inst, OpIdx)) {
3459         if (MO.isReg()) {
3460           LastSGPR = mc2PseudoReg(MO.getReg());
3461           // Pairs of registers with a partial intersection like these
3462           //   s0, s[0:1]
3463           //   flat_scratch_lo, flat_scratch
3464           //   flat_scratch_lo, flat_scratch_hi
3465           // are theoretically valid but are disabled anyway.
3466           // Note that this code mimics SIInstrInfo::verifyInstruction
3467           if (!SGPRsUsed.count(LastSGPR)) {
3468             SGPRsUsed.insert(LastSGPR);
3469             ++ConstantBusUseCount;
3470           }
3471         } else { // Expression or a literal
3472 
3473           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3474             continue; // special operand like VINTERP attr_chan
3475 
3476           // An instruction may use only one literal.
3477           // This has been validated on the previous step.
3478           // See validateVOPLiteral.
3479           // This literal may be used as more than one operand.
3480           // If all these operands are of the same size,
3481           // this literal counts as one scalar value.
3482           // Otherwise it counts as 2 scalar values.
3483           // See "GFX10 Shader Programming", section 3.6.2.3.
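          // For example (illustrative): if the same literal value feeds a
          // b32 source and a b16 source, the operand sizes differ and the
          // literal is counted as two scalar values; if all of its uses
          // have the same size it is counted once.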
3484 
3485           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3486           if (Size < 4) Size = 4;
3487 
3488           if (NumLiterals == 0) {
3489             NumLiterals = 1;
3490             LiteralSize = Size;
3491           } else if (LiteralSize != Size) {
3492             NumLiterals = 2;
3493           }
3494         }
3495       }
3496     }
3497   }
3498   ConstantBusUseCount += NumLiterals;
3499 
3500   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3501     return true;
3502 
3503   SMLoc LitLoc = getLitLoc(Operands);
3504   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3505   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3506   Error(Loc, "invalid operand (violates constant bus restrictions)");
3507   return false;
3508 }
3509 
3510 bool
3511 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3512                                                  const OperandVector &Operands) {
3513   const unsigned Opcode = Inst.getOpcode();
3514   const MCInstrDesc &Desc = MII.get(Opcode);
3515 
3516   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3517   if (DstIdx == -1 ||
3518       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3519     return true;
3520   }
3521 
3522   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3523 
3524   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3525   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3526   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3527 
3528   assert(DstIdx != -1);
3529   const MCOperand &Dst = Inst.getOperand(DstIdx);
3530   assert(Dst.isReg());
3531 
3532   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3533 
3534   for (int SrcIdx : SrcIndices) {
3535     if (SrcIdx == -1) break;
3536     const MCOperand &Src = Inst.getOperand(SrcIdx);
3537     if (Src.isReg()) {
3538       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3539         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3540         Error(getRegLoc(SrcReg, Operands),
3541           "destination must be different than all sources");
3542         return false;
3543       }
3544     }
3545   }
3546 
3547   return true;
3548 }
3549 
3550 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3551 
3552   const unsigned Opc = Inst.getOpcode();
3553   const MCInstrDesc &Desc = MII.get(Opc);
3554 
3555   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3556     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3557     assert(ClampIdx != -1);
3558     return Inst.getOperand(ClampIdx).getImm() == 0;
3559   }
3560 
3561   return true;
3562 }
3563 
3564 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3565 
3566   const unsigned Opc = Inst.getOpcode();
3567   const MCInstrDesc &Desc = MII.get(Opc);
3568 
3569   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3570     return None;
3571 
3572   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3573   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3574   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3575 
3576   assert(VDataIdx != -1);
3577 
3578   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3579     return None;
3580 
3581   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3582   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3583   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3584   if (DMask == 0)
3585     DMask = 1;
3586 
3587   bool isPackedD16 = false;
3588   unsigned DataSize =
3589     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3590   if (hasPackedD16()) {
3591     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3592     isPackedD16 = D16Idx >= 0;
3593     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3594       DataSize = (DataSize + 1) / 2;
3595   }
3596 
3597   if ((VDataSize / 4) == DataSize + TFESize)
3598     return None;
3599 
3600   return StringRef(isPackedD16
3601                        ? "image data size does not match dmask, d16 and tfe"
3602                        : "image data size does not match dmask and tfe");
3603 }
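
// Worked example for the check above (illustrative): with dmask = 0x7 the
// image instruction returns three channels, so vdata must span three
// registers; tfe adds one more, and with packed d16 enabled the three
// channels pack into (3 + 1) / 2 = 2 registers (plus one if tfe is set).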
3604 
3605 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3606   const unsigned Opc = Inst.getOpcode();
3607   const MCInstrDesc &Desc = MII.get(Opc);
3608 
3609   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3610     return true;
3611 
3612   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3613 
3614   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3615       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3616   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3617   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3618   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3619   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3620 
3621   assert(VAddr0Idx != -1);
3622   assert(SrsrcIdx != -1);
3623   assert(SrsrcIdx > VAddr0Idx);
3624 
3625   if (DimIdx == -1)
3626     return true; // intersect_ray
3627 
3628   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3629   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3630   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3631   unsigned ActualAddrSize =
3632       IsNSA ? SrsrcIdx - VAddr0Idx
3633             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3634   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3635 
3636   unsigned ExpectedAddrSize =
3637       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3638 
3639   if (!IsNSA) {
3640     if (ExpectedAddrSize > 8)
3641       ExpectedAddrSize = 16;
3642 
3643     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3644     // This provides backward compatibility for assembly created
3645     // before 160b/192b/224b types were directly supported.
3646     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3647       return true;
3648   }
3649 
3650   return ActualAddrSize == ExpectedAddrSize;
3651 }
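
// Note on the check above (illustrative): in NSA form each address
// component occupies its own vaddr operand, so the actual address size is
// the number of vaddr operands; in non-NSA form it is the size of the
// single vaddr tuple in dwords, and an expected size above 8 is padded
// to 16.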
3652 
3653 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3654 
3655   const unsigned Opc = Inst.getOpcode();
3656   const MCInstrDesc &Desc = MII.get(Opc);
3657 
3658   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3659     return true;
3660   if (!Desc.mayLoad() || !Desc.mayStore())
3661     return true; // Not atomic
3662 
3663   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3664   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3665 
3666   // This is an incomplete check because image_atomic_cmpswap
3667   // may only use 0x3 and 0xf while other atomic operations
3668   // may use 0x1 and 0x3. However these limitations are
3669   // verified when we check that dmask matches dst size.
3670   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3671 }
3672 
3673 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3674 
3675   const unsigned Opc = Inst.getOpcode();
3676   const MCInstrDesc &Desc = MII.get(Opc);
3677 
3678   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3679     return true;
3680 
3681   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3682   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3683 
3684   // GATHER4 instructions use dmask in a different fashion compared to
3685   // other MIMG instructions. The only useful DMASK values are
3686   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3687   // (red,red,red,red) etc.) The ISA document doesn't mention
3688   // this.
3689   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3690 }
3691 
3692 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3693   const unsigned Opc = Inst.getOpcode();
3694   const MCInstrDesc &Desc = MII.get(Opc);
3695 
3696   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3697     return true;
3698 
3699   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3700   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3701       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3702 
3703   if (!BaseOpcode->MSAA)
3704     return true;
3705 
3706   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3707   assert(DimIdx != -1);
3708 
3709   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3710   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3711 
3712   return DimInfo->MSAA;
3713 }
3714 
3715 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3716 {
3717   switch (Opcode) {
3718   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3719   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3720   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3721     return true;
3722   default:
3723     return false;
3724   }
3725 }
3726 
3727 // movrels* opcodes should only allow VGPRs as src0.
3728 // This is specified in .td description for vop1/vop3,
3729 // but sdwa is handled differently. See isSDWAOperand.
3730 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3731                                       const OperandVector &Operands) {
3732 
3733   const unsigned Opc = Inst.getOpcode();
3734   const MCInstrDesc &Desc = MII.get(Opc);
3735 
3736   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3737     return true;
3738 
3739   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3740   assert(Src0Idx != -1);
3741 
3742   SMLoc ErrLoc;
3743   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3744   if (Src0.isReg()) {
3745     auto Reg = mc2PseudoReg(Src0.getReg());
3746     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3747     if (!isSGPR(Reg, TRI))
3748       return true;
3749     ErrLoc = getRegLoc(Reg, Operands);
3750   } else {
3751     ErrLoc = getConstLoc(Operands);
3752   }
3753 
3754   Error(ErrLoc, "source operand must be a VGPR");
3755   return false;
3756 }
3757 
3758 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3759                                           const OperandVector &Operands) {
3760 
3761   const unsigned Opc = Inst.getOpcode();
3762 
3763   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3764     return true;
3765 
3766   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3767   assert(Src0Idx != -1);
3768 
3769   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3770   if (!Src0.isReg())
3771     return true;
3772 
3773   auto Reg = mc2PseudoReg(Src0.getReg());
3774   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3775   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3776     Error(getRegLoc(Reg, Operands),
3777           "source operand must be either a VGPR or an inline constant");
3778     return false;
3779   }
3780 
3781   return true;
3782 }
3783 
3784 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3785                                    const OperandVector &Operands) {
3786   const unsigned Opc = Inst.getOpcode();
3787   const MCInstrDesc &Desc = MII.get(Opc);
3788 
3789   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3790     return true;
3791 
3792   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3793   if (Src2Idx == -1)
3794     return true;
3795 
3796   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3797   if (!Src2.isReg())
3798     return true;
3799 
3800   MCRegister Src2Reg = Src2.getReg();
3801   MCRegister DstReg = Inst.getOperand(0).getReg();
3802   if (Src2Reg == DstReg)
3803     return true;
3804 
3805   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3806   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3807     return true;
3808 
3809   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3810     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3811           "source 2 operand must not partially overlap with dst");
3812     return false;
3813   }
3814 
3815   return true;
3816 }
3817 
3818 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3819   switch (Inst.getOpcode()) {
3820   default:
3821     return true;
3822   case V_DIV_SCALE_F32_gfx6_gfx7:
3823   case V_DIV_SCALE_F32_vi:
3824   case V_DIV_SCALE_F32_gfx10:
3825   case V_DIV_SCALE_F64_gfx6_gfx7:
3826   case V_DIV_SCALE_F64_vi:
3827   case V_DIV_SCALE_F64_gfx10:
3828     break;
3829   }
3830 
3831   // TODO: Check that src0 = src1 or src2.
3832 
3833   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3834                     AMDGPU::OpName::src1_modifiers,
3835                     AMDGPU::OpName::src2_modifiers}) {
3836     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3837             .getImm() &
3838         SISrcMods::ABS) {
3839       return false;
3840     }
3841   }
3842 
3843   return true;
3844 }
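
// Illustrative example for the check above: an ABS source modifier, as in
//   v_div_scale_f32 v0, vcc, |v1|, v2, v3
// sets SISrcMods::ABS in the corresponding *_modifiers operand and causes
// validateDivScale to fail.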
3845 
3846 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3847 
3848   const unsigned Opc = Inst.getOpcode();
3849   const MCInstrDesc &Desc = MII.get(Opc);
3850 
3851   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3852     return true;
3853 
3854   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3855   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3856     if (isCI() || isSI())
3857       return false;
3858   }
3859 
3860   return true;
3861 }
3862 
3863 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3864   const unsigned Opc = Inst.getOpcode();
3865   const MCInstrDesc &Desc = MII.get(Opc);
3866 
3867   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3868     return true;
3869 
3870   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3871   if (DimIdx < 0)
3872     return true;
3873 
3874   long Imm = Inst.getOperand(DimIdx).getImm();
3875   if (Imm < 0 || Imm >= 8)
3876     return false;
3877 
3878   return true;
3879 }
3880 
3881 static bool IsRevOpcode(const unsigned Opcode)
3882 {
3883   switch (Opcode) {
3884   case AMDGPU::V_SUBREV_F32_e32:
3885   case AMDGPU::V_SUBREV_F32_e64:
3886   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3887   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3888   case AMDGPU::V_SUBREV_F32_e32_vi:
3889   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3890   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3891   case AMDGPU::V_SUBREV_F32_e64_vi:
3892 
3893   case AMDGPU::V_SUBREV_CO_U32_e32:
3894   case AMDGPU::V_SUBREV_CO_U32_e64:
3895   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3896   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3897 
3898   case AMDGPU::V_SUBBREV_U32_e32:
3899   case AMDGPU::V_SUBBREV_U32_e64:
3900   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3901   case AMDGPU::V_SUBBREV_U32_e32_vi:
3902   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3903   case AMDGPU::V_SUBBREV_U32_e64_vi:
3904 
3905   case AMDGPU::V_SUBREV_U32_e32:
3906   case AMDGPU::V_SUBREV_U32_e64:
3907   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3908   case AMDGPU::V_SUBREV_U32_e32_vi:
3909   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3910   case AMDGPU::V_SUBREV_U32_e64_vi:
3911 
3912   case AMDGPU::V_SUBREV_F16_e32:
3913   case AMDGPU::V_SUBREV_F16_e64:
3914   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3915   case AMDGPU::V_SUBREV_F16_e32_vi:
3916   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3917   case AMDGPU::V_SUBREV_F16_e64_vi:
3918 
3919   case AMDGPU::V_SUBREV_U16_e32:
3920   case AMDGPU::V_SUBREV_U16_e64:
3921   case AMDGPU::V_SUBREV_U16_e32_vi:
3922   case AMDGPU::V_SUBREV_U16_e64_vi:
3923 
3924   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3925   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3926   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3927 
3928   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3929   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3930 
3931   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3932   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3933 
3934   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3935   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3936 
3937   case AMDGPU::V_LSHRREV_B32_e32:
3938   case AMDGPU::V_LSHRREV_B32_e64:
3939   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3940   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3941   case AMDGPU::V_LSHRREV_B32_e32_vi:
3942   case AMDGPU::V_LSHRREV_B32_e64_vi:
3943   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3944   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3945 
3946   case AMDGPU::V_ASHRREV_I32_e32:
3947   case AMDGPU::V_ASHRREV_I32_e64:
3948   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3949   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3950   case AMDGPU::V_ASHRREV_I32_e32_vi:
3951   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3952   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3953   case AMDGPU::V_ASHRREV_I32_e64_vi:
3954 
3955   case AMDGPU::V_LSHLREV_B32_e32:
3956   case AMDGPU::V_LSHLREV_B32_e64:
3957   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3958   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3959   case AMDGPU::V_LSHLREV_B32_e32_vi:
3960   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3961   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3962   case AMDGPU::V_LSHLREV_B32_e64_vi:
3963 
3964   case AMDGPU::V_LSHLREV_B16_e32:
3965   case AMDGPU::V_LSHLREV_B16_e64:
3966   case AMDGPU::V_LSHLREV_B16_e32_vi:
3967   case AMDGPU::V_LSHLREV_B16_e64_vi:
3968   case AMDGPU::V_LSHLREV_B16_gfx10:
3969 
3970   case AMDGPU::V_LSHRREV_B16_e32:
3971   case AMDGPU::V_LSHRREV_B16_e64:
3972   case AMDGPU::V_LSHRREV_B16_e32_vi:
3973   case AMDGPU::V_LSHRREV_B16_e64_vi:
3974   case AMDGPU::V_LSHRREV_B16_gfx10:
3975 
3976   case AMDGPU::V_ASHRREV_I16_e32:
3977   case AMDGPU::V_ASHRREV_I16_e64:
3978   case AMDGPU::V_ASHRREV_I16_e32_vi:
3979   case AMDGPU::V_ASHRREV_I16_e64_vi:
3980   case AMDGPU::V_ASHRREV_I16_gfx10:
3981 
3982   case AMDGPU::V_LSHLREV_B64_e64:
3983   case AMDGPU::V_LSHLREV_B64_gfx10:
3984   case AMDGPU::V_LSHLREV_B64_vi:
3985 
3986   case AMDGPU::V_LSHRREV_B64_e64:
3987   case AMDGPU::V_LSHRREV_B64_gfx10:
3988   case AMDGPU::V_LSHRREV_B64_vi:
3989 
3990   case AMDGPU::V_ASHRREV_I64_e64:
3991   case AMDGPU::V_ASHRREV_I64_gfx10:
3992   case AMDGPU::V_ASHRREV_I64_vi:
3993 
3994   case AMDGPU::V_PK_LSHLREV_B16:
3995   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3996   case AMDGPU::V_PK_LSHLREV_B16_vi:
3997 
3998   case AMDGPU::V_PK_LSHRREV_B16:
3999   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4000   case AMDGPU::V_PK_LSHRREV_B16_vi:
4001   case AMDGPU::V_PK_ASHRREV_I16:
4002   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4003   case AMDGPU::V_PK_ASHRREV_I16_vi:
4004     return true;
4005   default:
4006     return false;
4007   }
4008 }
4009 
4010 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4011 
4012   using namespace SIInstrFlags;
4013   const unsigned Opcode = Inst.getOpcode();
4014   const MCInstrDesc &Desc = MII.get(Opcode);
4015 
4016   // The lds_direct register is defined so that it can be used
4017   // with 9-bit operands only. Ignore encodings which do not accept these.
4018   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4019   if ((Desc.TSFlags & Enc) == 0)
4020     return None;
4021 
4022   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4023     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4024     if (SrcIdx == -1)
4025       break;
4026     const auto &Src = Inst.getOperand(SrcIdx);
4027     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4028 
4029       if (isGFX90A() || isGFX11Plus())
4030         return StringRef("lds_direct is not supported on this GPU");
4031 
4032       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4033         return StringRef("lds_direct cannot be used with this instruction");
4034 
4035       if (SrcName != OpName::src0)
4036         return StringRef("lds_direct may be used as src0 only");
4037     }
4038   }
4039 
4040   return None;
4041 }
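
// Illustrative examples for the rules above (assuming the usual
// 'lds_direct' spelling of the register; the mnemonics are placeholders):
//   v_mov_b32 v0, lds_direct        // allowed where lds_direct is supported
//   v_add_f32 v0, v1, lds_direct    // rejected: lds_direct may be src0 only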
4042 
4043 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4044   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4045     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4046     if (Op.isFlatOffset())
4047       return Op.getStartLoc();
4048   }
4049   return getLoc();
4050 }
4051 
4052 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4053                                          const OperandVector &Operands) {
4054   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4055   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4056     return true;
4057 
4058   auto Opcode = Inst.getOpcode();
4059   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4060   assert(OpNum != -1);
4061 
4062   const auto &Op = Inst.getOperand(OpNum);
4063   if (!hasFlatOffsets() && Op.getImm() != 0) {
4064     Error(getFlatOffsetLoc(Operands),
4065           "flat offset modifier is not supported on this GPU");
4066     return false;
4067   }
4068 
4069   // For the plain FLAT segment the offset must be positive (the MSB is
4070   // ignored and forced to zero); GLOBAL and SCRATCH accept a signed offset.
4071   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4072     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4073     if (!isIntN(OffsetSize, Op.getImm())) {
4074       Error(getFlatOffsetLoc(Operands),
4075             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4076       return false;
4077     }
4078   } else {
4079     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4080     if (!isUIntN(OffsetSize, Op.getImm())) {
4081       Error(getFlatOffsetLoc(Operands),
4082             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4083       return false;
4084     }
4085   }
4086 
4087   return true;
4088 }
4089 
4090 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4091   // Start with second operand because SMEM Offset cannot be dst or src0.
4092   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4093     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4094     if (Op.isSMEMOffset())
4095       return Op.getStartLoc();
4096   }
4097   return getLoc();
4098 }
4099 
4100 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4101                                          const OperandVector &Operands) {
4102   if (isCI() || isSI())
4103     return true;
4104 
4105   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4106   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4107     return true;
4108 
4109   auto Opcode = Inst.getOpcode();
4110   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4111   if (OpNum == -1)
4112     return true;
4113 
4114   const auto &Op = Inst.getOperand(OpNum);
4115   if (!Op.isImm())
4116     return true;
4117 
4118   uint64_t Offset = Op.getImm();
4119   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4120   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4121       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4122     return true;
4123 
4124   Error(getSMEMOffsetLoc(Operands),
4125         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4126                                "expected a 21-bit signed offset");
4127 
4128   return false;
4129 }
4130 
4131 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4132   unsigned Opcode = Inst.getOpcode();
4133   const MCInstrDesc &Desc = MII.get(Opcode);
4134   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4135     return true;
4136 
4137   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4138   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4139 
4140   const int OpIndices[] = { Src0Idx, Src1Idx };
4141 
4142   unsigned NumExprs = 0;
4143   unsigned NumLiterals = 0;
4144   uint32_t LiteralValue;
4145 
4146   for (int OpIdx : OpIndices) {
4147     if (OpIdx == -1) break;
4148 
4149     const MCOperand &MO = Inst.getOperand(OpIdx);
4150     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4151     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4152       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4153         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4154         if (NumLiterals == 0 || LiteralValue != Value) {
4155           LiteralValue = Value;
4156           ++NumLiterals;
4157         }
4158       } else if (MO.isExpr()) {
4159         ++NumExprs;
4160       }
4161     }
4162   }
4163 
4164   return NumLiterals + NumExprs <= 1;
4165 }
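
// Illustrative example for the rule above: SOP2/SOPC instructions may
// encode at most one unique literal value, e.g.
//   s_add_u32 s0, 0x12345678, 0x12345678   // accepted: one unique literal
//   s_add_u32 s0, 0x12345678, 0x87654321   // rejected: two different literals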
4166 
4167 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4168   const unsigned Opc = Inst.getOpcode();
4169   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4170       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4171     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4172     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4173 
4174     if (OpSel & ~3)
4175       return false;
4176   }
4177 
4178   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4179     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4180     if (OpSelIdx != -1) {
4181       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4182         return false;
4183     }
4184     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4185     if (OpSelHiIdx != -1) {
4186       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4187         return false;
4188     }
4189   }
4190 
4191   return true;
4192 }
4193 
4194 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4195                                   const OperandVector &Operands) {
4196   const unsigned Opc = Inst.getOpcode();
4197   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4198   if (DppCtrlIdx < 0)
4199     return true;
4200   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4201 
4202   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4203     // DPP64 is supported for row_newbcast only.
4204     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4205     if (Src0Idx >= 0 &&
4206         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4207       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4208       Error(S, "64 bit dpp only supports row_newbcast");
4209       return false;
4210     }
4211   }
4212 
4213   return true;
4214 }
4215 
4216 // Check if VCC register matches wavefront size
4217 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4218   auto FB = getFeatureBits();
4219   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4220     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4221 }
4222 
4223 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4224 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4225                                          const OperandVector &Operands) {
4226   unsigned Opcode = Inst.getOpcode();
4227   const MCInstrDesc &Desc = MII.get(Opcode);
4228   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4229   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4230       ImmIdx == -1)
4231     return true;
4232 
4233   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4234   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4235   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4236 
4237   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4238 
4239   unsigned NumExprs = 0;
4240   unsigned NumLiterals = 0;
4241   uint32_t LiteralValue;
4242 
4243   for (int OpIdx : OpIndices) {
4244     if (OpIdx == -1)
4245       continue;
4246 
4247     const MCOperand &MO = Inst.getOperand(OpIdx);
4248     if (!MO.isImm() && !MO.isExpr())
4249       continue;
4250     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4251       continue;
4252 
4253     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4254         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4255       Error(getConstLoc(Operands),
4256             "inline constants are not allowed for this operand");
4257       return false;
4258     }
4259 
4260     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4261       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4262       if (NumLiterals == 0 || LiteralValue != Value) {
4263         LiteralValue = Value;
4264         ++NumLiterals;
4265       }
4266     } else if (MO.isExpr()) {
4267       ++NumExprs;
4268     }
4269   }
4270   NumLiterals += NumExprs;
4271 
4272   if (!NumLiterals)
4273     return true;
4274 
4275   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4276     Error(getLitLoc(Operands), "literal operands are not supported");
4277     return false;
4278   }
4279 
4280   if (NumLiterals > 1) {
4281     Error(getLitLoc(Operands), "only one literal operand is allowed");
4282     return false;
4283   }
4284 
4285   return true;
4286 }
4287 
4288 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4289 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4290                          const MCRegisterInfo *MRI) {
4291   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4292   if (OpIdx < 0)
4293     return -1;
4294 
4295   const MCOperand &Op = Inst.getOperand(OpIdx);
4296   if (!Op.isReg())
4297     return -1;
4298 
4299   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4300   auto Reg = Sub ? Sub : Op.getReg();
4301   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4302   return AGPR32.contains(Reg) ? 1 : 0;
4303 }
4304 
4305 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4306   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4307   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4308                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4309                   SIInstrFlags::DS)) == 0)
4310     return true;
4311 
4312   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4313                                                       : AMDGPU::OpName::vdata;
4314 
4315   const MCRegisterInfo *MRI = getMRI();
4316   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4317   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4318 
4319   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4320     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4321     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4322       return false;
4323   }
4324 
4325   auto FB = getFeatureBits();
4326   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4327     if (DataAreg < 0 || DstAreg < 0)
4328       return true;
4329     return DstAreg == DataAreg;
4330   }
4331 
4332   return DstAreg < 1 && DataAreg < 1;
4333 }
4334 
4335 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4336   auto FB = getFeatureBits();
4337   if (!FB[AMDGPU::FeatureGFX90AInsts])
4338     return true;
4339 
4340   const MCRegisterInfo *MRI = getMRI();
4341   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4342   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4343   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4344     const MCOperand &Op = Inst.getOperand(I);
4345     if (!Op.isReg())
4346       continue;
4347 
4348     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4349     if (!Sub)
4350       continue;
4351 
4352     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4353       return false;
4354     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4355       return false;
4356   }
4357 
4358   return true;
4359 }
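
// Illustrative example for the check above: on gfx90a a multi-register
// operand must start at an even-numbered register, so a tuple like v[2:3]
// is accepted while v[1:2] is rejected.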
4360 
4361 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4362   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4363     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4364     if (Op.isBLGP())
4365       return Op.getStartLoc();
4366   }
4367   return SMLoc();
4368 }
4369 
4370 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4371                                    const OperandVector &Operands) {
4372   unsigned Opc = Inst.getOpcode();
4373   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4374   if (BlgpIdx == -1)
4375     return true;
4376   SMLoc BLGPLoc = getBLGPLoc(Operands);
4377   if (!BLGPLoc.isValid())
4378     return true;
4379   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4380   auto FB = getFeatureBits();
4381   bool UsesNeg = false;
4382   if (FB[AMDGPU::FeatureGFX940Insts]) {
4383     switch (Opc) {
4384     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4385     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4386     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4387     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4388       UsesNeg = true;
4389     }
4390   }
4391 
4392   if (IsNeg == UsesNeg)
4393     return true;
4394 
4395   Error(BLGPLoc,
4396         UsesNeg ? "invalid modifier: blgp is not supported"
4397                 : "invalid modifier: neg is not supported");
4398 
4399   return false;
4400 }
4401 
4402 // gfx90a has an undocumented limitation:
4403 // DS_GWS opcodes must use even aligned registers.
4404 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4405                                   const OperandVector &Operands) {
4406   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4407     return true;
4408 
4409   int Opc = Inst.getOpcode();
4410   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4411       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4412     return true;
4413 
4414   const MCRegisterInfo *MRI = getMRI();
4415   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4416   int Data0Pos =
4417       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4418   assert(Data0Pos != -1);
4419   auto Reg = Inst.getOperand(Data0Pos).getReg();
4420   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4421   if (RegIdx & 1) {
4422     SMLoc RegLoc = getRegLoc(Reg, Operands);
4423     Error(RegLoc, "vgpr must be even aligned");
4424     return false;
4425   }
4426 
4427   return true;
4428 }
4429 
4430 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4431                                             const OperandVector &Operands,
4432                                             const SMLoc &IDLoc) {
4433   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4434                                            AMDGPU::OpName::cpol);
4435   if (CPolPos == -1)
4436     return true;
4437 
4438   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4439 
4440   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4441   if (TSFlags & SIInstrFlags::SMRD) {
4442     if (CPol && (isSI() || isCI())) {
4443       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4444       Error(S, "cache policy is not supported for SMRD instructions");
4445       return false;
4446     }
4447     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4448       Error(IDLoc, "invalid cache policy for SMEM instruction");
4449       return false;
4450     }
4451   }
4452 
4453   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4454     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4455     StringRef CStr(S.getPointer());
4456     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4457     Error(S, "scc is not supported on this GPU");
4458     return false;
4459   }
4460 
4461   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4462     return true;
4463 
4464   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4465     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4466       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4467                               : "instruction must use glc");
4468       return false;
4469     }
4470   } else {
4471     if (CPol & CPol::GLC) {
4472       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4473       StringRef CStr(S.getPointer());
4474       S = SMLoc::getFromPointer(
4475           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4476       Error(S, isGFX940() ? "instruction must not use sc0"
4477                           : "instruction must not use glc");
4478       return false;
4479     }
4480   }
4481 
4482   return true;
4483 }
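
// Illustrative examples for the atomic rules above (placeholder operands):
//   flat_atomic_add v1, v[2:3], v4 glc   // returning form requires glc (sc0 on gfx940)
//   flat_atomic_add v[2:3], v4           // non-returning form must omit glc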
4484 
4485 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4486                                          const OperandVector &Operands,
4487                                          const SMLoc &IDLoc) {
4488   if (isGFX940())
4489     return true;
4490 
4491   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4492   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4493       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4494     return true;
4495   // This is FLAT LDS DMA.
4496 
4497   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4498   StringRef CStr(S.getPointer());
4499   if (!CStr.startswith("lds")) {
4500     // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
4501     // The LDS version should have the 'lds' modifier, but it follows optional
4502     // operands, so its absence is ignored by the matcher.
4503     Error(IDLoc, "invalid operands for instruction");
4504     return false;
4505   }
4506 
4507   return true;
4508 }
4509 
4510 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4511   if (!isGFX11Plus())
4512     return true;
4513   for (auto &Operand : Operands) {
4514     if (!Operand->isReg())
4515       continue;
4516     unsigned Reg = Operand->getReg();
4517     if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4518       Error(getRegLoc(Reg, Operands),
4519             "execz and vccz are not supported on this GPU");
4520       return false;
4521     }
4522   }
4523   return true;
4524 }
4525 
4526 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4527                                           const SMLoc &IDLoc,
4528                                           const OperandVector &Operands) {
4529   if (auto ErrMsg = validateLdsDirect(Inst)) {
4530     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4531     return false;
4532   }
4533   if (!validateSOPLiteral(Inst)) {
4534     Error(getLitLoc(Operands),
4535       "only one literal operand is allowed");
4536     return false;
4537   }
4538   if (!validateVOPLiteral(Inst, Operands)) {
4539     return false;
4540   }
4541   if (!validateConstantBusLimitations(Inst, Operands)) {
4542     return false;
4543   }
4544   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4545     return false;
4546   }
4547   if (!validateIntClampSupported(Inst)) {
4548     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4549       "integer clamping is not supported on this GPU");
4550     return false;
4551   }
4552   if (!validateOpSel(Inst)) {
4553     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4554       "invalid op_sel operand");
4555     return false;
4556   }
4557   if (!validateDPP(Inst, Operands)) {
4558     return false;
4559   }
4560   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4561   if (!validateMIMGD16(Inst)) {
4562     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4563       "d16 modifier is not supported on this GPU");
4564     return false;
4565   }
4566   if (!validateMIMGDim(Inst)) {
4567     Error(IDLoc, "dim modifier is required on this GPU");
4568     return false;
4569   }
4570   if (!validateMIMGMSAA(Inst)) {
4571     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4572           "invalid dim; must be MSAA type");
4573     return false;
4574   }
4575   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4576     Error(IDLoc, *ErrMsg);
4577     return false;
4578   }
4579   if (!validateMIMGAddrSize(Inst)) {
4580     Error(IDLoc,
4581       "image address size does not match dim and a16");
4582     return false;
4583   }
4584   if (!validateMIMGAtomicDMask(Inst)) {
4585     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4586       "invalid atomic image dmask");
4587     return false;
4588   }
4589   if (!validateMIMGGatherDMask(Inst)) {
4590     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4591       "invalid image_gather dmask: only one bit must be set");
4592     return false;
4593   }
4594   if (!validateMovrels(Inst, Operands)) {
4595     return false;
4596   }
4597   if (!validateFlatOffset(Inst, Operands)) {
4598     return false;
4599   }
4600   if (!validateSMEMOffset(Inst, Operands)) {
4601     return false;
4602   }
4603   if (!validateMAIAccWrite(Inst, Operands)) {
4604     return false;
4605   }
4606   if (!validateMFMA(Inst, Operands)) {
4607     return false;
4608   }
4609   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4610     return false;
4611   }
4612 
4613   if (!validateAGPRLdSt(Inst)) {
4614     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4615     ? "invalid register class: data and dst should be all VGPR or AGPR"
4616     : "invalid register class: agpr loads and stores not supported on this GPU"
4617     );
4618     return false;
4619   }
4620   if (!validateVGPRAlign(Inst)) {
4621     Error(IDLoc,
4622       "invalid register class: vgpr tuples must be 64 bit aligned");
4623     return false;
4624   }
4625   if (!validateGWS(Inst, Operands)) {
4626     return false;
4627   }
4628 
4629   if (!validateBLGP(Inst, Operands)) {
4630     return false;
4631   }
4632 
4633   if (!validateDivScale(Inst)) {
4634     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4635     return false;
4636   }
4640   if (!validateExeczVcczOperands(Operands)) {
4641     return false;
4642   }
4643 
4644   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4645     return false;
4646   }
4647 
4648   return true;
4649 }
4650 
4651 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4652                                             const FeatureBitset &FBS,
4653                                             unsigned VariantID = 0);
4654 
4655 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4656                                 const FeatureBitset &AvailableFeatures,
4657                                 unsigned VariantID);
4658 
4659 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4660                                        const FeatureBitset &FBS) {
4661   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4662 }
4663 
4664 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4665                                        const FeatureBitset &FBS,
4666                                        ArrayRef<unsigned> Variants) {
4667   for (auto Variant : Variants) {
4668     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4669       return true;
4670   }
4671 
4672   return false;
4673 }
4674 
4675 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4676                                                   const SMLoc &IDLoc) {
4677   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4678 
4679   // Check if requested instruction variant is supported.
4680   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4681     return false;
4682 
4683   // This instruction is not supported.
4684   // Clear any other pending errors because they are no longer relevant.
4685   getParser().clearPendingErrors();
4686 
4687   // Requested instruction variant is not supported.
4688   // Check if any other variants are supported.
4689   StringRef VariantName = getMatchedVariantName();
4690   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4691     return Error(IDLoc,
4692                  Twine(VariantName,
4693                        " variant of this instruction is not supported"));
4694   }
4695 
4696   // Finally check if this instruction is supported on any other GPU.
4697   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4698     return Error(IDLoc, "instruction not supported on this GPU");
4699   }
4700 
4701   // Instruction not supported on any GPU. Probably a typo.
4702   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4703   return Error(IDLoc, "invalid instruction" + Suggestion);
4704 }
4705 
4706 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4707                                               OperandVector &Operands,
4708                                               MCStreamer &Out,
4709                                               uint64_t &ErrorInfo,
4710                                               bool MatchingInlineAsm) {
4711   MCInst Inst;
4712   unsigned Result = Match_Success;
4713   for (auto Variant : getMatchedVariants()) {
4714     uint64_t EI;
4715     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4716                                   Variant);
4717     // We order match statuses from least to most specific; the most specific
4718     // status encountered so far is kept as the result:
4719     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4720     if ((R == Match_Success) ||
4721         (R == Match_PreferE32) ||
4722         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4723         (R == Match_InvalidOperand && Result != Match_MissingFeature
4724                                    && Result != Match_PreferE32) ||
4725         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4726                                    && Result != Match_MissingFeature
4727                                    && Result != Match_PreferE32)) {
4728       Result = R;
4729       ErrorInfo = EI;
4730     }
4731     if (R == Match_Success)
4732       break;
4733   }
4734 
4735   if (Result == Match_Success) {
4736     if (!validateInstruction(Inst, IDLoc, Operands)) {
4737       return true;
4738     }
4739     Inst.setLoc(IDLoc);
4740     Out.emitInstruction(Inst, getSTI());
4741     return false;
4742   }
4743 
4744   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4745   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4746     return true;
4747   }
4748 
4749   switch (Result) {
4750   default: break;
4751   case Match_MissingFeature:
4752     // It has been verified that the specified instruction
4753     // mnemonic is valid. A match was found but it requires
4754     // features which are not supported on this GPU.
4755     return Error(IDLoc, "operands are not valid for this GPU or mode");
4756 
4757   case Match_InvalidOperand: {
4758     SMLoc ErrorLoc = IDLoc;
4759     if (ErrorInfo != ~0ULL) {
4760       if (ErrorInfo >= Operands.size()) {
4761         return Error(IDLoc, "too few operands for instruction");
4762       }
4763       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4764       if (ErrorLoc == SMLoc())
4765         ErrorLoc = IDLoc;
4766     }
4767     return Error(ErrorLoc, "invalid operand for instruction");
4768   }
4769 
4770   case Match_PreferE32:
4771     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4772                         "should be encoded as e32");
4773   case Match_MnemonicFail:
4774     llvm_unreachable("Invalid instructions should have been handled already");
4775   }
4776   llvm_unreachable("Implement any new match types added!");
4777 }
4778 
4779 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4780   int64_t Tmp = -1;
4781   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4782     return true;
4783   }
4784   if (getParser().parseAbsoluteExpression(Tmp)) {
4785     return true;
4786   }
4787   Ret = static_cast<uint32_t>(Tmp);
4788   return false;
4789 }
4790 
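// Shared helper for the HSA code object version/ISA directives: parses the
// "<major>, <minor>" pair, e.g. (illustrative) ".hsa_code_object_version 2, 1".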
4791 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4792                                                uint32_t &Minor) {
4793   if (ParseAsAbsoluteExpression(Major))
4794     return TokError("invalid major version");
4795 
4796   if (!trySkipToken(AsmToken::Comma))
4797     return TokError("minor version number required, comma expected");
4798 
4799   if (ParseAsAbsoluteExpression(Minor))
4800     return TokError("invalid minor version");
4801 
4802   return false;
4803 }
4804 
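// Parses ".amdgcn_target" and checks the quoted target id against the one in
// use, e.g. (illustrative) .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+".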
4805 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4806   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4807     return TokError("directive only supported for amdgcn architecture");
4808 
4809   std::string TargetIDDirective;
4810   SMLoc TargetStart = getTok().getLoc();
4811   if (getParser().parseEscapedString(TargetIDDirective))
4812     return true;
4813 
4814   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4815   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4816     return getParser().Error(TargetRange.Start,
4817         (Twine(".amdgcn_target directive's target id ") +
4818          Twine(TargetIDDirective) +
4819          Twine(" does not match the specified target id ") +
4820          Twine(getTargetStreamer().getTargetID()->toString())).str());
4821 
4822   return false;
4823 }
4824 
4825 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4826   return Error(Range.Start, "value out of range", Range);
4827 }
4828 
4829 bool AMDGPUAsmParser::calculateGPRBlocks(
4830     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4831     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4832     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4833     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4834   // TODO(scott.linder): These calculations are duplicated from
4835   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4836   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4837 
4838   unsigned NumVGPRs = NextFreeVGPR;
4839   unsigned NumSGPRs = NextFreeSGPR;
4840 
4841   if (Version.Major >= 10)
4842     NumSGPRs = 0;
4843   else {
4844     unsigned MaxAddressableNumSGPRs =
4845         IsaInfo::getAddressableNumSGPRs(&getSTI());
4846 
4847     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4848         NumSGPRs > MaxAddressableNumSGPRs)
4849       return OutOfRangeError(SGPRRange);
4850 
4851     NumSGPRs +=
4852         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4853 
4854     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4855         NumSGPRs > MaxAddressableNumSGPRs)
4856       return OutOfRangeError(SGPRRange);
4857 
4858     if (Features.test(FeatureSGPRInitBug))
4859       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4860   }
4861 
4862   VGPRBlocks =
4863       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4864   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4865 
4866   return false;
4867 }
4868 
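// Parses a kernel descriptor block, e.g. (illustrative; the kernel name is
// arbitrary, and .amdhsa_next_free_vgpr/.amdhsa_next_free_sgpr are mandatory):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel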
4869 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4870   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4871     return TokError("directive only supported for amdgcn architecture");
4872 
4873   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4874     return TokError("directive only supported for amdhsa OS");
4875 
4876   StringRef KernelName;
4877   if (getParser().parseIdentifier(KernelName))
4878     return true;
4879 
4880   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4881 
4882   StringSet<> Seen;
4883 
4884   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4885 
4886   SMRange VGPRRange;
4887   uint64_t NextFreeVGPR = 0;
4888   uint64_t AccumOffset = 0;
4889   uint64_t SharedVGPRCount = 0;
4890   SMRange SGPRRange;
4891   uint64_t NextFreeSGPR = 0;
4892 
4893   // Count the number of user SGPRs implied from the enabled feature bits.
4894   unsigned ImpliedUserSGPRCount = 0;
4895 
4896   // Track if the asm explicitly contains the directive for the user SGPR
4897   // count.
4898   Optional<unsigned> ExplicitUserSGPRCount;
4899   bool ReserveVCC = true;
4900   bool ReserveFlatScr = true;
4901   Optional<bool> EnableWavefrontSize32;
4902 
4903   while (true) {
4904     while (trySkipToken(AsmToken::EndOfStatement));
4905 
4906     StringRef ID;
4907     SMRange IDRange = getTok().getLocRange();
4908     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4909       return true;
4910 
4911     if (ID == ".end_amdhsa_kernel")
4912       break;
4913 
4914     if (Seen.find(ID) != Seen.end())
4915       return TokError(".amdhsa_ directives cannot be repeated");
4916     Seen.insert(ID);
4917 
4918     SMLoc ValStart = getLoc();
4919     int64_t IVal;
4920     if (getParser().parseAbsoluteExpression(IVal))
4921       return true;
4922     SMLoc ValEnd = getLoc();
4923     SMRange ValRange = SMRange(ValStart, ValEnd);
4924 
4925     if (IVal < 0)
4926       return OutOfRangeError(ValRange);
4927 
4928     uint64_t Val = IVal;
4929 
4930 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4931   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4932     return OutOfRangeError(RANGE);                                             \
4933   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
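// PARSE_BITS_ENTRY rejects VALUE if it does not fit into ENTRY's bit width and
// otherwise packs it into FIELD; it backs most of the .amdhsa_* cases below.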
4934 
4935     if (ID == ".amdhsa_group_segment_fixed_size") {
4936       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4937         return OutOfRangeError(ValRange);
4938       KD.group_segment_fixed_size = Val;
4939     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4940       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4941         return OutOfRangeError(ValRange);
4942       KD.private_segment_fixed_size = Val;
4943     } else if (ID == ".amdhsa_kernarg_size") {
4944       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4945         return OutOfRangeError(ValRange);
4946       KD.kernarg_size = Val;
4947     } else if (ID == ".amdhsa_user_sgpr_count") {
4948       ExplicitUserSGPRCount = Val;
4949     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4950       if (hasArchitectedFlatScratch())
4951         return Error(IDRange.Start,
4952                      "directive is not supported with architected flat scratch",
4953                      IDRange);
4954       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4955                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4956                        Val, ValRange);
4957       if (Val)
4958         ImpliedUserSGPRCount += 4;
4959     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4960       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4961                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4962                        ValRange);
4963       if (Val)
4964         ImpliedUserSGPRCount += 2;
4965     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4966       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4967                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4968                        ValRange);
4969       if (Val)
4970         ImpliedUserSGPRCount += 2;
4971     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4972       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4973                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4974                        Val, ValRange);
4975       if (Val)
4976         ImpliedUserSGPRCount += 2;
4977     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4978       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4979                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4980                        ValRange);
4981       if (Val)
4982         ImpliedUserSGPRCount += 2;
4983     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4984       if (hasArchitectedFlatScratch())
4985         return Error(IDRange.Start,
4986                      "directive is not supported with architected flat scratch",
4987                      IDRange);
4988       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4989                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4990                        ValRange);
4991       if (Val)
4992         ImpliedUserSGPRCount += 2;
4993     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4994       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4995                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4996                        Val, ValRange);
4997       if (Val)
4998         ImpliedUserSGPRCount += 1;
4999     } else if (ID == ".amdhsa_wavefront_size32") {
5000       if (IVersion.Major < 10)
5001         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5002       EnableWavefrontSize32 = Val;
5003       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5004                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5005                        Val, ValRange);
5006     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5007       if (hasArchitectedFlatScratch())
5008         return Error(IDRange.Start,
5009                      "directive is not supported with architected flat scratch",
5010                      IDRange);
5011       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5012                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5013     } else if (ID == ".amdhsa_enable_private_segment") {
5014       if (!hasArchitectedFlatScratch())
5015         return Error(
5016             IDRange.Start,
5017             "directive is not supported without architected flat scratch",
5018             IDRange);
5019       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5020                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5021     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5022       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5023                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5024                        ValRange);
5025     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5026       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5027                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5028                        ValRange);
5029     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5030       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5031                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5032                        ValRange);
5033     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5034       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5035                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5036                        ValRange);
5037     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5038       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5039                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5040                        ValRange);
5041     } else if (ID == ".amdhsa_next_free_vgpr") {
5042       VGPRRange = ValRange;
5043       NextFreeVGPR = Val;
5044     } else if (ID == ".amdhsa_next_free_sgpr") {
5045       SGPRRange = ValRange;
5046       NextFreeSGPR = Val;
5047     } else if (ID == ".amdhsa_accum_offset") {
5048       if (!isGFX90A())
5049         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5050       AccumOffset = Val;
5051     } else if (ID == ".amdhsa_reserve_vcc") {
5052       if (!isUInt<1>(Val))
5053         return OutOfRangeError(ValRange);
5054       ReserveVCC = Val;
5055     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5056       if (IVersion.Major < 7)
5057         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5058       if (hasArchitectedFlatScratch())
5059         return Error(IDRange.Start,
5060                      "directive is not supported with architected flat scratch",
5061                      IDRange);
5062       if (!isUInt<1>(Val))
5063         return OutOfRangeError(ValRange);
5064       ReserveFlatScr = Val;
5065     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5066       if (IVersion.Major < 8)
5067         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5068       if (!isUInt<1>(Val))
5069         return OutOfRangeError(ValRange);
5070       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5071         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5072                                  IDRange);
5073     } else if (ID == ".amdhsa_float_round_mode_32") {
5074       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5075                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5076     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5077       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5078                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5079     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5080       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5081                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5082     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5083       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5084                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5085                        ValRange);
5086     } else if (ID == ".amdhsa_dx10_clamp") {
5087       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5088                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5089     } else if (ID == ".amdhsa_ieee_mode") {
5090       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5091                        Val, ValRange);
5092     } else if (ID == ".amdhsa_fp16_overflow") {
5093       if (IVersion.Major < 9)
5094         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5095       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5096                        ValRange);
5097     } else if (ID == ".amdhsa_tg_split") {
5098       if (!isGFX90A())
5099         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5100       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5101                        ValRange);
5102     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5103       if (IVersion.Major < 10)
5104         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5105       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5106                        ValRange);
5107     } else if (ID == ".amdhsa_memory_ordered") {
5108       if (IVersion.Major < 10)
5109         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5110       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5111                        ValRange);
5112     } else if (ID == ".amdhsa_forward_progress") {
5113       if (IVersion.Major < 10)
5114         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5115       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5116                        ValRange);
5117     } else if (ID == ".amdhsa_shared_vgpr_count") {
5118       if (IVersion.Major < 10)
5119         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5120       SharedVGPRCount = Val;
5121       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5122                        COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5123                        ValRange);
5124     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5125       PARSE_BITS_ENTRY(
5126           KD.compute_pgm_rsrc2,
5127           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5128           ValRange);
5129     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5130       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5131                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5132                        Val, ValRange);
5133     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5134       PARSE_BITS_ENTRY(
5135           KD.compute_pgm_rsrc2,
5136           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5137           ValRange);
5138     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5139       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5140                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5141                        Val, ValRange);
5142     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5143       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5144                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5145                        Val, ValRange);
5146     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5147       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5148                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5149                        Val, ValRange);
5150     } else if (ID == ".amdhsa_exception_int_div_zero") {
5151       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5152                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5153                        Val, ValRange);
5154     } else {
5155       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5156     }
5157 
5158 #undef PARSE_BITS_ENTRY
5159   }
5160 
5161   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5162     return TokError(".amdhsa_next_free_vgpr directive is required");
5163 
5164   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5165     return TokError(".amdhsa_next_free_sgpr directive is required");
5166 
5167   unsigned VGPRBlocks;
5168   unsigned SGPRBlocks;
5169   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5170                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5171                          EnableWavefrontSize32, NextFreeVGPR,
5172                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5173                          SGPRBlocks))
5174     return true;
5175 
5176   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5177           VGPRBlocks))
5178     return OutOfRangeError(VGPRRange);
5179   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5180                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5181 
5182   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5183           SGPRBlocks))
5184     return OutOfRangeError(SGPRRange);
5185   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5186                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5187                   SGPRBlocks);
5188 
5189   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5190     return TokError("amdhsa_user_sgpr_count smaller than implied by "
5191                     "enabled user SGPRs");
5192 
5193   unsigned UserSGPRCount =
5194       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5195 
5196   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5197     return TokError("too many user SGPRs enabled");
5198   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5199                   UserSGPRCount);
5200 
5201   if (isGFX90A()) {
5202     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5203       return TokError(".amdhsa_accum_offset directive is required");
5204     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5205       return TokError("accum_offset should be in range [4..256] in "
5206                       "increments of 4");
5207     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5208       return TokError("accum_offset exceeds total VGPR allocation");
5209     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5210                     (AccumOffset / 4 - 1));
5211   }
5212 
5213   if (IVersion.Major == 10) {
5214     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5215     if (SharedVGPRCount && EnableWavefrontSize32) {
5216       return TokError("shared_vgpr_count directive not valid on "
5217                       "wavefront size 32");
5218     }
5219     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5220       return TokError("shared_vgpr_count*2 + "
5221                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5222                       "exceed 63");
5223     }
5224   }
5225 
5226   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5227       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5228       ReserveFlatScr);
5229   return false;
5230 }
5231 
5232 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5233   uint32_t Major;
5234   uint32_t Minor;
5235 
5236   if (ParseDirectiveMajorMinor(Major, Minor))
5237     return true;
5238 
5239   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5240   return false;
5241 }
5242 
5243 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5244   uint32_t Major;
5245   uint32_t Minor;
5246   uint32_t Stepping;
5247   StringRef VendorName;
5248   StringRef ArchName;
5249 
5250   // If this directive has no arguments, then use the ISA version for the
5251   // targeted GPU.
5252   if (isToken(AsmToken::EndOfStatement)) {
5253     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5254     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5255                                                         ISA.Stepping,
5256                                                         "AMD", "AMDGPU");
5257     return false;
5258   }
5259 
5260   if (ParseDirectiveMajorMinor(Major, Minor))
5261     return true;
5262 
5263   if (!trySkipToken(AsmToken::Comma))
5264     return TokError("stepping version number required, comma expected");
5265 
5266   if (ParseAsAbsoluteExpression(Stepping))
5267     return TokError("invalid stepping version");
5268 
5269   if (!trySkipToken(AsmToken::Comma))
5270     return TokError("vendor name required, comma expected");
5271 
5272   if (!parseString(VendorName, "invalid vendor name"))
5273     return true;
5274 
5275   if (!trySkipToken(AsmToken::Comma))
5276     return TokError("arch name required, comma expected");
5277 
5278   if (!parseString(ArchName, "invalid arch name"))
5279     return true;
5280 
5281   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5282                                                       VendorName, ArchName);
5283   return false;
5284 }
5285 
5286 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5287                                                amd_kernel_code_t &Header) {
5288   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5289   // assembly for backwards compatibility.
5290   if (ID == "max_scratch_backing_memory_byte_size") {
5291     Parser.eatToEndOfStatement();
5292     return false;
5293   }
5294 
5295   SmallString<40> ErrStr;
5296   raw_svector_ostream Err(ErrStr);
5297   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5298     return TokError(Err.str());
5299   }
5300   Lex();
5301 
5302   if (ID == "enable_wavefront_size32") {
5303     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5304       if (!isGFX10Plus())
5305         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5306       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5307         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5308     } else {
5309       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5310         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5311     }
5312   }
5313 
5314   if (ID == "wavefront_size") {
5315     if (Header.wavefront_size == 5) {
5316       if (!isGFX10Plus())
5317         return TokError("wavefront_size=5 is only allowed on GFX10+");
5318       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5319         return TokError("wavefront_size=5 requires +WavefrontSize32");
5320     } else if (Header.wavefront_size == 6) {
5321       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5322         return TokError("wavefront_size=6 requires +WavefrontSize64");
5323     }
5324   }
5325 
5326   if (ID == "enable_wgp_mode") {
5327     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5328         !isGFX10Plus())
5329       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5330   }
5331 
5332   if (ID == "enable_mem_ordered") {
5333     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5334         !isGFX10Plus())
5335       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5336   }
5337 
5338   if (ID == "enable_fwd_progress") {
5339     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5340         !isGFX10Plus())
5341       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5342   }
5343 
5344   return false;
5345 }
5346 
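// Parses the legacy kernel header block of "key = value" fields, e.g.
// (illustrative field values):
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_sgpr_kernarg_segment_ptr = 1
//   .end_amd_kernel_code_t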
5347 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5348   amd_kernel_code_t Header;
5349   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5350 
5351   while (true) {
5352     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5353     // will set the current token to EndOfStatement.
5354     while(trySkipToken(AsmToken::EndOfStatement));
5355 
5356     StringRef ID;
5357     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5358       return true;
5359 
5360     if (ID == ".end_amd_kernel_code_t")
5361       break;
5362 
5363     if (ParseAMDKernelCodeTValue(ID, Header))
5364       return true;
5365   }
5366 
5367   getTargetStreamer().EmitAMDKernelCodeT(Header);
5368 
5369   return false;
5370 }
5371 
5372 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5373   StringRef KernelName;
5374   if (!parseId(KernelName, "expected symbol name"))
5375     return true;
5376 
5377   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5378                                            ELF::STT_AMDGPU_HSA_KERNEL);
5379 
5380   KernelScope.initialize(getContext());
5381   return false;
5382 }
5383 
5384 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5385   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5386     return Error(getLoc(),
5387                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5388                  "architectures");
5389   }
5390 
5391   auto TargetIDDirective = getLexer().getTok().getStringContents();
5392   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5393     return Error(getParser().getTok().getLoc(), "target id must match options");
5394 
5395   getTargetStreamer().EmitISAVersion();
5396   Lex();
5397 
5398   return false;
5399 }
5400 
5401 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5402   const char *AssemblerDirectiveBegin;
5403   const char *AssemblerDirectiveEnd;
5404   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5405       isHsaAbiVersion3AndAbove(&getSTI())
5406           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5407                             HSAMD::V3::AssemblerDirectiveEnd)
5408           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5409                             HSAMD::AssemblerDirectiveEnd);
5410 
5411   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5412     return Error(getLoc(),
5413                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5414                  "not available on non-amdhsa OSes")).str());
5415   }
5416 
5417   std::string HSAMetadataString;
5418   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5419                           HSAMetadataString))
5420     return true;
5421 
5422   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5423     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5424       return Error(getLoc(), "invalid HSA metadata");
5425   } else {
5426     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5427       return Error(getLoc(), "invalid HSA metadata");
5428   }
5429 
5430   return false;
5431 }
5432 
5433 /// Common code to parse out a block of text (typically YAML) between start and
5434 /// end directives.
5435 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5436                                           const char *AssemblerDirectiveEnd,
5437                                           std::string &CollectString) {
5438 
5439   raw_string_ostream CollectStream(CollectString);
5440 
5441   getLexer().setSkipSpace(false);
5442 
5443   bool FoundEnd = false;
5444   while (!isToken(AsmToken::Eof)) {
5445     while (isToken(AsmToken::Space)) {
5446       CollectStream << getTokenStr();
5447       Lex();
5448     }
5449 
5450     if (trySkipId(AssemblerDirectiveEnd)) {
5451       FoundEnd = true;
5452       break;
5453     }
5454 
5455     CollectStream << Parser.parseStringToEndOfStatement()
5456                   << getContext().getAsmInfo()->getSeparatorString();
5457 
5458     Parser.eatToEndOfStatement();
5459   }
5460 
5461   getLexer().setSkipSpace(true);
5462 
5463   if (isToken(AsmToken::Eof) && !FoundEnd) {
5464     return TokError(Twine("expected directive ") +
5465                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5466   }
5467 
5468   CollectStream.flush();
5469   return false;
5470 }
5471 
5472 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5473 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5474   std::string String;
5475   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5476                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5477     return true;
5478 
5479   auto PALMetadata = getTargetStreamer().getPALMetadata();
5480   if (!PALMetadata->setFromString(String))
5481     return Error(getLoc(), "invalid PAL metadata");
5482   return false;
5483 }
5484 
5485 /// Parse the assembler directive for old linear-format PAL metadata.
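/// It is a comma-separated list of register key/value pairs, e.g.
/// (illustrative) ".amd_amdgpu_pal_metadata 0x10117, 0x2, 0x10118, 0x40".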
5486 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5487   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5488     return Error(getLoc(),
5489                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5490                  "not available on non-amdpal OSes")).str());
5491   }
5492 
5493   auto PALMetadata = getTargetStreamer().getPALMetadata();
5494   PALMetadata->setLegacy();
5495   for (;;) {
5496     uint32_t Key, Value;
5497     if (ParseAsAbsoluteExpression(Key)) {
5498       return TokError(Twine("invalid value in ") +
5499                       Twine(PALMD::AssemblerDirective));
5500     }
5501     if (!trySkipToken(AsmToken::Comma)) {
5502       return TokError(Twine("expected an even number of values in ") +
5503                       Twine(PALMD::AssemblerDirective));
5504     }
5505     if (ParseAsAbsoluteExpression(Value)) {
5506       return TokError(Twine("invalid value in ") +
5507                       Twine(PALMD::AssemblerDirective));
5508     }
5509     PALMetadata->setRegister(Key, Value);
5510     if (!trySkipToken(AsmToken::Comma))
5511       break;
5512   }
5513   return false;
5514 }
5515 
5516 /// ParseDirectiveAMDGPULDS
5517 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
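///
/// An illustrative use (symbol name chosen arbitrarily):
///   .amdgpu_lds my_lds_buffer, 4096, 16
/// declares a 4096-byte LDS symbol with 16-byte alignment.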
5518 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5519   if (getParser().checkForValidSection())
5520     return true;
5521 
5522   StringRef Name;
5523   SMLoc NameLoc = getLoc();
5524   if (getParser().parseIdentifier(Name))
5525     return TokError("expected identifier in directive");
5526 
5527   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5528   if (parseToken(AsmToken::Comma, "expected ','"))
5529     return true;
5530 
5531   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5532 
5533   int64_t Size;
5534   SMLoc SizeLoc = getLoc();
5535   if (getParser().parseAbsoluteExpression(Size))
5536     return true;
5537   if (Size < 0)
5538     return Error(SizeLoc, "size must be non-negative");
5539   if (Size > LocalMemorySize)
5540     return Error(SizeLoc, "size is too large");
5541 
5542   int64_t Alignment = 4;
5543   if (trySkipToken(AsmToken::Comma)) {
5544     SMLoc AlignLoc = getLoc();
5545     if (getParser().parseAbsoluteExpression(Alignment))
5546       return true;
5547     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5548       return Error(AlignLoc, "alignment must be a power of two");
5549 
5550     // Alignment larger than the size of LDS is possible in theory, as long
5551     // as the linker manages to place the symbol at address 0, but we do want
5552     // to make sure the alignment fits nicely into a 32-bit integer.
5553     if (Alignment >= 1u << 31)
5554       return Error(AlignLoc, "alignment is too large");
5555   }
5556 
5557   if (parseEOL())
5558     return true;
5559 
5560   Symbol->redefineIfPossible();
5561   if (!Symbol->isUndefined())
5562     return Error(NameLoc, "invalid symbol redefinition");
5563 
5564   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5565   return false;
5566 }
5567 
5568 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5569   StringRef IDVal = DirectiveID.getString();
5570 
5571   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5572     if (IDVal == ".amdhsa_kernel")
5573       return ParseDirectiveAMDHSAKernel();
5574 
5575     // TODO: Restructure/combine with PAL metadata directive.
5576     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5577       return ParseDirectiveHSAMetadata();
5578   } else {
5579     if (IDVal == ".hsa_code_object_version")
5580       return ParseDirectiveHSACodeObjectVersion();
5581 
5582     if (IDVal == ".hsa_code_object_isa")
5583       return ParseDirectiveHSACodeObjectISA();
5584 
5585     if (IDVal == ".amd_kernel_code_t")
5586       return ParseDirectiveAMDKernelCodeT();
5587 
5588     if (IDVal == ".amdgpu_hsa_kernel")
5589       return ParseDirectiveAMDGPUHsaKernel();
5590 
5591     if (IDVal == ".amd_amdgpu_isa")
5592       return ParseDirectiveISAVersion();
5593 
5594     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5595       return ParseDirectiveHSAMetadata();
5596   }
5597 
5598   if (IDVal == ".amdgcn_target")
5599     return ParseDirectiveAMDGCNTarget();
5600 
5601   if (IDVal == ".amdgpu_lds")
5602     return ParseDirectiveAMDGPULDS();
5603 
5604   if (IDVal == PALMD::AssemblerDirectiveBegin)
5605     return ParseDirectivePALMetadataBegin();
5606 
5607   if (IDVal == PALMD::AssemblerDirective)
5608     return ParseDirectivePALMetadata();
5609 
5610   return true;
5611 }
5612 
5613 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5614                                            unsigned RegNo) {
5615 
5616   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5617     return isGFX9Plus();
5618 
5619   // GFX10+ has 2 more SGPRs 104 and 105.
5620   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5621     return hasSGPR104_SGPR105();
5622 
5623   switch (RegNo) {
5624   case AMDGPU::SRC_SHARED_BASE:
5625   case AMDGPU::SRC_SHARED_LIMIT:
5626   case AMDGPU::SRC_PRIVATE_BASE:
5627   case AMDGPU::SRC_PRIVATE_LIMIT:
5628     return isGFX9Plus();
5629   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5630     return isGFX9Plus() && !isGFX11Plus();
5631   case AMDGPU::TBA:
5632   case AMDGPU::TBA_LO:
5633   case AMDGPU::TBA_HI:
5634   case AMDGPU::TMA:
5635   case AMDGPU::TMA_LO:
5636   case AMDGPU::TMA_HI:
5637     return !isGFX9Plus();
5638   case AMDGPU::XNACK_MASK:
5639   case AMDGPU::XNACK_MASK_LO:
5640   case AMDGPU::XNACK_MASK_HI:
5641     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5642   case AMDGPU::SGPR_NULL:
5643     return isGFX10Plus();
5644   default:
5645     break;
5646   }
5647 
5648   if (isCI())
5649     return true;
5650 
5651   if (isSI() || isGFX10Plus()) {
5652     // No flat_scr on SI.
5653     // On GFX10Plus flat scratch is not a valid register operand and can only be
5654     // accessed with s_setreg/s_getreg.
5655     switch (RegNo) {
5656     case AMDGPU::FLAT_SCR:
5657     case AMDGPU::FLAT_SCR_LO:
5658     case AMDGPU::FLAT_SCR_HI:
5659       return false;
5660     default:
5661       return true;
5662     }
5663   }
5664 
5665   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5666   // SI/CI have.
5667   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5668     return hasSGPR102_SGPR103();
5669 
5670   return true;
5671 }
5672 
5673 OperandMatchResultTy
5674 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5675                               OperandMode Mode) {
5676   // Try to parse with a custom parser
5677   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5678 
5679   // If we successfully parsed the operand, or if there was an error while
5680   // parsing, we are done.
5681   //
5682   // If we are parsing after we reach EndOfStatement then this means we
5683   // are appending default values to the Operands list.  This is only done
5684   // by custom parsers, so we shouldn't continue on to the generic parsing.
5685   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5686       isToken(AsmToken::EndOfStatement))
5687     return ResTy;
5688 
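  // In NSA mode a MIMG address may be written as a bracketed register list,
  // e.g. (illustrative) "[v0, v2, v4]"; the bracket tokens are kept as operands
  // only when more than one register is listed.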
5689   SMLoc RBraceLoc;
5690   SMLoc LBraceLoc = getLoc();
5691   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5692     unsigned Prefix = Operands.size();
5693 
5694     for (;;) {
5695       auto Loc = getLoc();
5696       ResTy = parseReg(Operands);
5697       if (ResTy == MatchOperand_NoMatch)
5698         Error(Loc, "expected a register");
5699       if (ResTy != MatchOperand_Success)
5700         return MatchOperand_ParseFail;
5701 
5702       RBraceLoc = getLoc();
5703       if (trySkipToken(AsmToken::RBrac))
5704         break;
5705 
5706       if (!skipToken(AsmToken::Comma,
5707                      "expected a comma or a closing square bracket")) {
5708         return MatchOperand_ParseFail;
5709       }
5710     }
5711 
5712     if (Operands.size() - Prefix > 1) {
5713       Operands.insert(Operands.begin() + Prefix,
5714                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5715       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5716     }
5717 
5718     return MatchOperand_Success;
5719   }
5720 
5721   return parseRegOrImm(Operands);
5722 }
5723 
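// Strip a forced-encoding suffix from the mnemonic and record it, e.g.
// (illustrative) "v_add_f32_e64" yields "v_add_f32" with a forced 64-bit
// encoding, and "v_mov_b32_sdwa" yields "v_mov_b32" with SDWA forced.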
5724 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5725   // Clear any forced encodings from the previous instruction.
5726   setForcedEncodingSize(0);
5727   setForcedDPP(false);
5728   setForcedSDWA(false);
5729 
5730   if (Name.endswith("_e64_dpp")) {
5731     setForcedDPP(true);
5732     setForcedEncodingSize(64);
5733     return Name.substr(0, Name.size() - 8);
5734   } else if (Name.endswith("_e64")) {
5735     setForcedEncodingSize(64);
5736     return Name.substr(0, Name.size() - 4);
5737   } else if (Name.endswith("_e32")) {
5738     setForcedEncodingSize(32);
5739     return Name.substr(0, Name.size() - 4);
5740   } else if (Name.endswith("_dpp")) {
5741     setForcedDPP(true);
5742     return Name.substr(0, Name.size() - 4);
5743   } else if (Name.endswith("_sdwa")) {
5744     setForcedSDWA(true);
5745     return Name.substr(0, Name.size() - 5);
5746   }
5747   return Name;
5748 }
5749 
5750 static void applyMnemonicAliases(StringRef &Mnemonic,
5751                                  const FeatureBitset &Features,
5752                                  unsigned VariantID);
5753 
5754 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5755                                        StringRef Name,
5756                                        SMLoc NameLoc, OperandVector &Operands) {
5757   // Add the instruction mnemonic
5758   Name = parseMnemonicSuffix(Name);
5759 
5760   // If the target architecture uses MnemonicAlias, call it here to parse
5761   // operands correctly.
5762   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5763 
5764   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5765 
5766   bool IsMIMG = Name.startswith("image_");
5767 
5768   while (!trySkipToken(AsmToken::EndOfStatement)) {
5769     OperandMode Mode = OperandMode_Default;
5770     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5771       Mode = OperandMode_NSA;
5772     CPolSeen = 0;
5773     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5774 
5775     if (Res != MatchOperand_Success) {
5776       checkUnsupportedInstruction(Name, NameLoc);
5777       if (!Parser.hasPendingError()) {
5778         // FIXME: use real operand location rather than the current location.
5779         StringRef Msg =
5780           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5781                                             "not a valid operand.";
5782         Error(getLoc(), Msg);
5783       }
5784       while (!trySkipToken(AsmToken::EndOfStatement)) {
5785         lex();
5786       }
5787       return true;
5788     }
5789 
5790     // Eat the comma or space if there is one.
5791     trySkipToken(AsmToken::Comma);
5792   }
5793 
5794   return false;
5795 }
5796 
5797 //===----------------------------------------------------------------------===//
5798 // Utility functions
5799 //===----------------------------------------------------------------------===//
5800 
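// Parses an integer operand written as "<Prefix>:<expr>", e.g. (illustrative)
// "offset:16"; returns NoMatch when the prefix is absent.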
5801 OperandMatchResultTy
5802 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5803 
5804   if (!trySkipId(Prefix, AsmToken::Colon))
5805     return MatchOperand_NoMatch;
5806 
5807   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5808 }
5809 
5810 OperandMatchResultTy
5811 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5812                                     AMDGPUOperand::ImmTy ImmTy,
5813                                     bool (*ConvertResult)(int64_t&)) {
5814   SMLoc S = getLoc();
5815   int64_t Value = 0;
5816 
5817   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5818   if (Res != MatchOperand_Success)
5819     return Res;
5820 
5821   if (ConvertResult && !ConvertResult(Value)) {
5822     Error(S, "invalid " + StringRef(Prefix) + " value.");
5823   }
5824 
5825   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5826   return MatchOperand_Success;
5827 }
5828 
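// Parses a prefixed array of 0/1 values into a bitmask, e.g. (illustrative)
// "op_sel:[0,1,1,0]"; at most four elements are accepted.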
5829 OperandMatchResultTy
5830 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5831                                              OperandVector &Operands,
5832                                              AMDGPUOperand::ImmTy ImmTy,
5833                                              bool (*ConvertResult)(int64_t&)) {
5834   SMLoc S = getLoc();
5835   if (!trySkipId(Prefix, AsmToken::Colon))
5836     return MatchOperand_NoMatch;
5837 
5838   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5839     return MatchOperand_ParseFail;
5840 
5841   unsigned Val = 0;
5842   const unsigned MaxSize = 4;
5843 
5844   // FIXME: How to verify the number of elements matches the number of src
5845   // operands?
5846   for (int I = 0; ; ++I) {
5847     int64_t Op;
5848     SMLoc Loc = getLoc();
5849     if (!parseExpr(Op))
5850       return MatchOperand_ParseFail;
5851 
5852     if (Op != 0 && Op != 1) {
5853       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5854       return MatchOperand_ParseFail;
5855     }
5856 
5857     Val |= (Op << I);
5858 
5859     if (trySkipToken(AsmToken::RBrac))
5860       break;
5861 
5862     if (I + 1 == MaxSize) {
5863       Error(getLoc(), "expected a closing square bracket");
5864       return MatchOperand_ParseFail;
5865     }
5866 
5867     if (!skipToken(AsmToken::Comma, "expected a comma"))
5868       return MatchOperand_ParseFail;
5869   }
5870 
5871   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5872   return MatchOperand_Success;
5873 }
5874 
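// Parses a named single-bit modifier: "<Name>" sets the bit and "no<Name>"
// clears it, e.g. (illustrative) "r128" vs. "nor128".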
5875 OperandMatchResultTy
5876 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5877                                AMDGPUOperand::ImmTy ImmTy) {
5878   int64_t Bit;
5879   SMLoc S = getLoc();
5880 
5881   if (trySkipId(Name)) {
5882     Bit = 1;
5883   } else if (trySkipId("no", Name)) {
5884     Bit = 0;
5885   } else {
5886     return MatchOperand_NoMatch;
5887   }
5888 
5889   if (Name == "r128" && !hasMIMG_R128()) {
5890     Error(S, "r128 modifier is not supported on this GPU");
5891     return MatchOperand_ParseFail;
5892   }
5893   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5894     Error(S, "a16 modifier is not supported on this GPU");
5895     return MatchOperand_ParseFail;
5896   }
5897 
5898   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5899     ImmTy = AMDGPUOperand::ImmTyR128A16;
5900 
5901   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5902   return MatchOperand_Success;
5903 }
5904 
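// Parses one cache policy modifier per call, e.g. (illustrative) "glc", "slc",
// "dlc" or their "no"-prefixed forms; gfx940 non-scalar instructions use the
// "sc0"/"sc1"/"nt" spellings instead. Duplicate modifiers are rejected.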
5905 OperandMatchResultTy
5906 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5907   unsigned CPolOn = 0;
5908   unsigned CPolOff = 0;
5909   SMLoc S = getLoc();
5910 
5911   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5912   if (isGFX940() && !Mnemo.startswith("s_")) {
5913     if (trySkipId("sc0"))
5914       CPolOn = AMDGPU::CPol::SC0;
5915     else if (trySkipId("nosc0"))
5916       CPolOff = AMDGPU::CPol::SC0;
5917     else if (trySkipId("nt"))
5918       CPolOn = AMDGPU::CPol::NT;
5919     else if (trySkipId("nont"))
5920       CPolOff = AMDGPU::CPol::NT;
5921     else if (trySkipId("sc1"))
5922       CPolOn = AMDGPU::CPol::SC1;
5923     else if (trySkipId("nosc1"))
5924       CPolOff = AMDGPU::CPol::SC1;
5925     else
5926       return MatchOperand_NoMatch;
5927   }
5928   else if (trySkipId("glc"))
5929     CPolOn = AMDGPU::CPol::GLC;
5930   else if (trySkipId("noglc"))
5931     CPolOff = AMDGPU::CPol::GLC;
5932   else if (trySkipId("slc"))
5933     CPolOn = AMDGPU::CPol::SLC;
5934   else if (trySkipId("noslc"))
5935     CPolOff = AMDGPU::CPol::SLC;
5936   else if (trySkipId("dlc"))
5937     CPolOn = AMDGPU::CPol::DLC;
5938   else if (trySkipId("nodlc"))
5939     CPolOff = AMDGPU::CPol::DLC;
5940   else if (trySkipId("scc"))
5941     CPolOn = AMDGPU::CPol::SCC;
5942   else if (trySkipId("noscc"))
5943     CPolOff = AMDGPU::CPol::SCC;
5944   else
5945     return MatchOperand_NoMatch;
5946 
5947   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5948     Error(S, "dlc modifier is not supported on this GPU");
5949     return MatchOperand_ParseFail;
5950   }
5951 
5952   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5953     Error(S, "scc modifier is not supported on this GPU");
5954     return MatchOperand_ParseFail;
5955   }
5956 
5957   if (CPolSeen & (CPolOn | CPolOff)) {
5958     Error(S, "duplicate cache policy modifier");
5959     return MatchOperand_ParseFail;
5960   }
5961 
5962   CPolSeen |= (CPolOn | CPolOff);
5963 
5964   for (unsigned I = 1; I != Operands.size(); ++I) {
5965     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5966     if (Op.isCPol()) {
5967       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5968       return MatchOperand_Success;
5969     }
5970   }
5971 
5972   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5973                                               AMDGPUOperand::ImmTyCPol));
5974 
5975   return MatchOperand_Success;
5976 }
5977 
5978 static void addOptionalImmOperand(
5979   MCInst& Inst, const OperandVector& Operands,
5980   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5981   AMDGPUOperand::ImmTy ImmT,
5982   int64_t Default = 0) {
5983   auto i = OptionalIdx.find(ImmT);
5984   if (i != OptionalIdx.end()) {
5985     unsigned Idx = i->second;
5986     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5987   } else {
5988     Inst.addOperand(MCOperand::createImm(Default));
5989   }
5990 }
5991 
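// Parses "<Prefix>:<identifier>" and returns the identifier, e.g.
// (illustrative) "dim:SQ_RSRC_IMG_2D".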
5992 OperandMatchResultTy
5993 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5994                                        StringRef &Value,
5995                                        SMLoc &StringLoc) {
5996   if (!trySkipId(Prefix, AsmToken::Colon))
5997     return MatchOperand_NoMatch;
5998 
5999   StringLoc = getLoc();
6000   return parseId(Value, "expected an identifier") ? MatchOperand_Success
6001                                                   : MatchOperand_ParseFail;
6002 }
6003 
6004 //===----------------------------------------------------------------------===//
6005 // MTBUF format
6006 //===----------------------------------------------------------------------===//
6007 
6008 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6009                                   int64_t MaxVal,
6010                                   int64_t &Fmt) {
6011   int64_t Val;
6012   SMLoc Loc = getLoc();
6013 
6014   auto Res = parseIntWithPrefix(Pref, Val);
6015   if (Res == MatchOperand_ParseFail)
6016     return false;
6017   if (Res == MatchOperand_NoMatch)
6018     return true;
6019 
6020   if (Val < 0 || Val > MaxVal) {
6021     Error(Loc, Twine("out of range ", StringRef(Pref)));
6022     return false;
6023   }
6024 
6025   Fmt = Val;
6026   return true;
6027 }
6028 
6029 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6030 // values to live in a joint format operand in the MCInst encoding.
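// For example (illustrative values), "dfmt:5, nfmt:2" and "nfmt:2, dfmt:5" are
// both accepted and encode the same combined format value.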
6031 OperandMatchResultTy
6032 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6033   using namespace llvm::AMDGPU::MTBUFFormat;
6034 
6035   int64_t Dfmt = DFMT_UNDEF;
6036   int64_t Nfmt = NFMT_UNDEF;
6037 
6038   // dfmt and nfmt can appear in either order, and each is optional.
6039   for (int I = 0; I < 2; ++I) {
6040     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6041       return MatchOperand_ParseFail;
6042 
6043     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
6044       return MatchOperand_ParseFail;
6045     }
6046     // Skip optional comma between dfmt/nfmt
6047     // but guard against 2 commas following each other.
6048     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6049         !peekToken().is(AsmToken::Comma)) {
6050       trySkipToken(AsmToken::Comma);
6051     }
6052   }
6053 
6054   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6055     return MatchOperand_NoMatch;
6056 
6057   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6058   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6059 
6060   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6061   return MatchOperand_Success;
6062 }
6063 
6064 OperandMatchResultTy
6065 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6066   using namespace llvm::AMDGPU::MTBUFFormat;
6067 
6068   int64_t Fmt = UFMT_UNDEF;
6069 
6070   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6071     return MatchOperand_ParseFail;
6072 
6073   if (Fmt == UFMT_UNDEF)
6074     return MatchOperand_NoMatch;
6075 
6076   Format = Fmt;
6077   return MatchOperand_Success;
6078 }
6079 
6080 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6081                                     int64_t &Nfmt,
6082                                     StringRef FormatStr,
6083                                     SMLoc Loc) {
6084   using namespace llvm::AMDGPU::MTBUFFormat;
6085   int64_t Format;
6086 
6087   Format = getDfmt(FormatStr);
6088   if (Format != DFMT_UNDEF) {
6089     Dfmt = Format;
6090     return true;
6091   }
6092 
6093   Format = getNfmt(FormatStr, getSTI());
6094   if (Format != NFMT_UNDEF) {
6095     Nfmt = Format;
6096     return true;
6097   }
6098 
6099   Error(Loc, "unsupported format");
6100   return false;
6101 }
6102 
6103 OperandMatchResultTy
6104 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6105                                           SMLoc FormatLoc,
6106                                           int64_t &Format) {
6107   using namespace llvm::AMDGPU::MTBUFFormat;
6108 
6109   int64_t Dfmt = DFMT_UNDEF;
6110   int64_t Nfmt = NFMT_UNDEF;
6111   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6112     return MatchOperand_ParseFail;
6113 
6114   if (trySkipToken(AsmToken::Comma)) {
6115     StringRef Str;
6116     SMLoc Loc = getLoc();
6117     if (!parseId(Str, "expected a format string") ||
6118         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6119       return MatchOperand_ParseFail;
6120     }
6121     if (Dfmt == DFMT_UNDEF) {
6122       Error(Loc, "duplicate numeric format");
6123       return MatchOperand_ParseFail;
6124     } else if (Nfmt == NFMT_UNDEF) {
6125       Error(Loc, "duplicate data format");
6126       return MatchOperand_ParseFail;
6127     }
6128   }
6129 
6130   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6131   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6132 
6133   if (isGFX10Plus()) {
6134     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6135     if (Ufmt == UFMT_UNDEF) {
6136       Error(FormatLoc, "unsupported format");
6137       return MatchOperand_ParseFail;
6138     }
6139     Format = Ufmt;
6140   } else {
6141     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6142   }
6143 
6144   return MatchOperand_Success;
6145 }
6146 
6147 OperandMatchResultTy
6148 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6149                                             SMLoc Loc,
6150                                             int64_t &Format) {
6151   using namespace llvm::AMDGPU::MTBUFFormat;
6152 
6153   auto Id = getUnifiedFormat(FormatStr, getSTI());
6154   if (Id == UFMT_UNDEF)
6155     return MatchOperand_NoMatch;
6156 
6157   if (!isGFX10Plus()) {
6158     Error(Loc, "unified format is not supported on this GPU");
6159     return MatchOperand_ParseFail;
6160   }
6161 
6162   Format = Id;
6163   return MatchOperand_Success;
6164 }
6165 
6166 OperandMatchResultTy
6167 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6168   using namespace llvm::AMDGPU::MTBUFFormat;
6169   SMLoc Loc = getLoc();
6170 
6171   if (!parseExpr(Format))
6172     return MatchOperand_ParseFail;
6173   if (!isValidFormatEncoding(Format, getSTI())) {
6174     Error(Loc, "out of range format");
6175     return MatchOperand_ParseFail;
6176   }
6177 
6178   return MatchOperand_Success;
6179 }
6180 
6181 OperandMatchResultTy
6182 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6183   using namespace llvm::AMDGPU::MTBUFFormat;
6184 
6185   if (!trySkipId("format", AsmToken::Colon))
6186     return MatchOperand_NoMatch;
6187 
6188   if (trySkipToken(AsmToken::LBrac)) {
6189     StringRef FormatStr;
6190     SMLoc Loc = getLoc();
6191     if (!parseId(FormatStr, "expected a format string"))
6192       return MatchOperand_ParseFail;
6193 
6194     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6195     if (Res == MatchOperand_NoMatch)
6196       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6197     if (Res != MatchOperand_Success)
6198       return Res;
6199 
6200     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6201       return MatchOperand_ParseFail;
6202 
6203     return MatchOperand_Success;
6204   }
6205 
6206   return parseNumericFormat(Format);
6207 }
6208 
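// Accepted forms of the MTBUF format operand (illustrative; the symbolic
// names come from the format tables in AMDGPUAsmUtils):
//   format:[BUF_FMT_32_FLOAT]                          - GFX10+ unified format
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]  - split dfmt/nfmt
//   format:127                                         - raw numeric encoding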
6209 OperandMatchResultTy
6210 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6211   using namespace llvm::AMDGPU::MTBUFFormat;
6212 
6213   int64_t Format = getDefaultFormatEncoding(getSTI());
6214   OperandMatchResultTy Res;
6215   SMLoc Loc = getLoc();
6216 
6217   // Parse legacy format syntax.
6218   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6219   if (Res == MatchOperand_ParseFail)
6220     return Res;
6221 
6222   bool FormatFound = (Res == MatchOperand_Success);
6223 
6224   Operands.push_back(
6225     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6226 
6227   if (FormatFound)
6228     trySkipToken(AsmToken::Comma);
6229 
6230   if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let the matcher handle the error.
6233     return MatchOperand_Success;
6234   }
6235 
6236   // Parse soffset.
6237   Res = parseRegOrImm(Operands);
6238   if (Res != MatchOperand_Success)
6239     return Res;
6240 
6241   trySkipToken(AsmToken::Comma);
6242 
6243   if (!FormatFound) {
6244     Res = parseSymbolicOrNumericFormat(Format);
6245     if (Res == MatchOperand_ParseFail)
6246       return Res;
6247     if (Res == MatchOperand_Success) {
6248       auto Size = Operands.size();
6249       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6250       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6251       Op.setImm(Format);
6252     }
6253     return MatchOperand_Success;
6254   }
6255 
6256   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6257     Error(getLoc(), "duplicate format");
6258     return MatchOperand_ParseFail;
6259   }
6260   return MatchOperand_Success;
6261 }
6262 
6263 //===----------------------------------------------------------------------===//
6264 // ds
6265 //===----------------------------------------------------------------------===//
6266 
6267 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6268                                     const OperandVector &Operands) {
6269   OptionalImmIndexMap OptionalIdx;
6270 
6271   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6272     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6273 
6274     // Add the register arguments
6275     if (Op.isReg()) {
6276       Op.addRegOperands(Inst, 1);
6277       continue;
6278     }
6279 
6280     // Handle optional arguments
6281     OptionalIdx[Op.getImmTy()] = i;
6282   }
6283 
6284   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6285   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6286   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6287 
6288   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6289 }
6290 
6291 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6292                                 bool IsGdsHardcoded) {
6293   OptionalImmIndexMap OptionalIdx;
6294 
6295   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6296     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6297 
6298     // Add the register arguments
6299     if (Op.isReg()) {
6300       Op.addRegOperands(Inst, 1);
6301       continue;
6302     }
6303 
6304     if (Op.isToken() && Op.getToken() == "gds") {
6305       IsGdsHardcoded = true;
6306       continue;
6307     }
6308 
6309     // Handle optional arguments
6310     OptionalIdx[Op.getImmTy()] = i;
6311   }
6312 
6313   AMDGPUOperand::ImmTy OffsetType =
6314     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6315      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6316      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6317                                                       AMDGPUOperand::ImmTyOffset;
6318 
6319   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6320 
6321   if (!IsGdsHardcoded) {
6322     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6323   }
6324   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6325 }
6326 
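// Converts a parsed 'exp' instruction. The 'en' mask is built from the
// source operands: each source that is not 'off' enables one bit, or a
// 2-bit pair when 'compr' is used, since compressed exports pack two
// components into each VGPR.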
6327 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6328   OptionalImmIndexMap OptionalIdx;
6329 
6330   unsigned OperandIdx[4];
6331   unsigned EnMask = 0;
6332   int SrcIdx = 0;
6333 
6334   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6335     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6336 
6337     // Add the register arguments
6338     if (Op.isReg()) {
6339       assert(SrcIdx < 4);
6340       OperandIdx[SrcIdx] = Inst.size();
6341       Op.addRegOperands(Inst, 1);
6342       ++SrcIdx;
6343       continue;
6344     }
6345 
6346     if (Op.isOff()) {
6347       assert(SrcIdx < 4);
6348       OperandIdx[SrcIdx] = Inst.size();
6349       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6350       ++SrcIdx;
6351       continue;
6352     }
6353 
6354     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6355       Op.addImmOperands(Inst, 1);
6356       continue;
6357     }
6358 
6359     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6360       continue;
6361 
6362     // Handle optional arguments
6363     OptionalIdx[Op.getImmTy()] = i;
6364   }
6365 
6366   assert(SrcIdx == 4);
6367 
6368   bool Compr = false;
6369   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6370     Compr = true;
6371     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6372     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6373     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6374   }
6375 
6376   for (auto i = 0; i < SrcIdx; ++i) {
6377     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6378       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6379     }
6380   }
6381 
6382   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6383   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6384 
6385   Inst.addOperand(MCOperand::createImm(EnMask));
6386 }
6387 
6388 //===----------------------------------------------------------------------===//
6389 // s_waitcnt
6390 //===----------------------------------------------------------------------===//
6391 
6392 static bool
6393 encodeCnt(
6394   const AMDGPU::IsaVersion ISA,
6395   int64_t &IntVal,
6396   int64_t CntVal,
6397   bool Saturate,
6398   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6399   unsigned (*decode)(const IsaVersion &Version, unsigned))
6400 {
6401   bool Failed = false;
6402 
6403   IntVal = encode(ISA, IntVal, CntVal);
6404   if (CntVal != decode(ISA, IntVal)) {
6405     if (Saturate) {
6406       IntVal = encode(ISA, IntVal, -1);
6407     } else {
6408       Failed = true;
6409     }
6410   }
6411   return Failed;
6412 }
6413 
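// Parses a single counter term of an s_waitcnt operand, e.g.
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
// A "_sat" suffix (e.g. vmcnt_sat) clamps an oversized value to the
// counter's maximum instead of reporting an error.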
6414 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6415 
6416   SMLoc CntLoc = getLoc();
6417   StringRef CntName = getTokenStr();
6418 
6419   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6420       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6421     return false;
6422 
6423   int64_t CntVal;
6424   SMLoc ValLoc = getLoc();
6425   if (!parseExpr(CntVal))
6426     return false;
6427 
6428   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6429 
6430   bool Failed = true;
6431   bool Sat = CntName.endswith("_sat");
6432 
6433   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6434     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6435   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6436     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6437   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6438     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6439   } else {
6440     Error(CntLoc, "invalid counter name " + CntName);
6441     return false;
6442   }
6443 
6444   if (Failed) {
    Error(ValLoc, "value is too large for " + CntName);
6446     return false;
6447   }
6448 
6449   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6450     return false;
6451 
6452   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6453     if (isToken(AsmToken::EndOfStatement)) {
6454       Error(getLoc(), "expected a counter name");
6455       return false;
6456     }
6457   }
6458 
6459   return true;
6460 }
6461 
6462 OperandMatchResultTy
6463 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6464   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6465   int64_t Waitcnt = getWaitcntBitMask(ISA);
6466   SMLoc S = getLoc();
6467 
6468   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6469     while (!isToken(AsmToken::EndOfStatement)) {
6470       if (!parseCnt(Waitcnt))
6471         return MatchOperand_ParseFail;
6472     }
6473   } else {
6474     if (!parseExpr(Waitcnt))
6475       return MatchOperand_ParseFail;
6476   }
6477 
6478   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6479   return MatchOperand_Success;
6480 }
6481 
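// Parses one field of an s_delay_alu operand; per the shifts below, instid0
// occupies bits [3:0], instskip bits [6:4] and instid1 the bits from 7 up.
// Illustrative example:
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)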
6482 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6483   SMLoc FieldLoc = getLoc();
6484   StringRef FieldName = getTokenStr();
6485   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6486       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6487     return false;
6488 
6489   SMLoc ValueLoc = getLoc();
6490   StringRef ValueName = getTokenStr();
6491   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6492       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6493     return false;
6494 
6495   unsigned Shift;
6496   if (FieldName == "instid0") {
6497     Shift = 0;
6498   } else if (FieldName == "instskip") {
6499     Shift = 4;
6500   } else if (FieldName == "instid1") {
6501     Shift = 7;
6502   } else {
6503     Error(FieldLoc, "invalid field name " + FieldName);
6504     return false;
6505   }
6506 
6507   int Value;
6508   if (Shift == 4) {
6509     // Parse values for instskip.
6510     Value = StringSwitch<int>(ValueName)
6511                 .Case("SAME", 0)
6512                 .Case("NEXT", 1)
6513                 .Case("SKIP_1", 2)
6514                 .Case("SKIP_2", 3)
6515                 .Case("SKIP_3", 4)
6516                 .Case("SKIP_4", 5)
6517                 .Default(-1);
6518   } else {
6519     // Parse values for instid0 and instid1.
6520     Value = StringSwitch<int>(ValueName)
6521                 .Case("NO_DEP", 0)
6522                 .Case("VALU_DEP_1", 1)
6523                 .Case("VALU_DEP_2", 2)
6524                 .Case("VALU_DEP_3", 3)
6525                 .Case("VALU_DEP_4", 4)
6526                 .Case("TRANS32_DEP_1", 5)
6527                 .Case("TRANS32_DEP_2", 6)
6528                 .Case("TRANS32_DEP_3", 7)
6529                 .Case("FMA_ACCUM_CYCLE_1", 8)
6530                 .Case("SALU_CYCLE_1", 9)
6531                 .Case("SALU_CYCLE_2", 10)
6532                 .Case("SALU_CYCLE_3", 11)
6533                 .Default(-1);
6534   }
6535   if (Value < 0) {
6536     Error(ValueLoc, "invalid value name " + ValueName);
6537     return false;
6538   }
6539 
6540   Delay |= Value << Shift;
6541   return true;
6542 }
6543 
6544 OperandMatchResultTy
6545 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6546   int64_t Delay = 0;
6547   SMLoc S = getLoc();
6548 
6549   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6550     do {
6551       if (!parseDelay(Delay))
6552         return MatchOperand_ParseFail;
6553     } while (trySkipToken(AsmToken::Pipe));
6554   } else {
6555     if (!parseExpr(Delay))
6556       return MatchOperand_ParseFail;
6557   }
6558 
6559   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6560   return MatchOperand_Success;
6561 }
6562 
6563 bool
6564 AMDGPUOperand::isSWaitCnt() const {
6565   return isImm();
6566 }
6567 
6568 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6569 
6570 //===----------------------------------------------------------------------===//
6571 // DepCtr
6572 //===----------------------------------------------------------------------===//
6573 
6574 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6575                                   StringRef DepCtrName) {
6576   switch (ErrorId) {
6577   case OPR_ID_UNKNOWN:
6578     Error(Loc, Twine("invalid counter name ", DepCtrName));
6579     return;
6580   case OPR_ID_UNSUPPORTED:
6581     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6582     return;
6583   case OPR_ID_DUPLICATE:
6584     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6585     return;
6586   case OPR_VAL_INVALID:
6587     Error(Loc, Twine("invalid value for ", DepCtrName));
6588     return;
6589   default:
6590     assert(false);
6591   }
6592 }
6593 
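// Parses one counter of an s_waitcnt_depctr operand, e.g. something like
//   s_waitcnt_depctr depctr_sa_sdst(0) & depctr_va_vdst(0)
// (illustrative only; the accepted counter names come from the DepCtr
// tables in AMDGPUAsmUtils and depend on the subtarget).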
6594 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6595 
6596   using namespace llvm::AMDGPU::DepCtr;
6597 
6598   SMLoc DepCtrLoc = getLoc();
6599   StringRef DepCtrName = getTokenStr();
6600 
6601   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6602       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6603     return false;
6604 
6605   int64_t ExprVal;
6606   if (!parseExpr(ExprVal))
6607     return false;
6608 
6609   unsigned PrevOprMask = UsedOprMask;
6610   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6611 
6612   if (CntVal < 0) {
6613     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6614     return false;
6615   }
6616 
6617   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6618     return false;
6619 
6620   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6621     if (isToken(AsmToken::EndOfStatement)) {
6622       Error(getLoc(), "expected a counter name");
6623       return false;
6624     }
6625   }
6626 
6627   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6628   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6629   return true;
6630 }
6631 
6632 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6633   using namespace llvm::AMDGPU::DepCtr;
6634 
6635   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6636   SMLoc Loc = getLoc();
6637 
6638   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6639     unsigned UsedOprMask = 0;
6640     while (!isToken(AsmToken::EndOfStatement)) {
6641       if (!parseDepCtr(DepCtr, UsedOprMask))
6642         return MatchOperand_ParseFail;
6643     }
6644   } else {
6645     if (!parseExpr(DepCtr))
6646       return MatchOperand_ParseFail;
6647   }
6648 
6649   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6650   return MatchOperand_Success;
6651 }
6652 
6653 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6654 
6655 //===----------------------------------------------------------------------===//
6656 // hwreg
6657 //===----------------------------------------------------------------------===//
6658 
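// Parses the body of a hwreg() operand. Illustrative examples:
//   s_getreg_b32 s0, hwreg(HW_REG_MODE)        - whole register
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)  - bitfield at offset 0, width 4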
6659 bool
6660 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6661                                 OperandInfoTy &Offset,
6662                                 OperandInfoTy &Width) {
6663   using namespace llvm::AMDGPU::Hwreg;
6664 
  // The register may be specified by name or by a numeric code.
6666   HwReg.Loc = getLoc();
6667   if (isToken(AsmToken::Identifier) &&
6668       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6669     HwReg.IsSymbolic = true;
6670     lex(); // skip register name
6671   } else if (!parseExpr(HwReg.Id, "a register name")) {
6672     return false;
6673   }
6674 
6675   if (trySkipToken(AsmToken::RParen))
6676     return true;
6677 
6678   // parse optional params
6679   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6680     return false;
6681 
6682   Offset.Loc = getLoc();
6683   if (!parseExpr(Offset.Id))
6684     return false;
6685 
6686   if (!skipToken(AsmToken::Comma, "expected a comma"))
6687     return false;
6688 
6689   Width.Loc = getLoc();
6690   return parseExpr(Width.Id) &&
6691          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6692 }
6693 
6694 bool
6695 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6696                                const OperandInfoTy &Offset,
6697                                const OperandInfoTy &Width) {
6698 
6699   using namespace llvm::AMDGPU::Hwreg;
6700 
6701   if (HwReg.IsSymbolic) {
6702     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6703       Error(HwReg.Loc,
6704             "specified hardware register is not supported on this GPU");
6705       return false;
6706     }
6707   } else {
6708     if (!isValidHwreg(HwReg.Id)) {
6709       Error(HwReg.Loc,
6710             "invalid code of hardware register: only 6-bit values are legal");
6711       return false;
6712     }
6713   }
6714   if (!isValidHwregOffset(Offset.Id)) {
6715     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6716     return false;
6717   }
6718   if (!isValidHwregWidth(Width.Id)) {
6719     Error(Width.Loc,
6720           "invalid bitfield width: only values from 1 to 32 are legal");
6721     return false;
6722   }
6723   return true;
6724 }
6725 
6726 OperandMatchResultTy
6727 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6728   using namespace llvm::AMDGPU::Hwreg;
6729 
6730   int64_t ImmVal = 0;
6731   SMLoc Loc = getLoc();
6732 
6733   if (trySkipId("hwreg", AsmToken::LParen)) {
6734     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6735     OperandInfoTy Offset(OFFSET_DEFAULT_);
6736     OperandInfoTy Width(WIDTH_DEFAULT_);
6737     if (parseHwregBody(HwReg, Offset, Width) &&
6738         validateHwreg(HwReg, Offset, Width)) {
6739       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6740     } else {
6741       return MatchOperand_ParseFail;
6742     }
6743   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6744     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6745       Error(Loc, "invalid immediate: only 16-bit values are legal");
6746       return MatchOperand_ParseFail;
6747     }
6748   } else {
6749     return MatchOperand_ParseFail;
6750   }
6751 
6752   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6753   return MatchOperand_Success;
6754 }
6755 
6756 bool AMDGPUOperand::isHwreg() const {
6757   return isImmTy(ImmTyHwreg);
6758 }
6759 
6760 //===----------------------------------------------------------------------===//
6761 // sendmsg
6762 //===----------------------------------------------------------------------===//
6763 
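// Parses the body of a sendmsg() operand. Illustrative example:
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// The operation and stream id are optional and depend on the message.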
6764 bool
6765 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6766                                   OperandInfoTy &Op,
6767                                   OperandInfoTy &Stream) {
6768   using namespace llvm::AMDGPU::SendMsg;
6769 
6770   Msg.Loc = getLoc();
6771   if (isToken(AsmToken::Identifier) &&
6772       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6773     Msg.IsSymbolic = true;
6774     lex(); // skip message name
6775   } else if (!parseExpr(Msg.Id, "a message name")) {
6776     return false;
6777   }
6778 
6779   if (trySkipToken(AsmToken::Comma)) {
6780     Op.IsDefined = true;
6781     Op.Loc = getLoc();
6782     if (isToken(AsmToken::Identifier) &&
6783         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6784       lex(); // skip operation name
6785     } else if (!parseExpr(Op.Id, "an operation name")) {
6786       return false;
6787     }
6788 
6789     if (trySkipToken(AsmToken::Comma)) {
6790       Stream.IsDefined = true;
6791       Stream.Loc = getLoc();
6792       if (!parseExpr(Stream.Id))
6793         return false;
6794     }
6795   }
6796 
6797   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6798 }
6799 
6800 bool
6801 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6802                                  const OperandInfoTy &Op,
6803                                  const OperandInfoTy &Stream) {
6804   using namespace llvm::AMDGPU::SendMsg;
6805 
  // Validation strictness depends on whether the message is specified
  // in symbolic or numeric form. In the latter case we only check
  // that the value can be encoded.
6809   bool Strict = Msg.IsSymbolic;
6810 
6811   if (Strict) {
6812     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6813       Error(Msg.Loc, "specified message id is not supported on this GPU");
6814       return false;
6815     }
6816   } else {
6817     if (!isValidMsgId(Msg.Id, getSTI())) {
6818       Error(Msg.Loc, "invalid message id");
6819       return false;
6820     }
6821   }
6822   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6823     if (Op.IsDefined) {
6824       Error(Op.Loc, "message does not support operations");
6825     } else {
6826       Error(Msg.Loc, "missing message operation");
6827     }
6828     return false;
6829   }
6830   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6831     Error(Op.Loc, "invalid operation id");
6832     return false;
6833   }
6834   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6835       Stream.IsDefined) {
6836     Error(Stream.Loc, "message operation does not support streams");
6837     return false;
6838   }
6839   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6840     Error(Stream.Loc, "invalid message stream id");
6841     return false;
6842   }
6843   return true;
6844 }
6845 
6846 OperandMatchResultTy
6847 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6848   using namespace llvm::AMDGPU::SendMsg;
6849 
6850   int64_t ImmVal = 0;
6851   SMLoc Loc = getLoc();
6852 
6853   if (trySkipId("sendmsg", AsmToken::LParen)) {
6854     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6855     OperandInfoTy Op(OP_NONE_);
6856     OperandInfoTy Stream(STREAM_ID_NONE_);
6857     if (parseSendMsgBody(Msg, Op, Stream) &&
6858         validateSendMsg(Msg, Op, Stream)) {
6859       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6860     } else {
6861       return MatchOperand_ParseFail;
6862     }
6863   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6864     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6865       Error(Loc, "invalid immediate: only 16-bit values are legal");
6866       return MatchOperand_ParseFail;
6867     }
6868   } else {
6869     return MatchOperand_ParseFail;
6870   }
6871 
6872   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6873   return MatchOperand_Success;
6874 }
6875 
6876 bool AMDGPUOperand::isSendMsg() const {
6877   return isImmTy(ImmTySendMsg);
6878 }
6879 
6880 //===----------------------------------------------------------------------===//
6881 // v_interp
6882 //===----------------------------------------------------------------------===//
6883 
6884 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6885   StringRef Str;
6886   SMLoc S = getLoc();
6887 
6888   if (!parseId(Str))
6889     return MatchOperand_NoMatch;
6890 
6891   int Slot = StringSwitch<int>(Str)
6892     .Case("p10", 0)
6893     .Case("p20", 1)
6894     .Case("p0", 2)
6895     .Default(-1);
6896 
6897   if (Slot == -1) {
6898     Error(S, "invalid interpolation slot");
6899     return MatchOperand_ParseFail;
6900   }
6901 
6902   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6903                                               AMDGPUOperand::ImmTyInterpSlot));
6904   return MatchOperand_Success;
6905 }
6906 
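// Parses an interpolation attribute of the form attr<N>.<chan>, where N is
// an attribute index in [0,63] and <chan> is one of x, y, z or w.
// Illustrative example: v_interp_p1_f32 v0, v1, attr0.x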
6907 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6908   StringRef Str;
6909   SMLoc S = getLoc();
6910 
6911   if (!parseId(Str))
6912     return MatchOperand_NoMatch;
6913 
6914   if (!Str.startswith("attr")) {
6915     Error(S, "invalid interpolation attribute");
6916     return MatchOperand_ParseFail;
6917   }
6918 
6919   StringRef Chan = Str.take_back(2);
6920   int AttrChan = StringSwitch<int>(Chan)
6921     .Case(".x", 0)
6922     .Case(".y", 1)
6923     .Case(".z", 2)
6924     .Case(".w", 3)
6925     .Default(-1);
6926   if (AttrChan == -1) {
6927     Error(S, "invalid or missing interpolation attribute channel");
6928     return MatchOperand_ParseFail;
6929   }
6930 
6931   Str = Str.drop_back(2).drop_front(4);
6932 
6933   uint8_t Attr;
6934   if (Str.getAsInteger(10, Attr)) {
6935     Error(S, "invalid or missing interpolation attribute number");
6936     return MatchOperand_ParseFail;
6937   }
6938 
6939   if (Attr > 63) {
6940     Error(S, "out of bounds interpolation attribute number");
6941     return MatchOperand_ParseFail;
6942   }
6943 
6944   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6945 
6946   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6947                                               AMDGPUOperand::ImmTyInterpAttr));
6948   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6949                                               AMDGPUOperand::ImmTyAttrChan));
6950   return MatchOperand_Success;
6951 }
6952 
6953 //===----------------------------------------------------------------------===//
6954 // exp
6955 //===----------------------------------------------------------------------===//
6956 
6957 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6958   using namespace llvm::AMDGPU::Exp;
6959 
6960   StringRef Str;
6961   SMLoc S = getLoc();
6962 
6963   if (!parseId(Str))
6964     return MatchOperand_NoMatch;
6965 
6966   unsigned Id = getTgtId(Str);
6967   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6968     Error(S, (Id == ET_INVALID) ?
6969                 "invalid exp target" :
6970                 "exp target is not supported on this GPU");
6971     return MatchOperand_ParseFail;
6972   }
6973 
6974   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6975                                               AMDGPUOperand::ImmTyExpTgt));
6976   return MatchOperand_Success;
6977 }
6978 
6979 //===----------------------------------------------------------------------===//
6980 // parser helpers
6981 //===----------------------------------------------------------------------===//
6982 
6983 bool
6984 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6985   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6986 }
6987 
6988 bool
6989 AMDGPUAsmParser::isId(const StringRef Id) const {
6990   return isId(getToken(), Id);
6991 }
6992 
6993 bool
6994 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6995   return getTokenKind() == Kind;
6996 }
6997 
6998 bool
6999 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7000   if (isId(Id)) {
7001     lex();
7002     return true;
7003   }
7004   return false;
7005 }
7006 
7007 bool
7008 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7009   if (isToken(AsmToken::Identifier)) {
7010     StringRef Tok = getTokenStr();
7011     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
7012       lex();
7013       return true;
7014     }
7015   }
7016   return false;
7017 }
7018 
7019 bool
7020 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7021   if (isId(Id) && peekToken().is(Kind)) {
7022     lex();
7023     lex();
7024     return true;
7025   }
7026   return false;
7027 }
7028 
7029 bool
7030 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7031   if (isToken(Kind)) {
7032     lex();
7033     return true;
7034   }
7035   return false;
7036 }
7037 
7038 bool
7039 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7040                            const StringRef ErrMsg) {
7041   if (!trySkipToken(Kind)) {
7042     Error(getLoc(), ErrMsg);
7043     return false;
7044   }
7045   return true;
7046 }
7047 
7048 bool
7049 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7050   SMLoc S = getLoc();
7051 
7052   const MCExpr *Expr;
7053   if (Parser.parseExpression(Expr))
7054     return false;
7055 
7056   if (Expr->evaluateAsAbsolute(Imm))
7057     return true;
7058 
7059   if (Expected.empty()) {
7060     Error(S, "expected absolute expression");
7061   } else {
7062     Error(S, Twine("expected ", Expected) +
7063              Twine(" or an absolute expression"));
7064   }
7065   return false;
7066 }
7067 
7068 bool
7069 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7070   SMLoc S = getLoc();
7071 
7072   const MCExpr *Expr;
7073   if (Parser.parseExpression(Expr))
7074     return false;
7075 
7076   int64_t IntVal;
7077   if (Expr->evaluateAsAbsolute(IntVal)) {
7078     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7079   } else {
7080     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7081   }
7082   return true;
7083 }
7084 
7085 bool
7086 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7087   if (isToken(AsmToken::String)) {
7088     Val = getToken().getStringContents();
7089     lex();
7090     return true;
7091   } else {
7092     Error(getLoc(), ErrMsg);
7093     return false;
7094   }
7095 }
7096 
7097 bool
7098 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7099   if (isToken(AsmToken::Identifier)) {
7100     Val = getTokenStr();
7101     lex();
7102     return true;
7103   } else {
7104     if (!ErrMsg.empty())
7105       Error(getLoc(), ErrMsg);
7106     return false;
7107   }
7108 }
7109 
7110 AsmToken
7111 AMDGPUAsmParser::getToken() const {
7112   return Parser.getTok();
7113 }
7114 
7115 AsmToken
7116 AMDGPUAsmParser::peekToken() {
7117   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
7118 }
7119 
7120 void
7121 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7122   auto TokCount = getLexer().peekTokens(Tokens);
7123 
7124   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7125     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7126 }
7127 
7128 AsmToken::TokenKind
7129 AMDGPUAsmParser::getTokenKind() const {
7130   return getLexer().getKind();
7131 }
7132 
7133 SMLoc
7134 AMDGPUAsmParser::getLoc() const {
7135   return getToken().getLoc();
7136 }
7137 
7138 StringRef
7139 AMDGPUAsmParser::getTokenStr() const {
7140   return getToken().getString();
7141 }
7142 
7143 void
7144 AMDGPUAsmParser::lex() {
7145   Parser.Lex();
7146 }
7147 
7148 SMLoc
7149 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7150                                const OperandVector &Operands) const {
7151   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7152     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7153     if (Test(Op))
7154       return Op.getStartLoc();
7155   }
7156   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7157 }
7158 
7159 SMLoc
7160 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7161                            const OperandVector &Operands) const {
7162   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7163   return getOperandLoc(Test, Operands);
7164 }
7165 
7166 SMLoc
7167 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7168                            const OperandVector &Operands) const {
7169   auto Test = [=](const AMDGPUOperand& Op) {
7170     return Op.isRegKind() && Op.getReg() == Reg;
7171   };
7172   return getOperandLoc(Test, Operands);
7173 }
7174 
7175 SMLoc
7176 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7177   auto Test = [](const AMDGPUOperand& Op) {
7178     return Op.IsImmKindLiteral() || Op.isExpr();
7179   };
7180   return getOperandLoc(Test, Operands);
7181 }
7182 
7183 SMLoc
7184 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7185   auto Test = [](const AMDGPUOperand& Op) {
7186     return Op.isImmKindConst();
7187   };
7188   return getOperandLoc(Test, Operands);
7189 }
7190 
7191 //===----------------------------------------------------------------------===//
7192 // swizzle
7193 //===----------------------------------------------------------------------===//
7194 
7195 LLVM_READNONE
7196 static unsigned
7197 encodeBitmaskPerm(const unsigned AndMask,
7198                   const unsigned OrMask,
7199                   const unsigned XorMask) {
7200   using namespace llvm::AMDGPU::Swizzle;
7201 
7202   return BITMASK_PERM_ENC |
7203          (AndMask << BITMASK_AND_SHIFT) |
7204          (OrMask  << BITMASK_OR_SHIFT)  |
7205          (XorMask << BITMASK_XOR_SHIFT);
7206 }
7207 
7208 bool
7209 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7210                                      const unsigned MinVal,
7211                                      const unsigned MaxVal,
7212                                      const StringRef ErrMsg,
7213                                      SMLoc &Loc) {
7214   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7215     return false;
7216   }
7217   Loc = getLoc();
7218   if (!parseExpr(Op)) {
7219     return false;
7220   }
7221   if (Op < MinVal || Op > MaxVal) {
7222     Error(Loc, ErrMsg);
7223     return false;
7224   }
7225 
7226   return true;
7227 }
7228 
7229 bool
7230 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7231                                       const unsigned MinVal,
7232                                       const unsigned MaxVal,
7233                                       const StringRef ErrMsg) {
7234   SMLoc Loc;
7235   for (unsigned i = 0; i < OpNum; ++i) {
7236     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7237       return false;
7238   }
7239 
7240   return true;
7241 }
7242 
7243 bool
7244 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7245   using namespace llvm::AMDGPU::Swizzle;
7246 
7247   int64_t Lane[LANE_NUM];
7248   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7249                            "expected a 2-bit lane id")) {
7250     Imm = QUAD_PERM_ENC;
7251     for (unsigned I = 0; I < LANE_NUM; ++I) {
7252       Imm |= Lane[I] << (LANE_SHIFT * I);
7253     }
7254     return true;
7255   }
7256   return false;
7257 }
7258 
7259 bool
7260 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7261   using namespace llvm::AMDGPU::Swizzle;
7262 
7263   SMLoc Loc;
7264   int64_t GroupSize;
7265   int64_t LaneIdx;
7266 
7267   if (!parseSwizzleOperand(GroupSize,
7268                            2, 32,
7269                            "group size must be in the interval [2,32]",
7270                            Loc)) {
7271     return false;
7272   }
7273   if (!isPowerOf2_64(GroupSize)) {
7274     Error(Loc, "group size must be a power of two");
7275     return false;
7276   }
7277   if (parseSwizzleOperand(LaneIdx,
7278                           0, GroupSize - 1,
7279                           "lane id must be in the interval [0,group size - 1]",
7280                           Loc)) {
7281     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7282     return true;
7283   }
7284   return false;
7285 }
7286 
7287 bool
7288 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7289   using namespace llvm::AMDGPU::Swizzle;
7290 
7291   SMLoc Loc;
7292   int64_t GroupSize;
7293 
7294   if (!parseSwizzleOperand(GroupSize,
7295                            2, 32,
7296                            "group size must be in the interval [2,32]",
7297                            Loc)) {
7298     return false;
7299   }
7300   if (!isPowerOf2_64(GroupSize)) {
7301     Error(Loc, "group size must be a power of two");
7302     return false;
7303   }
7304 
7305   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7306   return true;
7307 }
7308 
7309 bool
7310 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7311   using namespace llvm::AMDGPU::Swizzle;
7312 
7313   SMLoc Loc;
7314   int64_t GroupSize;
7315 
7316   if (!parseSwizzleOperand(GroupSize,
7317                            1, 16,
7318                            "group size must be in the interval [1,16]",
7319                            Loc)) {
7320     return false;
7321   }
7322   if (!isPowerOf2_64(GroupSize)) {
7323     Error(Loc, "group size must be a power of two");
7324     return false;
7325   }
7326 
7327   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7328   return true;
7329 }
7330 
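// Parses the mask string of swizzle(BITMASK_PERM, "<mask>"). The mask is
// BITMASK_WIDTH characters long, most significant bit first; each character
// transforms one bit of the lane id: '0' forces it to 0, '1' forces it to 1,
// 'p' preserves it and 'i' inverts it. Illustrative example:
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pip")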
7331 bool
7332 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7333   using namespace llvm::AMDGPU::Swizzle;
7334 
7335   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7336     return false;
7337   }
7338 
7339   StringRef Ctl;
7340   SMLoc StrLoc = getLoc();
7341   if (!parseString(Ctl)) {
7342     return false;
7343   }
7344   if (Ctl.size() != BITMASK_WIDTH) {
7345     Error(StrLoc, "expected a 5-character mask");
7346     return false;
7347   }
7348 
7349   unsigned AndMask = 0;
7350   unsigned OrMask = 0;
7351   unsigned XorMask = 0;
7352 
7353   for (size_t i = 0; i < Ctl.size(); ++i) {
7354     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7355     switch(Ctl[i]) {
7356     default:
7357       Error(StrLoc, "invalid mask");
7358       return false;
7359     case '0':
7360       break;
7361     case '1':
7362       OrMask |= Mask;
7363       break;
7364     case 'p':
7365       AndMask |= Mask;
7366       break;
7367     case 'i':
7368       AndMask |= Mask;
7369       XorMask |= Mask;
7370       break;
7371     }
7372   }
7373 
7374   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7375   return true;
7376 }
7377 
7378 bool
7379 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7380 
7381   SMLoc OffsetLoc = getLoc();
7382 
7383   if (!parseExpr(Imm, "a swizzle macro")) {
7384     return false;
7385   }
7386   if (!isUInt<16>(Imm)) {
7387     Error(OffsetLoc, "expected a 16-bit offset");
7388     return false;
7389   }
7390   return true;
7391 }
7392 
7393 bool
7394 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7395   using namespace llvm::AMDGPU::Swizzle;
7396 
  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7398 
7399     SMLoc ModeLoc = getLoc();
7400     bool Ok = false;
7401 
7402     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7403       Ok = parseSwizzleQuadPerm(Imm);
7404     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7405       Ok = parseSwizzleBitmaskPerm(Imm);
7406     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7407       Ok = parseSwizzleBroadcast(Imm);
7408     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7409       Ok = parseSwizzleSwap(Imm);
7410     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7411       Ok = parseSwizzleReverse(Imm);
7412     } else {
7413       Error(ModeLoc, "expected a swizzle mode");
7414     }
7415 
    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7417   }
7418 
7419   return false;
7420 }
7421 
7422 OperandMatchResultTy
7423 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7424   SMLoc S = getLoc();
7425   int64_t Imm = 0;
7426 
7427   if (trySkipId("offset")) {
7428 
7429     bool Ok = false;
7430     if (skipToken(AsmToken::Colon, "expected a colon")) {
7431       if (trySkipId("swizzle")) {
7432         Ok = parseSwizzleMacro(Imm);
7433       } else {
7434         Ok = parseSwizzleOffset(Imm);
7435       }
7436     }
7437 
7438     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7439 
7440     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7441   } else {
7442     // Swizzle "offset" operand is optional.
7443     // If it is omitted, try parsing other optional operands.
7444     return parseOptionalOpr(Operands);
7445   }
7446 }
7447 
7448 bool
7449 AMDGPUOperand::isSwizzle() const {
7450   return isImmTy(ImmTySwizzle);
7451 }
7452 
7453 //===----------------------------------------------------------------------===//
7454 // VGPR Index Mode
7455 //===----------------------------------------------------------------------===//
7456 
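// Parses the body of a gpr_idx() operand, e.g. (illustrative):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0,DST)
// An empty list, gpr_idx(), selects no operands and encodes as OFF.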
7457 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7458 
7459   using namespace llvm::AMDGPU::VGPRIndexMode;
7460 
7461   if (trySkipToken(AsmToken::RParen)) {
7462     return OFF;
7463   }
7464 
7465   int64_t Imm = 0;
7466 
7467   while (true) {
7468     unsigned Mode = 0;
7469     SMLoc S = getLoc();
7470 
7471     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7472       if (trySkipId(IdSymbolic[ModeId])) {
7473         Mode = 1 << ModeId;
7474         break;
7475       }
7476     }
7477 
7478     if (Mode == 0) {
7479       Error(S, (Imm == 0)?
7480                "expected a VGPR index mode or a closing parenthesis" :
7481                "expected a VGPR index mode");
7482       return UNDEF;
7483     }
7484 
7485     if (Imm & Mode) {
7486       Error(S, "duplicate VGPR index mode");
7487       return UNDEF;
7488     }
7489     Imm |= Mode;
7490 
7491     if (trySkipToken(AsmToken::RParen))
7492       break;
7493     if (!skipToken(AsmToken::Comma,
7494                    "expected a comma or a closing parenthesis"))
7495       return UNDEF;
7496   }
7497 
7498   return Imm;
7499 }
7500 
7501 OperandMatchResultTy
7502 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7503 
7504   using namespace llvm::AMDGPU::VGPRIndexMode;
7505 
7506   int64_t Imm = 0;
7507   SMLoc S = getLoc();
7508 
7509   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7510     Imm = parseGPRIdxMacro();
7511     if (Imm == UNDEF)
7512       return MatchOperand_ParseFail;
7513   } else {
7514     if (getParser().parseAbsoluteExpression(Imm))
7515       return MatchOperand_ParseFail;
7516     if (Imm < 0 || !isUInt<4>(Imm)) {
7517       Error(S, "invalid immediate: only 4-bit values are legal");
7518       return MatchOperand_ParseFail;
7519     }
7520   }
7521 
7522   Operands.push_back(
7523       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7524   return MatchOperand_Success;
7525 }
7526 
7527 bool AMDGPUOperand::isGPRIdxMode() const {
7528   return isImmTy(ImmTyGprIdxMode);
7529 }
7530 
7531 //===----------------------------------------------------------------------===//
7532 // sopp branch targets
7533 //===----------------------------------------------------------------------===//
7534 
7535 OperandMatchResultTy
7536 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7537 
7538   // Make sure we are not parsing something
7539   // that looks like a label or an expression but is not.
7540   // This will improve error messages.
7541   if (isRegister() || isModifier())
7542     return MatchOperand_NoMatch;
7543 
7544   if (!parseExpr(Operands))
7545     return MatchOperand_ParseFail;
7546 
7547   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7548   assert(Opr.isImm() || Opr.isExpr());
7549   SMLoc Loc = Opr.getStartLoc();
7550 
7551   // Currently we do not support arbitrary expressions as branch targets.
7552   // Only labels and absolute expressions are accepted.
7553   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7554     Error(Loc, "expected an absolute expression or a label");
7555   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7556     Error(Loc, "expected a 16-bit signed jump offset");
7557   }
7558 
7559   return MatchOperand_Success;
7560 }
7561 
7562 //===----------------------------------------------------------------------===//
7563 // Boolean holding registers
7564 //===----------------------------------------------------------------------===//
7565 
7566 OperandMatchResultTy
7567 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7568   return parseReg(Operands);
7569 }
7570 
7571 //===----------------------------------------------------------------------===//
7572 // mubuf
7573 //===----------------------------------------------------------------------===//
7574 
7575 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7576   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7577 }
7578 
7579 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7580                                    const OperandVector &Operands,
7581                                    bool IsAtomic,
7582                                    bool IsLds) {
7583   OptionalImmIndexMap OptionalIdx;
7584   unsigned FirstOperandIdx = 1;
7585   bool IsAtomicReturn = false;
7586 
7587   if (IsAtomic) {
7588     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7589       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7590       if (!Op.isCPol())
7591         continue;
7592       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7593       break;
7594     }
7595 
7596     if (!IsAtomicReturn) {
7597       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7598       if (NewOpc != -1)
7599         Inst.setOpcode(NewOpc);
7600     }
7601 
7602     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7603                       SIInstrFlags::IsAtomicRet;
7604   }
7605 
7606   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7607     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7608 
7609     // Add the register arguments
7610     if (Op.isReg()) {
7611       Op.addRegOperands(Inst, 1);
7612       // Insert a tied src for atomic return dst.
7613       // This cannot be postponed as subsequent calls to
7614       // addImmOperands rely on correct number of MC operands.
7615       if (IsAtomicReturn && i == FirstOperandIdx)
7616         Op.addRegOperands(Inst, 1);
7617       continue;
7618     }
7619 
7620     // Handle the case where soffset is an immediate
7621     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7622       Op.addImmOperands(Inst, 1);
7623       continue;
7624     }
7625 
7626     // Handle tokens like 'offen' which are sometimes hard-coded into the
7627     // asm string.  There are no MCInst operands for these.
7628     if (Op.isToken()) {
7629       continue;
7630     }
7631     assert(Op.isImm());
7632 
7633     // Handle optional arguments
7634     OptionalIdx[Op.getImmTy()] = i;
7635   }
7636 
7637   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7638   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7639 
7640   if (!IsLds) { // tfe is not legal with lds opcodes
7641     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7642   }
7643   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7644 }
7645 
7646 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7647   OptionalImmIndexMap OptionalIdx;
7648 
7649   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7650     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7651 
7652     // Add the register arguments
7653     if (Op.isReg()) {
7654       Op.addRegOperands(Inst, 1);
7655       continue;
7656     }
7657 
7658     // Handle the case where soffset is an immediate
7659     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7660       Op.addImmOperands(Inst, 1);
7661       continue;
7662     }
7663 
7664     // Handle tokens like 'offen' which are sometimes hard-coded into the
7665     // asm string.  There are no MCInst operands for these.
7666     if (Op.isToken()) {
7667       continue;
7668     }
7669     assert(Op.isImm());
7670 
7671     // Handle optional arguments
7672     OptionalIdx[Op.getImmTy()] = i;
7673   }
7674 
7675   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7676                         AMDGPUOperand::ImmTyOffset);
7677   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7678   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7679   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7680   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7681 }
7682 
7683 //===----------------------------------------------------------------------===//
7684 // mimg
7685 //===----------------------------------------------------------------------===//
7686 
7687 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7688                               bool IsAtomic) {
7689   unsigned I = 1;
7690   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7691   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7692     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7693   }
7694 
7695   if (IsAtomic) {
7696     // Add src, same as dst
7697     assert(Desc.getNumDefs() == 1);
7698     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7699   }
7700 
7701   OptionalImmIndexMap OptionalIdx;
7702 
7703   for (unsigned E = Operands.size(); I != E; ++I) {
7704     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7705 
7706     // Add the register arguments
7707     if (Op.isReg()) {
7708       Op.addRegOperands(Inst, 1);
7709     } else if (Op.isImmModifier()) {
7710       OptionalIdx[Op.getImmTy()] = I;
7711     } else if (!Op.isToken()) {
7712       llvm_unreachable("unexpected operand type");
7713     }
7714   }
7715 
7716   bool IsGFX10Plus = isGFX10Plus();
7717 
7718   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7719   if (IsGFX10Plus)
7720     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7721   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7722   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7723   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7724   if (IsGFX10Plus)
7725     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7726   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7727     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7728   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7729   if (!IsGFX10Plus)
7730     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7731   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7732 }
7733 
7734 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7735   cvtMIMG(Inst, Operands, true);
7736 }
7737 
7738 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7739   OptionalImmIndexMap OptionalIdx;
7740   bool IsAtomicReturn = false;
7741 
7742   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7743     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7744     if (!Op.isCPol())
7745       continue;
7746     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7747     break;
7748   }
7749 
7750   if (!IsAtomicReturn) {
7751     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7752     if (NewOpc != -1)
7753       Inst.setOpcode(NewOpc);
7754   }
7755 
7756   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7757                     SIInstrFlags::IsAtomicRet;
7758 
7759   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7760     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7761 
7762     // Add the register arguments
7763     if (Op.isReg()) {
7764       Op.addRegOperands(Inst, 1);
7765       if (IsAtomicReturn && i == 1)
7766         Op.addRegOperands(Inst, 1);
7767       continue;
7768     }
7769 
7770     // Handle the case where soffset is an immediate
7771     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7772       Op.addImmOperands(Inst, 1);
7773       continue;
7774     }
7775 
7776     // Handle tokens like 'offen' which are sometimes hard-coded into the
7777     // asm string.  There are no MCInst operands for these.
7778     if (Op.isToken()) {
7779       continue;
7780     }
7781     assert(Op.isImm());
7782 
7783     // Handle optional arguments
7784     OptionalIdx[Op.getImmTy()] = i;
7785   }
7786 
7787   if ((int)Inst.getNumOperands() <=
7788       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7789     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7790   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7791 }
7792 
7793 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7794                                       const OperandVector &Operands) {
7795   for (unsigned I = 1; I < Operands.size(); ++I) {
7796     auto &Operand = (AMDGPUOperand &)*Operands[I];
7797     if (Operand.isReg())
7798       Operand.addRegOperands(Inst, 1);
7799   }
7800 
7801   Inst.addOperand(MCOperand::createImm(1)); // a16
7802 }
7803 
7804 //===----------------------------------------------------------------------===//
7805 // smrd
7806 //===----------------------------------------------------------------------===//
7807 
7808 bool AMDGPUOperand::isSMRDOffset8() const {
7809   return isImm() && isUInt<8>(getImm());
7810 }
7811 
7812 bool AMDGPUOperand::isSMEMOffset() const {
7813   return isImmTy(ImmTyNone) ||
7814          isImmTy(ImmTyOffset); // Offset range is checked later by validator.
7815 }
7816 
7817 bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI, and we only want to use them
  // when the offset does not fit in 8 bits.
7820   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7821 }
7822 
7823 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7824   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7825 }
7826 
7827 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7828   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7829 }
7830 
7831 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7832   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7833 }
7834 
7835 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7836   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7837 }
7838 
7839 //===----------------------------------------------------------------------===//
7840 // vop3
7841 //===----------------------------------------------------------------------===//
7842 
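// Convert an omod multiplier value (mul:1, mul:2 or mul:4) to its hardware
// encoding (0, 1 or 2).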
7843 static bool ConvertOmodMul(int64_t &Mul) {
7844   if (Mul != 1 && Mul != 2 && Mul != 4)
7845     return false;
7846 
7847   Mul >>= 1;
7848   return true;
7849 }
7850 
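// Convert an omod divisor value (div:1 or div:2) to its hardware encoding
// (0 or 3).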
7851 static bool ConvertOmodDiv(int64_t &Div) {
7852   if (Div == 1) {
7853     Div = 0;
7854     return true;
7855   }
7856 
7857   if (Div == 2) {
7858     Div = 3;
7859     return true;
7860   }
7861 
7862   return false;
7863 }
7864 
7865 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7866 // This is intentional and ensures compatibility with sp3.
7867 // See bug 35397 for details.
7868 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7869   if (BoundCtrl == 0 || BoundCtrl == 1) {
7870     BoundCtrl = 1;
7871     return true;
7872   }
7873   return false;
7874 }
7875 
7876 // Note: the order in this table matches the order of operands in AsmString.
7877 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7878   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7879   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7880   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7881   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7882   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7883   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7884   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7885   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7886   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7887   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7888   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7889   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7890   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7891   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7892   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7893   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7894   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7895   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7896   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7897   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7898   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7899   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7900   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7901   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7902   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7903   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7904   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7905   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7906   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7907   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7908   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7909   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7910   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7911   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7912   {"dpp8",     AMDGPUOperand::ImmTyDPP8, false, nullptr},
7913   {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr},
7914   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7915   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7916   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7917   {"fi",   AMDGPUOperand::ImmTyDppFi, false, nullptr},
7918   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7919   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7920   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7921   {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
7922   {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
7923 };
7924 
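// Initialize the target ID and, for HSA ABI version 3 and above, emit the
// AMDGCN target directive before any statement is parsed.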
7925 void AMDGPUAsmParser::onBeginOfFile() {
7926   if (!getParser().getStreamer().getTargetStreamer() ||
7927       getSTI().getTargetTriple().getArch() == Triple::r600)
7928     return;
7929 
7930   if (!getTargetStreamer().getTargetID())
7931     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7932 
7933   if (isHsaAbiVersion3AndAbove(&getSTI()))
7934     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7935 }
7936 
7937 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7938 
7939   OperandMatchResultTy res = parseOptionalOpr(Operands);
7940 
7941   // This is a hack to enable hardcoded mandatory operands which follow
7942   // optional operands.
7943   //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which have
  // hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands to
  // make sure the autogenerated parser of custom operands never hits a
  // hardcoded mandatory operand.
7951 
7952   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7953     if (res != MatchOperand_Success ||
7954         isToken(AsmToken::EndOfStatement))
7955       break;
7956 
7957     trySkipToken(AsmToken::Comma);
7958     res = parseOptionalOpr(Operands);
7959   }
7960 
7961   return res;
7962 }
7963 
7964 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7965   OperandMatchResultTy res;
7966   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7967     // try to parse any optional operand here
7968     if (Op.IsBit) {
7969       res = parseNamedBit(Op.Name, Operands, Op.Type);
7970     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7971       res = parseOModOperand(Operands);
7972     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7973                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7974                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7975       res = parseSDWASel(Operands, Op.Name, Op.Type);
7976     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7977       res = parseSDWADstUnused(Operands);
7978     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7979                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7980                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7981                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7982       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7983                                         Op.ConvertResult);
7984     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7985       res = parseDim(Operands);
7986     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7987       res = parseCPol(Operands);
7988     } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) {
7989       res = parseDPP8(Operands);
7990     } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) {
7991       res = parseDPPCtrl(Operands);
7992     } else {
7993       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7994       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7995         res = parseOperandArrayWithPrefix("neg", Operands,
7996                                           AMDGPUOperand::ImmTyBLGP,
7997                                           nullptr);
7998       }
7999     }
8000     if (res != MatchOperand_NoMatch) {
8001       return res;
8002     }
8003   }
8004   return MatchOperand_NoMatch;
8005 }
8006 
8007 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
8008   StringRef Name = getTokenStr();
8009   if (Name == "mul") {
8010     return parseIntWithPrefix("mul", Operands,
8011                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8012   }
8013 
8014   if (Name == "div") {
8015     return parseIntWithPrefix("div", Operands,
8016                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8017   }
8018 
8019   return MatchOperand_NoMatch;
8020 }
8021 
8022 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
8023   cvtVOP3P(Inst, Operands);
8024 
8025   int Opc = Inst.getOpcode();
8026 
8027   int SrcNum;
8028   const int Ops[] = { AMDGPU::OpName::src0,
8029                       AMDGPU::OpName::src1,
8030                       AMDGPU::OpName::src2 };
8031   for (SrcNum = 0;
8032        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
8033        ++SrcNum);
8034   assert(SrcNum > 0);
8035 
8036   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8037   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8038 
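  // The op_sel bit just above the source bits applies to the destination;
  // it is recorded in src0_modifiers as DST_OP_SEL.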
8039   if ((OpSel & (1 << SrcNum)) != 0) {
8040     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8041     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8042     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8043   }
8044 }
8045 
8046 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  // 1. This operand is an input modifiers operand
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
8055 }
8056 
8057 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8058 {
8059   OptionalImmIndexMap OptionalIdx;
8060   unsigned Opc = Inst.getOpcode();
8061 
8062   unsigned I = 1;
8063   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8064   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8065     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8066   }
8067 
8068   for (unsigned E = Operands.size(); I != E; ++I) {
8069     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8070     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8071       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8072     } else if (Op.isInterpSlot() ||
8073                Op.isInterpAttr() ||
8074                Op.isAttrChan()) {
8075       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8076     } else if (Op.isImmModifier()) {
8077       OptionalIdx[Op.getImmTy()] = I;
8078     } else {
8079       llvm_unreachable("unhandled operand type");
8080     }
8081   }
8082 
8083   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
8084     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
8085   }
8086 
8087   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8088     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8089   }
8090 
8091   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8092     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8093   }
8094 }
8095 
8096 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8097 {
8098   OptionalImmIndexMap OptionalIdx;
8099   unsigned Opc = Inst.getOpcode();
8100 
8101   unsigned I = 1;
8102   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8103   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8104     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8105   }
8106 
8107   for (unsigned E = Operands.size(); I != E; ++I) {
8108     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8109     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8110       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8111     } else if (Op.isImmModifier()) {
8112       OptionalIdx[Op.getImmTy()] = I;
8113     } else {
8114       llvm_unreachable("unhandled operand type");
8115     }
8116   }
8117 
8118   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8119 
8120   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8121   if (OpSelIdx != -1)
8122     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8123 
8124   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8125 
8126   if (OpSelIdx == -1)
8127     return;
8128 
8129   const int Ops[] = { AMDGPU::OpName::src0,
8130                       AMDGPU::OpName::src1,
8131                       AMDGPU::OpName::src2 };
8132   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8133                          AMDGPU::OpName::src1_modifiers,
8134                          AMDGPU::OpName::src2_modifiers };
8135 
8136   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8137 
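  // Propagate the op_sel bits into the per-source modifier operands. Bit 3 is
  // the destination op_sel bit and is stored in src0_modifiers as DST_OP_SEL.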
8138   for (int J = 0; J < 3; ++J) {
8139     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8140     if (OpIdx == -1)
8141       break;
8142 
8143     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8144     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8145 
8146     if ((OpSel & (1 << J)) != 0)
8147       ModVal |= SISrcMods::OP_SEL_0;
8148     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8149         (OpSel & (1 << 3)) != 0)
8150       ModVal |= SISrcMods::DST_OP_SEL;
8151 
8152     Inst.getOperand(ModIdx).setImm(ModVal);
8153   }
8154 }
8155 
8156 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8157                               OptionalImmIndexMap &OptionalIdx) {
8158   unsigned Opc = Inst.getOpcode();
8159 
8160   unsigned I = 1;
8161   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8162   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8163     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8164   }
8165 
8166   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8167     // This instruction has src modifiers
8168     for (unsigned E = Operands.size(); I != E; ++I) {
8169       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8170       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8171         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8172       } else if (Op.isImmModifier()) {
8173         OptionalIdx[Op.getImmTy()] = I;
8174       } else if (Op.isRegOrImm()) {
8175         Op.addRegOrImmOperands(Inst, 1);
8176       } else {
8177         llvm_unreachable("unhandled operand type");
8178       }
8179     }
8180   } else {
8181     // No src modifiers
8182     for (unsigned E = Operands.size(); I != E; ++I) {
8183       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8184       if (Op.isMod()) {
8185         OptionalIdx[Op.getImmTy()] = I;
8186       } else {
8187         Op.addRegOrImmOperands(Inst, 1);
8188       }
8189     }
8190   }
8191 
8192   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8193     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8194   }
8195 
8196   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8197     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8198   }
8199 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
8204   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8205       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8206       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8207       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8208       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8209       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8210       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8211       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8212       Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
8213       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8214       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8215       Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
8216       Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
8217       Opc == AMDGPU::V_FMAC_F16_e64_gfx11) {
8218     auto it = Inst.begin();
8219     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8220     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8221     ++it;
8222     // Copy the operand to ensure it's not invalidated when Inst grows.
8223     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8224   }
8225 }
8226 
8227 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8228   OptionalImmIndexMap OptionalIdx;
8229   cvtVOP3(Inst, Operands, OptionalIdx);
8230 }
8231 
8232 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8233                                OptionalImmIndexMap &OptIdx) {
8234   const int Opc = Inst.getOpcode();
8235   const MCInstrDesc &Desc = MII.get(Opc);
8236 
8237   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8238 
8239   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8240     assert(!IsPacked);
8241     Inst.addOperand(Inst.getOperand(0));
8242   }
8243 
8244   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8245   // instruction, and then figure out where to actually put the modifiers
8246 
8247   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8248   if (OpSelIdx != -1) {
8249     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8250   }
8251 
8252   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8253   if (OpSelHiIdx != -1) {
8254     int DefaultVal = IsPacked ? -1 : 0;
8255     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8256                           DefaultVal);
8257   }
8258 
8259   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8260   if (NegLoIdx != -1) {
8261     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8262     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8263   }
8264 
8265   const int Ops[] = { AMDGPU::OpName::src0,
8266                       AMDGPU::OpName::src1,
8267                       AMDGPU::OpName::src2 };
8268   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8269                          AMDGPU::OpName::src1_modifiers,
8270                          AMDGPU::OpName::src2_modifiers };
8271 
8272   unsigned OpSel = 0;
8273   unsigned OpSelHi = 0;
8274   unsigned NegLo = 0;
8275   unsigned NegHi = 0;
8276 
8277   if (OpSelIdx != -1)
8278     OpSel = Inst.getOperand(OpSelIdx).getImm();
8279 
8280   if (OpSelHiIdx != -1)
8281     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8282 
8283   if (NegLoIdx != -1) {
8284     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8285     NegLo = Inst.getOperand(NegLoIdx).getImm();
8286     NegHi = Inst.getOperand(NegHiIdx).getImm();
8287   }
8288 
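  // Fold the parsed op_sel/op_sel_hi/neg_lo/neg_hi bits into the per-source
  // modifier operands.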
8289   for (int J = 0; J < 3; ++J) {
8290     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8291     if (OpIdx == -1)
8292       break;
8293 
8294     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8295 
8296     if (ModIdx == -1)
8297       continue;
8298 
8299     uint32_t ModVal = 0;
8300 
8301     if ((OpSel & (1 << J)) != 0)
8302       ModVal |= SISrcMods::OP_SEL_0;
8303 
8304     if ((OpSelHi & (1 << J)) != 0)
8305       ModVal |= SISrcMods::OP_SEL_1;
8306 
8307     if ((NegLo & (1 << J)) != 0)
8308       ModVal |= SISrcMods::NEG;
8309 
8310     if ((NegHi & (1 << J)) != 0)
8311       ModVal |= SISrcMods::NEG_HI;
8312 
8313     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8314   }
8315 }
8316 
8317 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8318   OptionalImmIndexMap OptIdx;
8319   cvtVOP3(Inst, Operands, OptIdx);
8320   cvtVOP3P(Inst, Operands, OptIdx);
8321 }
8322 
8323 //===----------------------------------------------------------------------===//
8324 // dpp
8325 //===----------------------------------------------------------------------===//
8326 
8327 bool AMDGPUOperand::isDPP8() const {
8328   return isImmTy(ImmTyDPP8);
8329 }
8330 
8331 bool AMDGPUOperand::isDPPCtrl() const {
8332   using namespace AMDGPU::DPP;
8333 
8334   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8335   if (result) {
8336     int64_t Imm = getImm();
8337     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8338            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8339            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8340            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8341            (Imm == DppCtrl::WAVE_SHL1) ||
8342            (Imm == DppCtrl::WAVE_ROL1) ||
8343            (Imm == DppCtrl::WAVE_SHR1) ||
8344            (Imm == DppCtrl::WAVE_ROR1) ||
8345            (Imm == DppCtrl::ROW_MIRROR) ||
8346            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8347            (Imm == DppCtrl::BCAST15) ||
8348            (Imm == DppCtrl::BCAST31) ||
8349            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8350            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8351   }
8352   return false;
8353 }
8354 
8355 //===----------------------------------------------------------------------===//
8356 // mAI
8357 //===----------------------------------------------------------------------===//
8358 
8359 bool AMDGPUOperand::isBLGP() const {
8360   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8361 }
8362 
8363 bool AMDGPUOperand::isCBSZ() const {
8364   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8365 }
8366 
8367 bool AMDGPUOperand::isABID() const {
8368   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8369 }
8370 
8371 bool AMDGPUOperand::isS16Imm() const {
8372   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8373 }
8374 
8375 bool AMDGPUOperand::isU16Imm() const {
8376   return isImm() && isUInt<16>(getImm());
8377 }
8378 
8379 //===----------------------------------------------------------------------===//
8380 // dim
8381 //===----------------------------------------------------------------------===//
8382 
8383 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8384   // We want to allow "dim:1D" etc.,
8385   // but the initial 1 is tokenized as an integer.
8386   std::string Token;
8387   if (isToken(AsmToken::Integer)) {
8388     SMLoc Loc = getToken().getEndLoc();
8389     Token = std::string(getTokenStr());
8390     lex();
8391     if (getLoc() != Loc)
8392       return false;
8393   }
8394 
8395   StringRef Suffix;
8396   if (!parseId(Suffix))
8397     return false;
8398   Token += Suffix;
8399 
8400   StringRef DimId = Token;
8401   if (DimId.startswith("SQ_RSRC_IMG_"))
8402     DimId = DimId.drop_front(12);
8403 
8404   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8405   if (!DimInfo)
8406     return false;
8407 
8408   Encoding = DimInfo->Encoding;
8409   return true;
8410 }
8411 
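// Parse a dim operand, e.g. dim:2D or dim:SQ_RSRC_IMG_2D.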
8412 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8413   if (!isGFX10Plus())
8414     return MatchOperand_NoMatch;
8415 
8416   SMLoc S = getLoc();
8417 
8418   if (!trySkipId("dim", AsmToken::Colon))
8419     return MatchOperand_NoMatch;
8420 
8421   unsigned Encoding;
8422   SMLoc Loc = getLoc();
8423   if (!parseDimId(Encoding)) {
8424     Error(Loc, "invalid dim value");
8425     return MatchOperand_ParseFail;
8426   }
8427 
8428   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8429                                               AMDGPUOperand::ImmTyDim));
8430   return MatchOperand_Success;
8431 }
8432 
8433 //===----------------------------------------------------------------------===//
8434 // dpp
8435 //===----------------------------------------------------------------------===//
8436 
8437 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8438   SMLoc S = getLoc();
8439 
8440   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8441     return MatchOperand_NoMatch;
8442 
8443   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8444 
8445   int64_t Sels[8];
8446 
8447   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8448     return MatchOperand_ParseFail;
8449 
8450   for (size_t i = 0; i < 8; ++i) {
8451     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8452       return MatchOperand_ParseFail;
8453 
8454     SMLoc Loc = getLoc();
8455     if (getParser().parseAbsoluteExpression(Sels[i]))
8456       return MatchOperand_ParseFail;
8457     if (0 > Sels[i] || 7 < Sels[i]) {
8458       Error(Loc, "expected a 3-bit value");
8459       return MatchOperand_ParseFail;
8460     }
8461   }
8462 
8463   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8464     return MatchOperand_ParseFail;
8465 
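  // Pack the eight 3-bit lane selects into a single 24-bit dpp8 immediate.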
8466   unsigned DPP8 = 0;
8467   for (size_t i = 0; i < 8; ++i)
8468     DPP8 |= (Sels[i] << (i * 3));
8469 
8470   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8471   return MatchOperand_Success;
8472 }
8473 
8474 bool
8475 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8476                                     const OperandVector &Operands) {
8477   if (Ctrl == "row_newbcast")
8478     return isGFX90A();
8479 
8480   if (Ctrl == "row_share" ||
8481       Ctrl == "row_xmask")
8482     return isGFX10Plus();
8483 
8484   if (Ctrl == "wave_shl" ||
8485       Ctrl == "wave_shr" ||
8486       Ctrl == "wave_rol" ||
8487       Ctrl == "wave_ror" ||
8488       Ctrl == "row_bcast")
8489     return isVI() || isGFX9();
8490 
8491   return Ctrl == "row_mirror" ||
8492          Ctrl == "row_half_mirror" ||
8493          Ctrl == "quad_perm" ||
8494          Ctrl == "row_shl" ||
8495          Ctrl == "row_shr" ||
8496          Ctrl == "row_ror";
8497 }
8498 
8499 int64_t
8500 AMDGPUAsmParser::parseDPPCtrlPerm() {
8501   // quad_perm:[%d,%d,%d,%d]
8502 
8503   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8504     return -1;
8505 
8506   int64_t Val = 0;
8507   for (int i = 0; i < 4; ++i) {
8508     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8509       return -1;
8510 
8511     int64_t Temp;
8512     SMLoc Loc = getLoc();
8513     if (getParser().parseAbsoluteExpression(Temp))
8514       return -1;
8515     if (Temp < 0 || Temp > 3) {
8516       Error(Loc, "expected a 2-bit value");
8517       return -1;
8518     }
8519 
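    // Pack each 2-bit lane select into the quad_perm value.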
8520     Val += (Temp << i * 2);
8521   }
8522 
8523   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8524     return -1;
8525 
8526   return Val;
8527 }
8528 
8529 int64_t
8530 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8531   using namespace AMDGPU::DPP;
8532 
8533   // sel:%d
8534 
8535   int64_t Val;
8536   SMLoc Loc = getLoc();
8537 
8538   if (getParser().parseAbsoluteExpression(Val))
8539     return -1;
8540 
8541   struct DppCtrlCheck {
8542     int64_t Ctrl;
8543     int Lo;
8544     int Hi;
8545   };
8546 
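  // Map the dpp control name to its base encoding and the valid range of the
  // sel value; row_bcast is handled separately below.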
8547   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8548     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8549     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8550     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8551     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8552     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8553     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8554     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8555     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8556     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8557     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8558     .Default({-1, 0, 0});
8559 
8560   bool Valid;
8561   if (Check.Ctrl == -1) {
8562     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8563     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8564   } else {
8565     Valid = Check.Lo <= Val && Val <= Check.Hi;
8566     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8567   }
8568 
8569   if (!Valid) {
8570     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8571     return -1;
8572   }
8573 
8574   return Val;
8575 }
8576 
8577 OperandMatchResultTy
8578 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8579   using namespace AMDGPU::DPP;
8580 
8581   if (!isToken(AsmToken::Identifier) ||
8582       !isSupportedDPPCtrl(getTokenStr(), Operands))
8583     return MatchOperand_NoMatch;
8584 
8585   SMLoc S = getLoc();
8586   int64_t Val = -1;
8587   StringRef Ctrl;
8588 
8589   parseId(Ctrl);
8590 
8591   if (Ctrl == "row_mirror") {
8592     Val = DppCtrl::ROW_MIRROR;
8593   } else if (Ctrl == "row_half_mirror") {
8594     Val = DppCtrl::ROW_HALF_MIRROR;
8595   } else {
8596     if (skipToken(AsmToken::Colon, "expected a colon")) {
8597       if (Ctrl == "quad_perm") {
8598         Val = parseDPPCtrlPerm();
8599       } else {
8600         Val = parseDPPCtrlSel(Ctrl);
8601       }
8602     }
8603   }
8604 
8605   if (Val == -1)
8606     return MatchOperand_ParseFail;
8607 
8608   Operands.push_back(
8609     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8610   return MatchOperand_Success;
8611 }
8612 
8613 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8614   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8615 }
8616 
8617 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8618   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8619 }
8620 
8621 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8622   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8623 }
8624 
8625 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8626   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8627 }
8628 
8629 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8630   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8631 }
8632 
8633 // Add dummy $old operand
8634 void AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst,
8635                                         const OperandVector &Operands,
8636                                         bool IsDPP8) {
8637   Inst.addOperand(MCOperand::createReg(0));
8638   cvtVOP3DPP(Inst, Operands, IsDPP8);
8639 }
8640 
8641 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8642   OptionalImmIndexMap OptionalIdx;
8643   unsigned Opc = Inst.getOpcode();
8644   bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8645   unsigned I = 1;
8646   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8647   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8648     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8649   }
8650 
8651   int Fi = 0;
8652   for (unsigned E = Operands.size(); I != E; ++I) {
8653     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8654                                             MCOI::TIED_TO);
8655     if (TiedTo != -1) {
8656       assert((unsigned)TiedTo < Inst.getNumOperands());
8657       // handle tied old or src2 for MAC instructions
8658       Inst.addOperand(Inst.getOperand(TiedTo));
8659     }
8660     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8661     // Add the register arguments
8662     if (IsDPP8 && Op.isFI()) {
8663       Fi = Op.getImm();
8664     } else if (HasModifiers &&
8665                isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8666       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8667     } else if (Op.isReg()) {
8668       Op.addRegOperands(Inst, 1);
8669     } else if (Op.isImm() &&
8670                Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) {
8671       assert(!HasModifiers && "Case should be unreachable with modifiers");
8672       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8673       Op.addImmOperands(Inst, 1);
8674     } else if (Op.isImm()) {
8675       OptionalIdx[Op.getImmTy()] = I;
8676     } else {
8677       llvm_unreachable("unhandled operand type");
8678     }
8679   }
8680   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8681     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8682   }
8683   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8684     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8685   }
8686   if (Desc.TSFlags & SIInstrFlags::VOP3P)
8687     cvtVOP3P(Inst, Operands, OptionalIdx);
8688   else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
8689     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8690   }
8691 
8692   if (IsDPP8) {
8693     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8694     using namespace llvm::AMDGPU::DPP;
8695     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8696   } else {
8697     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8698     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8699     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8700     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8701     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8702       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8703     }
8704   }
8705 }
8706 
8707 // Add dummy $old operand
8708 void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst,
8709                                       const OperandVector &Operands,
8710                                       bool IsDPP8) {
8711   Inst.addOperand(MCOperand::createReg(0));
8712   cvtDPP(Inst, Operands, IsDPP8);
8713 }
8714 
8715 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8716   OptionalImmIndexMap OptionalIdx;
8717 
8718   unsigned Opc = Inst.getOpcode();
8719   bool HasModifiers =
8720       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8721   unsigned I = 1;
8722   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8723   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8724     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8725   }
8726 
8727   int Fi = 0;
8728   for (unsigned E = Operands.size(); I != E; ++I) {
8729     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8730                                             MCOI::TIED_TO);
8731     if (TiedTo != -1) {
8732       assert((unsigned)TiedTo < Inst.getNumOperands());
8733       // handle tied old or src2 for MAC instructions
8734       Inst.addOperand(Inst.getOperand(TiedTo));
8735     }
8736     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8737     // Add the register arguments
8738     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
8740       // Skip it.
8741       continue;
8742     }
8743 
8744     if (IsDPP8) {
8745       if (Op.isDPP8()) {
8746         Op.addImmOperands(Inst, 1);
8747       } else if (HasModifiers &&
8748                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8749         Op.addRegWithFPInputModsOperands(Inst, 2);
8750       } else if (Op.isFI()) {
8751         Fi = Op.getImm();
8752       } else if (Op.isReg()) {
8753         Op.addRegOperands(Inst, 1);
8754       } else {
8755         llvm_unreachable("Invalid operand type");
8756       }
8757     } else {
8758       if (HasModifiers &&
8759           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8760         Op.addRegWithFPInputModsOperands(Inst, 2);
8761       } else if (Op.isReg()) {
8762         Op.addRegOperands(Inst, 1);
8763       } else if (Op.isDPPCtrl()) {
8764         Op.addImmOperands(Inst, 1);
8765       } else if (Op.isImm()) {
8766         // Handle optional arguments
8767         OptionalIdx[Op.getImmTy()] = I;
8768       } else {
8769         llvm_unreachable("Invalid operand type");
8770       }
8771     }
8772   }
8773 
8774   if (IsDPP8) {
8775     using namespace llvm::AMDGPU::DPP;
8776     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8777   } else {
8778     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8779     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8780     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8781     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8782       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8783     }
8784   }
8785 }
8786 
8787 //===----------------------------------------------------------------------===//
8788 // sdwa
8789 //===----------------------------------------------------------------------===//
8790 
8791 OperandMatchResultTy
8792 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8793                               AMDGPUOperand::ImmTy Type) {
8794   using namespace llvm::AMDGPU::SDWA;
8795 
8796   SMLoc S = getLoc();
8797   StringRef Value;
8798   OperandMatchResultTy res;
8799 
8800   SMLoc StringLoc;
8801   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8802   if (res != MatchOperand_Success) {
8803     return res;
8804   }
8805 
8806   int64_t Int;
8807   Int = StringSwitch<int64_t>(Value)
8808         .Case("BYTE_0", SdwaSel::BYTE_0)
8809         .Case("BYTE_1", SdwaSel::BYTE_1)
8810         .Case("BYTE_2", SdwaSel::BYTE_2)
8811         .Case("BYTE_3", SdwaSel::BYTE_3)
8812         .Case("WORD_0", SdwaSel::WORD_0)
8813         .Case("WORD_1", SdwaSel::WORD_1)
8814         .Case("DWORD", SdwaSel::DWORD)
8815         .Default(0xffffffff);
8816 
8817   if (Int == 0xffffffff) {
8818     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8819     return MatchOperand_ParseFail;
8820   }
8821 
8822   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8823   return MatchOperand_Success;
8824 }
8825 
8826 OperandMatchResultTy
8827 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8828   using namespace llvm::AMDGPU::SDWA;
8829 
8830   SMLoc S = getLoc();
8831   StringRef Value;
8832   OperandMatchResultTy res;
8833 
8834   SMLoc StringLoc;
8835   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8836   if (res != MatchOperand_Success) {
8837     return res;
8838   }
8839 
8840   int64_t Int;
8841   Int = StringSwitch<int64_t>(Value)
8842         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8843         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8844         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8845         .Default(0xffffffff);
8846 
8847   if (Int == 0xffffffff) {
8848     Error(StringLoc, "invalid dst_unused value");
8849     return MatchOperand_ParseFail;
8850   }
8851 
8852   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8853   return MatchOperand_Success;
8854 }
8855 
8856 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8857   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8858 }
8859 
8860 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8861   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8862 }
8863 
8864 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8865   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8866 }
8867 
8868 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8869   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8870 }
8871 
8872 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8873   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8874 }
8875 
8876 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8877                               uint64_t BasicInstType,
8878                               bool SkipDstVcc,
8879                               bool SkipSrcVcc) {
8880   using namespace llvm::AMDGPU::SDWA;
8881 
8882   OptionalImmIndexMap OptionalIdx;
8883   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8884   bool SkippedVcc = false;
8885 
8886   unsigned I = 1;
8887   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8888   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8889     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8890   }
8891 
8892   for (unsigned E = Operands.size(); I != E; ++I) {
8893     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8894     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8895         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
8900       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8901       if (BasicInstType == SIInstrFlags::VOP2 &&
8902           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8903            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8904         SkippedVcc = true;
8905         continue;
8906       } else if (BasicInstType == SIInstrFlags::VOPC &&
8907                  Inst.getNumOperands() == 0) {
8908         SkippedVcc = true;
8909         continue;
8910       }
8911     }
8912     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8913       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8914     } else if (Op.isImm()) {
8915       // Handle optional arguments
8916       OptionalIdx[Op.getImmTy()] = I;
8917     } else {
8918       llvm_unreachable("Invalid operand type");
8919     }
8920     SkippedVcc = false;
8921   }
8922 
8923   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8924       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8925       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
8927     switch (BasicInstType) {
8928     case SIInstrFlags::VOP1:
8929       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8930       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8931         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8932       }
8933       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8934       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8935       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8936       break;
8937 
8938     case SIInstrFlags::VOP2:
8939       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8940       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8941         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8942       }
8943       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8944       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8945       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8946       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8947       break;
8948 
8949     case SIInstrFlags::VOPC:
8950       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8951         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8952       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8953       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8954       break;
8955 
8956     default:
8957       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8958     }
8959   }
8960 
  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
8963   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8964       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8965     auto it = Inst.begin();
8966     std::advance(
8967       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8968     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8969   }
8970 }
8971 
8972 //===----------------------------------------------------------------------===//
8973 // mAI
8974 //===----------------------------------------------------------------------===//
8975 
8976 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8977   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8978 }
8979 
8980 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8981   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8982 }
8983 
8984 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8985   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8986 }
8987 
8988 /// Force static initialization.
8989 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8990   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8991   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8992 }
8993 
8994 #define GET_REGISTER_MATCHER
8995 #define GET_MATCHER_IMPLEMENTATION
8996 #define GET_MNEMONIC_SPELL_CHECKER
8997 #define GET_MNEMONIC_CHECKER
8998 #include "AMDGPUGenAsmMatcher.inc"
8999 
// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
9002 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9003                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected the corresponding token.
9008   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9009   switch (Kind) {
9010   case MCK_addr64:
9011     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9012   case MCK_gds:
9013     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9014   case MCK_lds:
9015     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9016   case MCK_idxen:
9017     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9018   case MCK_offen:
9019     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9020   case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token the match will fail, so we
    // need to handle it here.
9027     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9028   case MCK_SSrcF32:
9029     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9030   case MCK_SoppBrTarget:
9031     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
9032   case MCK_VReg32OrOff:
9033     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9034   case MCK_InterpSlot:
9035     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9036   case MCK_Attr:
9037     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9038   case MCK_AttrChan:
9039     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
9040   case MCK_ImmSMEMOffset:
9041     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
9042   case MCK_SReg_64:
9043   case MCK_SReg_64_XEXEC:
9044     // Null is defined as a 32-bit register but
9045     // it should also be enabled with 64-bit operands.
9046     // The following code enables it for SReg_64 operands
9047     // used as source and destination. Remaining source
9048     // operands are handled in isInlinableImm.
9049     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9050   default:
9051     return Match_InvalidOperand;
9052   }
9053 }
9054 
9055 //===----------------------------------------------------------------------===//
9056 // endpgm
9057 //===----------------------------------------------------------------------===//
9058 
9059 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
9060   SMLoc S = getLoc();
9061   int64_t Imm = 0;
9062 
9063   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
9065     Imm = 0;
9066   }
9067 
9068   if (!isUInt<16>(Imm)) {
9069     Error(S, "expected a 16-bit value");
9070     return MatchOperand_ParseFail;
9071   }
9072 
9073   Operands.push_back(
9074       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9075   return MatchOperand_Success;
9076 }
9077 
9078 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9079 
9080 //===----------------------------------------------------------------------===//
9081 // LDSDIR
9082 //===----------------------------------------------------------------------===//
9083 
9084 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
9085   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
9086 }
9087 
9088 bool AMDGPUOperand::isWaitVDST() const {
9089   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9090 }
9091 
9092 //===----------------------------------------------------------------------===//
9093 // VINTERP
9094 //===----------------------------------------------------------------------===//
9095 
9096 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
9097   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
9098 }
9099 
9100 bool AMDGPUOperand::isWaitEXP() const {
9101   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9102 }
9103