1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/TargetParser.h"
40 
41 using namespace llvm;
42 using namespace llvm::AMDGPU;
43 using namespace llvm::amdhsa;
44 
45 namespace {
46 
47 class AMDGPUAsmParser;
48 
49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
50 
51 //===----------------------------------------------------------------------===//
52 // Operand
53 //===----------------------------------------------------------------------===//
54 
55 class AMDGPUOperand : public MCParsedAsmOperand {
56   enum KindTy {
57     Token,
58     Immediate,
59     Register,
60     Expression
61   } Kind;
62 
63   SMLoc StartLoc, EndLoc;
64   const AMDGPUAsmParser *AsmParser;
65 
66 public:
67   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
68       : Kind(Kind_), AsmParser(AsmParser_) {}
69 
70   using Ptr = std::unique_ptr<AMDGPUOperand>;
71 
72   struct Modifiers {
73     bool Abs = false;
74     bool Neg = false;
75     bool Sext = false;
76 
77     bool hasFPModifiers() const { return Abs || Neg; }
78     bool hasIntModifiers() const { return Sext; }
79     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
80 
81     int64_t getFPModifiersOperand() const {
82       int64_t Operand = 0;
83       Operand |= Abs ? SISrcMods::ABS : 0u;
84       Operand |= Neg ? SISrcMods::NEG : 0u;
85       return Operand;
86     }
87 
88     int64_t getIntModifiersOperand() const {
89       int64_t Operand = 0;
90       Operand |= Sext ? SISrcMods::SEXT : 0u;
91       return Operand;
92     }
93 
94     int64_t getModifiersOperand() const {
95       assert(!(hasFPModifiers() && hasIntModifiers())
96            && "fp and int modifiers should not be used simultaneously");
97       if (hasFPModifiers()) {
98         return getFPModifiersOperand();
99       } else if (hasIntModifiers()) {
100         return getIntModifiersOperand();
101       } else {
102         return 0;
103       }
104     }
105 
106     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107   };
108 
109   enum ImmTy {
110     ImmTyNone,
111     ImmTyGDS,
112     ImmTyLDS,
113     ImmTyOffen,
114     ImmTyIdxen,
115     ImmTyAddr64,
116     ImmTyOffset,
117     ImmTyInstOffset,
118     ImmTyOffset0,
119     ImmTyOffset1,
120     ImmTyCPol,
121     ImmTySWZ,
122     ImmTyTFE,
123     ImmTyD16,
124     ImmTyClampSI,
125     ImmTyOModSI,
126     ImmTyDPP8,
127     ImmTyDppCtrl,
128     ImmTyDppRowMask,
129     ImmTyDppBankMask,
130     ImmTyDppBoundCtrl,
131     ImmTyDppFi,
132     ImmTySdwaDstSel,
133     ImmTySdwaSrc0Sel,
134     ImmTySdwaSrc1Sel,
135     ImmTySdwaDstUnused,
136     ImmTyDMask,
137     ImmTyDim,
138     ImmTyUNorm,
139     ImmTyDA,
140     ImmTyR128A16,
141     ImmTyA16,
142     ImmTyLWE,
143     ImmTyExpTgt,
144     ImmTyExpCompr,
145     ImmTyExpVM,
146     ImmTyFORMAT,
147     ImmTyHwreg,
148     ImmTyOff,
149     ImmTySendMsg,
150     ImmTyInterpSlot,
151     ImmTyInterpAttr,
152     ImmTyAttrChan,
153     ImmTyOpSel,
154     ImmTyOpSelHi,
155     ImmTyNegLo,
156     ImmTyNegHi,
157     ImmTySwizzle,
158     ImmTyGprIdxMode,
159     ImmTyHigh,
160     ImmTyBLGP,
161     ImmTyCBSZ,
162     ImmTyABID,
163     ImmTyEndpgm,
164     ImmTyWaitVDST,
165   };
166 
167   enum ImmKindTy {
168     ImmKindTyNone,
169     ImmKindTyLiteral,
170     ImmKindTyConst,
171   };
172 
173 private:
174   struct TokOp {
175     const char *Data;
176     unsigned Length;
177   };
178 
179   struct ImmOp {
180     int64_t Val;
181     ImmTy Type;
182     bool IsFPImm;
183     mutable ImmKindTy Kind;
184     Modifiers Mods;
185   };
186 
187   struct RegOp {
188     unsigned RegNo;
189     Modifiers Mods;
190   };
191 
192   union {
193     TokOp Tok;
194     ImmOp Imm;
195     RegOp Reg;
196     const MCExpr *Expr;
197   };
198 
199 public:
200   bool isToken() const override {
201     if (Kind == Token)
202       return true;
203 
204     // When parsing operands, we can't always tell if something was meant to be
205     // a token, like 'gds', or an expression that references a global variable.
206     // In this case, we assume the string is an expression, and if we need to
207     // interpret is a token, then we treat the symbol name as the token.
208     return isSymbolRefExpr();
209   }
210 
211   bool isSymbolRefExpr() const {
212     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
213   }
214 
215   bool isImm() const override {
216     return Kind == Immediate;
217   }
218 
219   void setImmKindNone() const {
220     assert(isImm());
221     Imm.Kind = ImmKindTyNone;
222   }
223 
224   void setImmKindLiteral() const {
225     assert(isImm());
226     Imm.Kind = ImmKindTyLiteral;
227   }
228 
229   void setImmKindConst() const {
230     assert(isImm());
231     Imm.Kind = ImmKindTyConst;
232   }
233 
234   bool IsImmKindLiteral() const {
235     return isImm() && Imm.Kind == ImmKindTyLiteral;
236   }
237 
238   bool isImmKindConst() const {
239     return isImm() && Imm.Kind == ImmKindTyConst;
240   }
241 
242   bool isInlinableImm(MVT type) const;
243   bool isLiteralImm(MVT type) const;
244 
245   bool isRegKind() const {
246     return Kind == Register;
247   }
248 
249   bool isReg() const override {
250     return isRegKind() && !hasModifiers();
251   }
252 
253   bool isRegOrInline(unsigned RCID, MVT type) const {
254     return isRegClass(RCID) || isInlinableImm(type);
255   }
256 
257   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
258     return isRegOrInline(RCID, type) || isLiteralImm(type);
259   }
260 
261   bool isRegOrImmWithInt16InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
263   }
264 
265   bool isRegOrImmWithInt32InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
267   }
268 
269   bool isRegOrImmWithInt64InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
271   }
272 
273   bool isRegOrImmWithFP16InputMods() const {
274     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
275   }
276 
277   bool isRegOrImmWithFP32InputMods() const {
278     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
279   }
280 
281   bool isRegOrImmWithFP64InputMods() const {
282     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
283   }
284 
285   bool isVReg() const {
286     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
287            isRegClass(AMDGPU::VReg_64RegClassID) ||
288            isRegClass(AMDGPU::VReg_96RegClassID) ||
289            isRegClass(AMDGPU::VReg_128RegClassID) ||
290            isRegClass(AMDGPU::VReg_160RegClassID) ||
291            isRegClass(AMDGPU::VReg_192RegClassID) ||
292            isRegClass(AMDGPU::VReg_256RegClassID) ||
293            isRegClass(AMDGPU::VReg_512RegClassID) ||
294            isRegClass(AMDGPU::VReg_1024RegClassID);
295   }
296 
297   bool isVReg32() const {
298     return isRegClass(AMDGPU::VGPR_32RegClassID);
299   }
300 
301   bool isVReg32OrOff() const {
302     return isOff() || isVReg32();
303   }
304 
305   bool isNull() const {
306     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
307   }
308 
309   bool isVRegWithInputMods() const;
310 
311   bool isSDWAOperand(MVT type) const;
312   bool isSDWAFP16Operand() const;
313   bool isSDWAFP32Operand() const;
314   bool isSDWAInt16Operand() const;
315   bool isSDWAInt32Operand() const;
316 
317   bool isImmTy(ImmTy ImmT) const {
318     return isImm() && Imm.Type == ImmT;
319   }
320 
321   bool isImmModifier() const {
322     return isImm() && Imm.Type != ImmTyNone;
323   }
324 
325   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
326   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
327   bool isDMask() const { return isImmTy(ImmTyDMask); }
328   bool isDim() const { return isImmTy(ImmTyDim); }
329   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
330   bool isDA() const { return isImmTy(ImmTyDA); }
331   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
332   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
333   bool isLWE() const { return isImmTy(ImmTyLWE); }
334   bool isOff() const { return isImmTy(ImmTyOff); }
335   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
336   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
337   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
338   bool isOffen() const { return isImmTy(ImmTyOffen); }
339   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
340   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
341   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
342   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
343   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
344 
345   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
346   bool isGDS() const { return isImmTy(ImmTyGDS); }
347   bool isLDS() const { return isImmTy(ImmTyLDS); }
348   bool isCPol() const { return isImmTy(ImmTyCPol); }
349   bool isSWZ() const { return isImmTy(ImmTySWZ); }
350   bool isTFE() const { return isImmTy(ImmTyTFE); }
351   bool isD16() const { return isImmTy(ImmTyD16); }
352   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
353   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
354   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
355   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
356   bool isFI() const { return isImmTy(ImmTyDppFi); }
357   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
358   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
359   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
360   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
361   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
362   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
363   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
364   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
365   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
366   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
367   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
368   bool isHigh() const { return isImmTy(ImmTyHigh); }
369 
370   bool isMod() const {
371     return isClampSI() || isOModSI();
372   }
373 
374   bool isRegOrImm() const {
375     return isReg() || isImm();
376   }
377 
378   bool isRegClass(unsigned RCID) const;
379 
380   bool isInlineValue() const;
381 
382   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
383     return isRegOrInline(RCID, type) && !hasModifiers();
384   }
385 
386   bool isSCSrcB16() const {
387     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
388   }
389 
390   bool isSCSrcV2B16() const {
391     return isSCSrcB16();
392   }
393 
394   bool isSCSrcB32() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
396   }
397 
398   bool isSCSrcB64() const {
399     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
400   }
401 
402   bool isBoolReg() const;
403 
404   bool isSCSrcF16() const {
405     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
406   }
407 
408   bool isSCSrcV2F16() const {
409     return isSCSrcF16();
410   }
411 
412   bool isSCSrcF32() const {
413     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
414   }
415 
416   bool isSCSrcF64() const {
417     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
418   }
419 
420   bool isSSrcB32() const {
421     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
422   }
423 
424   bool isSSrcB16() const {
425     return isSCSrcB16() || isLiteralImm(MVT::i16);
426   }
427 
428   bool isSSrcV2B16() const {
429     llvm_unreachable("cannot happen");
430     return isSSrcB16();
431   }
432 
433   bool isSSrcB64() const {
434     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
435     // See isVSrc64().
436     return isSCSrcB64() || isLiteralImm(MVT::i64);
437   }
438 
439   bool isSSrcF32() const {
440     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
441   }
442 
443   bool isSSrcF64() const {
444     return isSCSrcB64() || isLiteralImm(MVT::f64);
445   }
446 
447   bool isSSrcF16() const {
448     return isSCSrcB16() || isLiteralImm(MVT::f16);
449   }
450 
451   bool isSSrcV2F16() const {
452     llvm_unreachable("cannot happen");
453     return isSSrcF16();
454   }
455 
456   bool isSSrcV2FP32() const {
457     llvm_unreachable("cannot happen");
458     return isSSrcF32();
459   }
460 
461   bool isSCSrcV2FP32() const {
462     llvm_unreachable("cannot happen");
463     return isSCSrcF32();
464   }
465 
466   bool isSSrcV2INT32() const {
467     llvm_unreachable("cannot happen");
468     return isSSrcB32();
469   }
470 
471   bool isSCSrcV2INT32() const {
472     llvm_unreachable("cannot happen");
473     return isSCSrcB32();
474   }
475 
476   bool isSSrcOrLdsB32() const {
477     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
478            isLiteralImm(MVT::i32) || isExpr();
479   }
480 
481   bool isVCSrcB32() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
483   }
484 
485   bool isVCSrcB64() const {
486     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
487   }
488 
489   bool isVCSrcB16() const {
490     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
491   }
492 
493   bool isVCSrcV2B16() const {
494     return isVCSrcB16();
495   }
496 
497   bool isVCSrcF32() const {
498     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
499   }
500 
501   bool isVCSrcF64() const {
502     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
503   }
504 
505   bool isVCSrcF16() const {
506     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
507   }
508 
509   bool isVCSrcV2F16() const {
510     return isVCSrcF16();
511   }
512 
513   bool isVSrcB32() const {
514     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
515   }
516 
517   bool isVSrcB64() const {
518     return isVCSrcF64() || isLiteralImm(MVT::i64);
519   }
520 
521   bool isVSrcB16() const {
522     return isVCSrcB16() || isLiteralImm(MVT::i16);
523   }
524 
525   bool isVSrcV2B16() const {
526     return isVSrcB16() || isLiteralImm(MVT::v2i16);
527   }
528 
529   bool isVCSrcV2FP32() const {
530     return isVCSrcF64();
531   }
532 
533   bool isVSrcV2FP32() const {
534     return isVSrcF64() || isLiteralImm(MVT::v2f32);
535   }
536 
537   bool isVCSrcV2INT32() const {
538     return isVCSrcB64();
539   }
540 
541   bool isVSrcV2INT32() const {
542     return isVSrcB64() || isLiteralImm(MVT::v2i32);
543   }
544 
545   bool isVSrcF32() const {
546     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
547   }
548 
549   bool isVSrcF64() const {
550     return isVCSrcF64() || isLiteralImm(MVT::f64);
551   }
552 
553   bool isVSrcF16() const {
554     return isVCSrcF16() || isLiteralImm(MVT::f16);
555   }
556 
557   bool isVSrcV2F16() const {
558     return isVSrcF16() || isLiteralImm(MVT::v2f16);
559   }
560 
561   bool isVISrcB32() const {
562     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
563   }
564 
565   bool isVISrcB16() const {
566     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
567   }
568 
569   bool isVISrcV2B16() const {
570     return isVISrcB16();
571   }
572 
573   bool isVISrcF32() const {
574     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
575   }
576 
577   bool isVISrcF16() const {
578     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
579   }
580 
581   bool isVISrcV2F16() const {
582     return isVISrcF16() || isVISrcB32();
583   }
584 
585   bool isVISrc_64B64() const {
586     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
587   }
588 
589   bool isVISrc_64F64() const {
590     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
591   }
592 
593   bool isVISrc_64V2FP32() const {
594     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
595   }
596 
597   bool isVISrc_64V2INT32() const {
598     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
599   }
600 
601   bool isVISrc_256B64() const {
602     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
603   }
604 
605   bool isVISrc_256F64() const {
606     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
607   }
608 
609   bool isVISrc_128B16() const {
610     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
611   }
612 
613   bool isVISrc_128V2B16() const {
614     return isVISrc_128B16();
615   }
616 
617   bool isVISrc_128B32() const {
618     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
619   }
620 
621   bool isVISrc_128F32() const {
622     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
623   }
624 
625   bool isVISrc_256V2FP32() const {
626     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
627   }
628 
629   bool isVISrc_256V2INT32() const {
630     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
631   }
632 
633   bool isVISrc_512B32() const {
634     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
635   }
636 
637   bool isVISrc_512B16() const {
638     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
639   }
640 
641   bool isVISrc_512V2B16() const {
642     return isVISrc_512B16();
643   }
644 
645   bool isVISrc_512F32() const {
646     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
647   }
648 
649   bool isVISrc_512F16() const {
650     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
651   }
652 
653   bool isVISrc_512V2F16() const {
654     return isVISrc_512F16() || isVISrc_512B32();
655   }
656 
657   bool isVISrc_1024B32() const {
658     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
659   }
660 
661   bool isVISrc_1024B16() const {
662     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
663   }
664 
665   bool isVISrc_1024V2B16() const {
666     return isVISrc_1024B16();
667   }
668 
669   bool isVISrc_1024F32() const {
670     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
671   }
672 
673   bool isVISrc_1024F16() const {
674     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
675   }
676 
677   bool isVISrc_1024V2F16() const {
678     return isVISrc_1024F16() || isVISrc_1024B32();
679   }
680 
681   bool isAISrcB32() const {
682     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
683   }
684 
685   bool isAISrcB16() const {
686     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
687   }
688 
689   bool isAISrcV2B16() const {
690     return isAISrcB16();
691   }
692 
693   bool isAISrcF32() const {
694     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
695   }
696 
697   bool isAISrcF16() const {
698     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
699   }
700 
701   bool isAISrcV2F16() const {
702     return isAISrcF16() || isAISrcB32();
703   }
704 
705   bool isAISrc_64B64() const {
706     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
707   }
708 
709   bool isAISrc_64F64() const {
710     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
711   }
712 
713   bool isAISrc_128B32() const {
714     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
715   }
716 
717   bool isAISrc_128B16() const {
718     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
719   }
720 
721   bool isAISrc_128V2B16() const {
722     return isAISrc_128B16();
723   }
724 
725   bool isAISrc_128F32() const {
726     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
727   }
728 
729   bool isAISrc_128F16() const {
730     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
731   }
732 
733   bool isAISrc_128V2F16() const {
734     return isAISrc_128F16() || isAISrc_128B32();
735   }
736 
737   bool isVISrc_128F16() const {
738     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
739   }
740 
741   bool isVISrc_128V2F16() const {
742     return isVISrc_128F16() || isVISrc_128B32();
743   }
744 
745   bool isAISrc_256B64() const {
746     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
747   }
748 
749   bool isAISrc_256F64() const {
750     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
751   }
752 
753   bool isAISrc_512B32() const {
754     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
755   }
756 
757   bool isAISrc_512B16() const {
758     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
759   }
760 
761   bool isAISrc_512V2B16() const {
762     return isAISrc_512B16();
763   }
764 
765   bool isAISrc_512F32() const {
766     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
767   }
768 
769   bool isAISrc_512F16() const {
770     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
771   }
772 
773   bool isAISrc_512V2F16() const {
774     return isAISrc_512F16() || isAISrc_512B32();
775   }
776 
777   bool isAISrc_1024B32() const {
778     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
779   }
780 
781   bool isAISrc_1024B16() const {
782     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
783   }
784 
785   bool isAISrc_1024V2B16() const {
786     return isAISrc_1024B16();
787   }
788 
789   bool isAISrc_1024F32() const {
790     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
791   }
792 
793   bool isAISrc_1024F16() const {
794     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
795   }
796 
797   bool isAISrc_1024V2F16() const {
798     return isAISrc_1024F16() || isAISrc_1024B32();
799   }
800 
801   bool isKImmFP32() const {
802     return isLiteralImm(MVT::f32);
803   }
804 
805   bool isKImmFP16() const {
806     return isLiteralImm(MVT::f16);
807   }
808 
809   bool isMem() const override {
810     return false;
811   }
812 
813   bool isExpr() const {
814     return Kind == Expression;
815   }
816 
817   bool isSoppBrTarget() const {
818     return isExpr() || isImm();
819   }
820 
821   bool isSWaitCnt() const;
822   bool isDepCtr() const;
823   bool isSDelayAlu() const;
824   bool isHwreg() const;
825   bool isSendMsg() const;
826   bool isSwizzle() const;
827   bool isSMRDOffset8() const;
828   bool isSMEMOffset() const;
829   bool isSMRDLiteralOffset() const;
830   bool isDPP8() const;
831   bool isDPPCtrl() const;
832   bool isBLGP() const;
833   bool isCBSZ() const;
834   bool isABID() const;
835   bool isGPRIdxMode() const;
836   bool isS16Imm() const;
837   bool isU16Imm() const;
838   bool isEndpgm() const;
839   bool isWaitVDST() const;
840 
841   StringRef getExpressionAsToken() const {
842     assert(isExpr());
843     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
844     return S->getSymbol().getName();
845   }
846 
847   StringRef getToken() const {
848     assert(isToken());
849 
850     if (Kind == Expression)
851       return getExpressionAsToken();
852 
853     return StringRef(Tok.Data, Tok.Length);
854   }
855 
856   int64_t getImm() const {
857     assert(isImm());
858     return Imm.Val;
859   }
860 
861   void setImm(int64_t Val) {
862     assert(isImm());
863     Imm.Val = Val;
864   }
865 
866   ImmTy getImmTy() const {
867     assert(isImm());
868     return Imm.Type;
869   }
870 
871   unsigned getReg() const override {
872     assert(isRegKind());
873     return Reg.RegNo;
874   }
875 
876   SMLoc getStartLoc() const override {
877     return StartLoc;
878   }
879 
880   SMLoc getEndLoc() const override {
881     return EndLoc;
882   }
883 
884   SMRange getLocRange() const {
885     return SMRange(StartLoc, EndLoc);
886   }
887 
888   Modifiers getModifiers() const {
889     assert(isRegKind() || isImmTy(ImmTyNone));
890     return isRegKind() ? Reg.Mods : Imm.Mods;
891   }
892 
893   void setModifiers(Modifiers Mods) {
894     assert(isRegKind() || isImmTy(ImmTyNone));
895     if (isRegKind())
896       Reg.Mods = Mods;
897     else
898       Imm.Mods = Mods;
899   }
900 
901   bool hasModifiers() const {
902     return getModifiers().hasModifiers();
903   }
904 
905   bool hasFPModifiers() const {
906     return getModifiers().hasFPModifiers();
907   }
908 
909   bool hasIntModifiers() const {
910     return getModifiers().hasIntModifiers();
911   }
912 
913   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
914 
915   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
916 
917   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
918 
919   template <unsigned Bitwidth>
920   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
921 
922   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
923     addKImmFPOperands<16>(Inst, N);
924   }
925 
926   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
927     addKImmFPOperands<32>(Inst, N);
928   }
929 
930   void addRegOperands(MCInst &Inst, unsigned N) const;
931 
932   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
933     addRegOperands(Inst, N);
934   }
935 
936   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
937     if (isRegKind())
938       addRegOperands(Inst, N);
939     else if (isExpr())
940       Inst.addOperand(MCOperand::createExpr(Expr));
941     else
942       addImmOperands(Inst, N);
943   }
944 
945   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
946     Modifiers Mods = getModifiers();
947     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
948     if (isRegKind()) {
949       addRegOperands(Inst, N);
950     } else {
951       addImmOperands(Inst, N, false);
952     }
953   }
954 
955   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
956     assert(!hasIntModifiers());
957     addRegOrImmWithInputModsOperands(Inst, N);
958   }
959 
960   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
961     assert(!hasFPModifiers());
962     addRegOrImmWithInputModsOperands(Inst, N);
963   }
964 
965   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
966     Modifiers Mods = getModifiers();
967     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
968     assert(isRegKind());
969     addRegOperands(Inst, N);
970   }
971 
972   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
973     assert(!hasIntModifiers());
974     addRegWithInputModsOperands(Inst, N);
975   }
976 
977   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
978     assert(!hasFPModifiers());
979     addRegWithInputModsOperands(Inst, N);
980   }
981 
982   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
983     if (isImm())
984       addImmOperands(Inst, N);
985     else {
986       assert(isExpr());
987       Inst.addOperand(MCOperand::createExpr(Expr));
988     }
989   }
990 
991   static void printImmTy(raw_ostream& OS, ImmTy Type) {
992     switch (Type) {
993     case ImmTyNone: OS << "None"; break;
994     case ImmTyGDS: OS << "GDS"; break;
995     case ImmTyLDS: OS << "LDS"; break;
996     case ImmTyOffen: OS << "Offen"; break;
997     case ImmTyIdxen: OS << "Idxen"; break;
998     case ImmTyAddr64: OS << "Addr64"; break;
999     case ImmTyOffset: OS << "Offset"; break;
1000     case ImmTyInstOffset: OS << "InstOffset"; break;
1001     case ImmTyOffset0: OS << "Offset0"; break;
1002     case ImmTyOffset1: OS << "Offset1"; break;
1003     case ImmTyCPol: OS << "CPol"; break;
1004     case ImmTySWZ: OS << "SWZ"; break;
1005     case ImmTyTFE: OS << "TFE"; break;
1006     case ImmTyD16: OS << "D16"; break;
1007     case ImmTyFORMAT: OS << "FORMAT"; break;
1008     case ImmTyClampSI: OS << "ClampSI"; break;
1009     case ImmTyOModSI: OS << "OModSI"; break;
1010     case ImmTyDPP8: OS << "DPP8"; break;
1011     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1012     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1013     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1014     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1015     case ImmTyDppFi: OS << "FI"; break;
1016     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1017     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1018     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1019     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1020     case ImmTyDMask: OS << "DMask"; break;
1021     case ImmTyDim: OS << "Dim"; break;
1022     case ImmTyUNorm: OS << "UNorm"; break;
1023     case ImmTyDA: OS << "DA"; break;
1024     case ImmTyR128A16: OS << "R128A16"; break;
1025     case ImmTyA16: OS << "A16"; break;
1026     case ImmTyLWE: OS << "LWE"; break;
1027     case ImmTyOff: OS << "Off"; break;
1028     case ImmTyExpTgt: OS << "ExpTgt"; break;
1029     case ImmTyExpCompr: OS << "ExpCompr"; break;
1030     case ImmTyExpVM: OS << "ExpVM"; break;
1031     case ImmTyHwreg: OS << "Hwreg"; break;
1032     case ImmTySendMsg: OS << "SendMsg"; break;
1033     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1034     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1035     case ImmTyAttrChan: OS << "AttrChan"; break;
1036     case ImmTyOpSel: OS << "OpSel"; break;
1037     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1038     case ImmTyNegLo: OS << "NegLo"; break;
1039     case ImmTyNegHi: OS << "NegHi"; break;
1040     case ImmTySwizzle: OS << "Swizzle"; break;
1041     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1042     case ImmTyHigh: OS << "High"; break;
1043     case ImmTyBLGP: OS << "BLGP"; break;
1044     case ImmTyCBSZ: OS << "CBSZ"; break;
1045     case ImmTyABID: OS << "ABID"; break;
1046     case ImmTyEndpgm: OS << "Endpgm"; break;
1047     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1048     }
1049   }
1050 
1051   void print(raw_ostream &OS) const override {
1052     switch (Kind) {
1053     case Register:
1054       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1055       break;
1056     case Immediate:
1057       OS << '<' << getImm();
1058       if (getImmTy() != ImmTyNone) {
1059         OS << " type: "; printImmTy(OS, getImmTy());
1060       }
1061       OS << " mods: " << Imm.Mods << '>';
1062       break;
1063     case Token:
1064       OS << '\'' << getToken() << '\'';
1065       break;
1066     case Expression:
1067       OS << "<expr " << *Expr << '>';
1068       break;
1069     }
1070   }
1071 
1072   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1073                                       int64_t Val, SMLoc Loc,
1074                                       ImmTy Type = ImmTyNone,
1075                                       bool IsFPImm = false) {
1076     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1077     Op->Imm.Val = Val;
1078     Op->Imm.IsFPImm = IsFPImm;
1079     Op->Imm.Kind = ImmKindTyNone;
1080     Op->Imm.Type = Type;
1081     Op->Imm.Mods = Modifiers();
1082     Op->StartLoc = Loc;
1083     Op->EndLoc = Loc;
1084     return Op;
1085   }
1086 
1087   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1088                                         StringRef Str, SMLoc Loc,
1089                                         bool HasExplicitEncodingSize = true) {
1090     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1091     Res->Tok.Data = Str.data();
1092     Res->Tok.Length = Str.size();
1093     Res->StartLoc = Loc;
1094     Res->EndLoc = Loc;
1095     return Res;
1096   }
1097 
1098   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1099                                       unsigned RegNo, SMLoc S,
1100                                       SMLoc E) {
1101     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1102     Op->Reg.RegNo = RegNo;
1103     Op->Reg.Mods = Modifiers();
1104     Op->StartLoc = S;
1105     Op->EndLoc = E;
1106     return Op;
1107   }
1108 
1109   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1110                                        const class MCExpr *Expr, SMLoc S) {
1111     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1112     Op->Expr = Expr;
1113     Op->StartLoc = S;
1114     Op->EndLoc = S;
1115     return Op;
1116   }
1117 };
1118 
1119 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1120   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1121   return OS;
1122 }
1123 
1124 //===----------------------------------------------------------------------===//
1125 // AsmParser
1126 //===----------------------------------------------------------------------===//
1127 
1128 // Holds info related to the current kernel, e.g. count of SGPRs used.
1129 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1130 // .amdgpu_hsa_kernel or at EOF.
1131 class KernelScopeInfo {
1132   int SgprIndexUnusedMin = -1;
1133   int VgprIndexUnusedMin = -1;
1134   int AgprIndexUnusedMin = -1;
1135   MCContext *Ctx = nullptr;
1136   MCSubtargetInfo const *MSTI = nullptr;
1137 
1138   void usesSgprAt(int i) {
1139     if (i >= SgprIndexUnusedMin) {
1140       SgprIndexUnusedMin = ++i;
1141       if (Ctx) {
1142         MCSymbol* const Sym =
1143           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1144         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1145       }
1146     }
1147   }
1148 
1149   void usesVgprAt(int i) {
1150     if (i >= VgprIndexUnusedMin) {
1151       VgprIndexUnusedMin = ++i;
1152       if (Ctx) {
1153         MCSymbol* const Sym =
1154           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1155         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1156                                          VgprIndexUnusedMin);
1157         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1158       }
1159     }
1160   }
1161 
1162   void usesAgprAt(int i) {
1163     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1164     if (!hasMAIInsts(*MSTI))
1165       return;
1166 
1167     if (i >= AgprIndexUnusedMin) {
1168       AgprIndexUnusedMin = ++i;
1169       if (Ctx) {
1170         MCSymbol* const Sym =
1171           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1172         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1173 
1174         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1175         MCSymbol* const vSym =
1176           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1177         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1178                                          VgprIndexUnusedMin);
1179         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1180       }
1181     }
1182   }
1183 
1184 public:
1185   KernelScopeInfo() = default;
1186 
1187   void initialize(MCContext &Context) {
1188     Ctx = &Context;
1189     MSTI = Ctx->getSubtargetInfo();
1190 
1191     usesSgprAt(SgprIndexUnusedMin = -1);
1192     usesVgprAt(VgprIndexUnusedMin = -1);
1193     if (hasMAIInsts(*MSTI)) {
1194       usesAgprAt(AgprIndexUnusedMin = -1);
1195     }
1196   }
1197 
1198   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1199                     unsigned RegWidth) {
1200     switch (RegKind) {
1201     case IS_SGPR:
1202       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1203       break;
1204     case IS_AGPR:
1205       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1206       break;
1207     case IS_VGPR:
1208       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1209       break;
1210     default:
1211       break;
1212     }
1213   }
1214 };
1215 
1216 class AMDGPUAsmParser : public MCTargetAsmParser {
1217   MCAsmParser &Parser;
1218 
1219   // Number of extra operands parsed after the first optional operand.
1220   // This may be necessary to skip hardcoded mandatory operands.
1221   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1222 
1223   unsigned ForcedEncodingSize = 0;
1224   bool ForcedDPP = false;
1225   bool ForcedSDWA = false;
1226   KernelScopeInfo KernelScope;
1227   unsigned CPolSeen;
1228 
1229   /// @name Auto-generated Match Functions
1230   /// {
1231 
1232 #define GET_ASSEMBLER_HEADER
1233 #include "AMDGPUGenAsmMatcher.inc"
1234 
1235   /// }
1236 
1237 private:
1238   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1239   bool OutOfRangeError(SMRange Range);
1240   /// Calculate VGPR/SGPR blocks required for given target, reserved
1241   /// registers, and user-specified NextFreeXGPR values.
1242   ///
1243   /// \param Features [in] Target features, used for bug corrections.
1244   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1245   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1246   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1247   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1248   /// descriptor field, if valid.
1249   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1250   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1251   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1252   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1253   /// \param VGPRBlocks [out] Result VGPR block count.
1254   /// \param SGPRBlocks [out] Result SGPR block count.
1255   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1256                           bool FlatScrUsed, bool XNACKUsed,
1257                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1258                           SMRange VGPRRange, unsigned NextFreeSGPR,
1259                           SMRange SGPRRange, unsigned &VGPRBlocks,
1260                           unsigned &SGPRBlocks);
1261   bool ParseDirectiveAMDGCNTarget();
1262   bool ParseDirectiveAMDHSAKernel();
1263   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1264   bool ParseDirectiveHSACodeObjectVersion();
1265   bool ParseDirectiveHSACodeObjectISA();
1266   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1267   bool ParseDirectiveAMDKernelCodeT();
1268   // TODO: Possibly make subtargetHasRegister const.
1269   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1270   bool ParseDirectiveAMDGPUHsaKernel();
1271 
1272   bool ParseDirectiveISAVersion();
1273   bool ParseDirectiveHSAMetadata();
1274   bool ParseDirectivePALMetadataBegin();
1275   bool ParseDirectivePALMetadata();
1276   bool ParseDirectiveAMDGPULDS();
1277 
1278   /// Common code to parse out a block of text (typically YAML) between start and
1279   /// end directives.
1280   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1281                            const char *AssemblerDirectiveEnd,
1282                            std::string &CollectString);
1283 
1284   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1285                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1286   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1287                            unsigned &RegNum, unsigned &RegWidth,
1288                            bool RestoreOnFailure = false);
1289   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1290                            unsigned &RegNum, unsigned &RegWidth,
1291                            SmallVectorImpl<AsmToken> &Tokens);
1292   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1293                            unsigned &RegWidth,
1294                            SmallVectorImpl<AsmToken> &Tokens);
1295   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1296                            unsigned &RegWidth,
1297                            SmallVectorImpl<AsmToken> &Tokens);
1298   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1299                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1300   bool ParseRegRange(unsigned& Num, unsigned& Width);
1301   unsigned getRegularReg(RegisterKind RegKind,
1302                          unsigned RegNum,
1303                          unsigned RegWidth,
1304                          SMLoc Loc);
1305 
1306   bool isRegister();
1307   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1308   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1309   void initializeGprCountSymbol(RegisterKind RegKind);
1310   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1311                              unsigned RegWidth);
1312   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1313                     bool IsAtomic, bool IsLds = false);
1314   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1315                  bool IsGdsHardcoded);
1316 
1317 public:
1318   enum AMDGPUMatchResultTy {
1319     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1320   };
1321   enum OperandMode {
1322     OperandMode_Default,
1323     OperandMode_NSA,
1324   };
1325 
1326   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1327 
1328   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1329                const MCInstrInfo &MII,
1330                const MCTargetOptions &Options)
1331       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1332     MCAsmParserExtension::Initialize(Parser);
1333 
1334     if (getFeatureBits().none()) {
1335       // Set default features.
1336       copySTI().ToggleFeature("southern-islands");
1337     }
1338 
1339     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1340 
1341     {
1342       // TODO: make those pre-defined variables read-only.
1343       // Currently there is none suitable machinery in the core llvm-mc for this.
1344       // MCSymbol::isRedefinable is intended for another purpose, and
1345       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1346       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1347       MCContext &Ctx = getContext();
1348       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1349         MCSymbol *Sym =
1350             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1351         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1352         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1353         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1354         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1355         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1356       } else {
1357         MCSymbol *Sym =
1358             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1359         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1360         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1361         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1362         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1363         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1364       }
1365       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1366         initializeGprCountSymbol(IS_VGPR);
1367         initializeGprCountSymbol(IS_SGPR);
1368       } else
1369         KernelScope.initialize(getContext());
1370     }
1371   }
1372 
1373   bool hasMIMG_R128() const {
1374     return AMDGPU::hasMIMG_R128(getSTI());
1375   }
1376 
1377   bool hasPackedD16() const {
1378     return AMDGPU::hasPackedD16(getSTI());
1379   }
1380 
1381   bool hasGFX10A16() const {
1382     return AMDGPU::hasGFX10A16(getSTI());
1383   }
1384 
1385   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1386 
1387   bool isSI() const {
1388     return AMDGPU::isSI(getSTI());
1389   }
1390 
1391   bool isCI() const {
1392     return AMDGPU::isCI(getSTI());
1393   }
1394 
1395   bool isVI() const {
1396     return AMDGPU::isVI(getSTI());
1397   }
1398 
1399   bool isGFX9() const {
1400     return AMDGPU::isGFX9(getSTI());
1401   }
1402 
1403   // TODO: isGFX90A is also true for GFX940. We need to clean it.
1404   bool isGFX90A() const {
1405     return AMDGPU::isGFX90A(getSTI());
1406   }
1407 
1408   bool isGFX940() const {
1409     return AMDGPU::isGFX940(getSTI());
1410   }
1411 
1412   bool isGFX9Plus() const {
1413     return AMDGPU::isGFX9Plus(getSTI());
1414   }
1415 
1416   bool isGFX10() const {
1417     return AMDGPU::isGFX10(getSTI());
1418   }
1419 
1420   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1421 
1422   bool isGFX11() const {
1423     return AMDGPU::isGFX11(getSTI());
1424   }
1425 
1426   bool isGFX11Plus() const {
1427     return AMDGPU::isGFX11Plus(getSTI());
1428   }
1429 
1430   bool isGFX10_BEncoding() const {
1431     return AMDGPU::isGFX10_BEncoding(getSTI());
1432   }
1433 
1434   bool hasInv2PiInlineImm() const {
1435     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1436   }
1437 
1438   bool hasFlatOffsets() const {
1439     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1440   }
1441 
1442   bool hasArchitectedFlatScratch() const {
1443     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1444   }
1445 
1446   bool hasSGPR102_SGPR103() const {
1447     return !isVI() && !isGFX9();
1448   }
1449 
1450   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1451 
1452   bool hasIntClamp() const {
1453     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1454   }
1455 
1456   AMDGPUTargetStreamer &getTargetStreamer() {
1457     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1458     return static_cast<AMDGPUTargetStreamer &>(TS);
1459   }
1460 
1461   const MCRegisterInfo *getMRI() const {
1462     // We need this const_cast because for some reason getContext() is not const
1463     // in MCAsmParser.
1464     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1465   }
1466 
1467   const MCInstrInfo *getMII() const {
1468     return &MII;
1469   }
1470 
1471   const FeatureBitset &getFeatureBits() const {
1472     return getSTI().getFeatureBits();
1473   }
1474 
1475   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1476   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1477   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1478 
1479   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1480   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1481   bool isForcedDPP() const { return ForcedDPP; }
1482   bool isForcedSDWA() const { return ForcedSDWA; }
1483   ArrayRef<unsigned> getMatchedVariants() const;
1484   StringRef getMatchedVariantName() const;
1485 
1486   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1487   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1488                      bool RestoreOnFailure);
1489   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1490   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1491                                         SMLoc &EndLoc) override;
1492   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1493   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1494                                       unsigned Kind) override;
1495   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1496                                OperandVector &Operands, MCStreamer &Out,
1497                                uint64_t &ErrorInfo,
1498                                bool MatchingInlineAsm) override;
1499   bool ParseDirective(AsmToken DirectiveID) override;
1500   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1501                                     OperandMode Mode = OperandMode_Default);
1502   StringRef parseMnemonicSuffix(StringRef Name);
1503   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1504                         SMLoc NameLoc, OperandVector &Operands) override;
1505   //bool ProcessInstruction(MCInst &Inst);
1506 
1507   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1508 
1509   OperandMatchResultTy
1510   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1511                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1512                      bool (*ConvertResult)(int64_t &) = nullptr);
1513 
1514   OperandMatchResultTy
1515   parseOperandArrayWithPrefix(const char *Prefix,
1516                               OperandVector &Operands,
1517                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1518                               bool (*ConvertResult)(int64_t&) = nullptr);
1519 
1520   OperandMatchResultTy
1521   parseNamedBit(StringRef Name, OperandVector &Operands,
1522                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1523   OperandMatchResultTy parseCPol(OperandVector &Operands);
1524   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1525                                              StringRef &Value,
1526                                              SMLoc &StringLoc);
1527 
1528   bool isModifier();
1529   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1530   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1531   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1532   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1533   bool parseSP3NegModifier();
1534   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1535   OperandMatchResultTy parseReg(OperandVector &Operands);
1536   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1537   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1538   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1539   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1540   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1541   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1542   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1543   OperandMatchResultTy parseUfmt(int64_t &Format);
1544   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1545   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1546   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1547   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1548   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1549   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1550   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1551 
1552   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1553   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1554   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1555   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1556 
1557   bool parseCnt(int64_t &IntVal);
1558   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1559 
1560   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1561   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1562   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1563 
1564   bool parseDelay(int64_t &Delay);
1565   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1566 
1567   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1568 
1569 private:
1570   struct OperandInfoTy {
1571     SMLoc Loc;
1572     int64_t Id;
1573     bool IsSymbolic = false;
1574     bool IsDefined = false;
1575 
1576     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1577   };
1578 
1579   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1580   bool validateSendMsg(const OperandInfoTy &Msg,
1581                        const OperandInfoTy &Op,
1582                        const OperandInfoTy &Stream);
1583 
1584   bool parseHwregBody(OperandInfoTy &HwReg,
1585                       OperandInfoTy &Offset,
1586                       OperandInfoTy &Width);
1587   bool validateHwreg(const OperandInfoTy &HwReg,
1588                      const OperandInfoTy &Offset,
1589                      const OperandInfoTy &Width);
1590 
1591   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1592   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1593   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1594 
1595   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1596                       const OperandVector &Operands) const;
1597   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1598   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1599   SMLoc getLitLoc(const OperandVector &Operands) const;
1600   SMLoc getConstLoc(const OperandVector &Operands) const;
1601 
1602   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1603   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1604   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1605   bool validateSOPLiteral(const MCInst &Inst) const;
1606   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1607   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1608   bool validateIntClampSupported(const MCInst &Inst);
1609   bool validateMIMGAtomicDMask(const MCInst &Inst);
1610   bool validateMIMGGatherDMask(const MCInst &Inst);
1611   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1612   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1613   bool validateMIMGAddrSize(const MCInst &Inst);
1614   bool validateMIMGD16(const MCInst &Inst);
1615   bool validateMIMGDim(const MCInst &Inst);
1616   bool validateMIMGMSAA(const MCInst &Inst);
1617   bool validateOpSel(const MCInst &Inst);
1618   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1619   bool validateVccOperand(unsigned Reg) const;
1620   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1621   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1622   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1623   bool validateAGPRLdSt(const MCInst &Inst) const;
1624   bool validateVGPRAlign(const MCInst &Inst) const;
1625   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1626   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1627   bool validateDivScale(const MCInst &Inst);
1628   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1629                              const SMLoc &IDLoc);
1630   bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1631                           const SMLoc &IDLoc);
1632   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1633   unsigned getConstantBusLimit(unsigned Opcode) const;
1634   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1635   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1636   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1637 
1638   bool isSupportedMnemo(StringRef Mnemo,
1639                         const FeatureBitset &FBS);
1640   bool isSupportedMnemo(StringRef Mnemo,
1641                         const FeatureBitset &FBS,
1642                         ArrayRef<unsigned> Variants);
1643   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1644 
1645   bool isId(const StringRef Id) const;
1646   bool isId(const AsmToken &Token, const StringRef Id) const;
1647   bool isToken(const AsmToken::TokenKind Kind) const;
1648   bool trySkipId(const StringRef Id);
1649   bool trySkipId(const StringRef Pref, const StringRef Id);
1650   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1651   bool trySkipToken(const AsmToken::TokenKind Kind);
1652   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1653   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1654   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1655 
1656   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1657   AsmToken::TokenKind getTokenKind() const;
1658   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1659   bool parseExpr(OperandVector &Operands);
1660   StringRef getTokenStr() const;
1661   AsmToken peekToken();
1662   AsmToken getToken() const;
1663   SMLoc getLoc() const;
1664   void lex();
1665 
1666 public:
1667   void onBeginOfFile() override;
1668 
1669   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1670   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1671 
1672   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1673   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1674   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1675   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1676   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1677   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1678 
1679   bool parseSwizzleOperand(int64_t &Op,
1680                            const unsigned MinVal,
1681                            const unsigned MaxVal,
1682                            const StringRef ErrMsg,
1683                            SMLoc &Loc);
1684   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1685                             const unsigned MinVal,
1686                             const unsigned MaxVal,
1687                             const StringRef ErrMsg);
1688   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1689   bool parseSwizzleOffset(int64_t &Imm);
1690   bool parseSwizzleMacro(int64_t &Imm);
1691   bool parseSwizzleQuadPerm(int64_t &Imm);
1692   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1693   bool parseSwizzleBroadcast(int64_t &Imm);
1694   bool parseSwizzleSwap(int64_t &Imm);
1695   bool parseSwizzleReverse(int64_t &Imm);
1696 
1697   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1698   int64_t parseGPRIdxMacro();
1699 
1700   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1701   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1702   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1703   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1704 
1705   AMDGPUOperand::Ptr defaultCPol() const;
1706 
1707   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1708   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1709   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1710   AMDGPUOperand::Ptr defaultFlatOffset() const;
1711 
1712   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1713 
1714   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1715                OptionalImmIndexMap &OptionalIdx);
1716   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1717   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1718   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1719   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1720                 OptionalImmIndexMap &OptionalIdx);
1721 
1722   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1723 
1724   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1725                bool IsAtomic = false);
1726   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1727   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1728 
1729   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1730 
1731   bool parseDimId(unsigned &Encoding);
1732   OperandMatchResultTy parseDim(OperandVector &Operands);
1733   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1734   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1735   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1736   int64_t parseDPPCtrlSel(StringRef Ctrl);
1737   int64_t parseDPPCtrlPerm();
1738   AMDGPUOperand::Ptr defaultRowMask() const;
1739   AMDGPUOperand::Ptr defaultBankMask() const;
1740   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1741   AMDGPUOperand::Ptr defaultFI() const;
1742   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1743   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1744 
1745   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1746                                     AMDGPUOperand::ImmTy Type);
1747   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1748   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1749   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1750   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1751   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1752   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1753   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1754                uint64_t BasicInstType,
1755                bool SkipDstVcc = false,
1756                bool SkipSrcVcc = false);
1757 
1758   AMDGPUOperand::Ptr defaultBLGP() const;
1759   AMDGPUOperand::Ptr defaultCBSZ() const;
1760   AMDGPUOperand::Ptr defaultABID() const;
1761 
1762   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1763   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1764 
1765   OperandMatchResultTy parseWaitVDST(OperandVector &Operands);
1766   AMDGPUOperand::Ptr defaultWaitVDST() const;
1767 };
1768 
1769 struct OptionalOperand {
1770   const char *Name;
1771   AMDGPUOperand::ImmTy Type;
1772   bool IsBit;
1773   bool (*ConvertResult)(int64_t&);
1774 };
1775 
1776 } // end anonymous namespace
1777 
1778 // May be called with integer type with equivalent bitwidth.
1779 static const fltSemantics *getFltSemantics(unsigned Size) {
1780   switch (Size) {
1781   case 4:
1782     return &APFloat::IEEEsingle();
1783   case 8:
1784     return &APFloat::IEEEdouble();
1785   case 2:
1786     return &APFloat::IEEEhalf();
1787   default:
1788     llvm_unreachable("unsupported fp type");
1789   }
1790 }
1791 
1792 static const fltSemantics *getFltSemantics(MVT VT) {
1793   return getFltSemantics(VT.getSizeInBits() / 8);
1794 }
1795 
1796 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1797   switch (OperandType) {
1798   case AMDGPU::OPERAND_REG_IMM_INT32:
1799   case AMDGPU::OPERAND_REG_IMM_FP32:
1800   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1801   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1802   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1803   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1804   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1805   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1806   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1807   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1808   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1809   case AMDGPU::OPERAND_KIMM32:
1810     return &APFloat::IEEEsingle();
1811   case AMDGPU::OPERAND_REG_IMM_INT64:
1812   case AMDGPU::OPERAND_REG_IMM_FP64:
1813   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1814   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1815   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1816     return &APFloat::IEEEdouble();
1817   case AMDGPU::OPERAND_REG_IMM_INT16:
1818   case AMDGPU::OPERAND_REG_IMM_FP16:
1819   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1820   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1821   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1822   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1823   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1824   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1825   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1826   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1827   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1828   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1829   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1830   case AMDGPU::OPERAND_KIMM16:
1831     return &APFloat::IEEEhalf();
1832   default:
1833     llvm_unreachable("unsupported fp type");
1834   }
1835 }
1836 
1837 //===----------------------------------------------------------------------===//
1838 // Operand
1839 //===----------------------------------------------------------------------===//
1840 
1841 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1842   bool Lost;
1843 
1844   // Convert literal to single precision
1845   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1846                                                APFloat::rmNearestTiesToEven,
1847                                                &Lost);
1848   // We allow precision lost but not overflow or underflow
1849   if (Status != APFloat::opOK &&
1850       Lost &&
1851       ((Status & APFloat::opOverflow)  != 0 ||
1852        (Status & APFloat::opUnderflow) != 0)) {
1853     return false;
1854   }
1855 
1856   return true;
1857 }
1858 
1859 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1860   return isUIntN(Size, Val) || isIntN(Size, Val);
1861 }
1862 
1863 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1864   if (VT.getScalarType() == MVT::i16) {
1865     // FP immediate values are broken.
1866     return isInlinableIntLiteral(Val);
1867   }
1868 
1869   // f16/v2f16 operands work correctly for all values.
1870   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1871 }
1872 
1873 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1874 
1875   // This is a hack to enable named inline values like
1876   // shared_base with both 32-bit and 64-bit operands.
1877   // Note that these values are defined as
1878   // 32-bit operands only.
1879   if (isInlineValue()) {
1880     return true;
1881   }
1882 
1883   if (!isImmTy(ImmTyNone)) {
1884     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1885     return false;
1886   }
1887   // TODO: We should avoid using host float here. It would be better to
1888   // check the float bit values which is what a few other places do.
1889   // We've had bot failures before due to weird NaN support on mips hosts.
1890 
1891   APInt Literal(64, Imm.Val);
1892 
1893   if (Imm.IsFPImm) { // We got fp literal token
1894     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1895       return AMDGPU::isInlinableLiteral64(Imm.Val,
1896                                           AsmParser->hasInv2PiInlineImm());
1897     }
1898 
1899     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1900     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1901       return false;
1902 
1903     if (type.getScalarSizeInBits() == 16) {
1904       return isInlineableLiteralOp16(
1905         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1906         type, AsmParser->hasInv2PiInlineImm());
1907     }
1908 
1909     // Check if single precision literal is inlinable
1910     return AMDGPU::isInlinableLiteral32(
1911       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1912       AsmParser->hasInv2PiInlineImm());
1913   }
1914 
1915   // We got int literal token.
1916   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1917     return AMDGPU::isInlinableLiteral64(Imm.Val,
1918                                         AsmParser->hasInv2PiInlineImm());
1919   }
1920 
1921   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1922     return false;
1923   }
1924 
1925   if (type.getScalarSizeInBits() == 16) {
1926     return isInlineableLiteralOp16(
1927       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1928       type, AsmParser->hasInv2PiInlineImm());
1929   }
1930 
1931   return AMDGPU::isInlinableLiteral32(
1932     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1933     AsmParser->hasInv2PiInlineImm());
1934 }
1935 
1936 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1937   // Check that this immediate can be added as literal
1938   if (!isImmTy(ImmTyNone)) {
1939     return false;
1940   }
1941 
1942   if (!Imm.IsFPImm) {
1943     // We got int literal token.
1944 
1945     if (type == MVT::f64 && hasFPModifiers()) {
1946       // Cannot apply fp modifiers to int literals preserving the same semantics
1947       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1948       // disable these cases.
1949       return false;
1950     }
1951 
1952     unsigned Size = type.getSizeInBits();
1953     if (Size == 64)
1954       Size = 32;
1955 
1956     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1957     // types.
1958     return isSafeTruncation(Imm.Val, Size);
1959   }
1960 
1961   // We got fp literal token
1962   if (type == MVT::f64) { // Expected 64-bit fp operand
1963     // We would set low 64-bits of literal to zeroes but we accept this literals
1964     return true;
1965   }
1966 
1967   if (type == MVT::i64) { // Expected 64-bit int operand
1968     // We don't allow fp literals in 64-bit integer instructions. It is
1969     // unclear how we should encode them.
1970     return false;
1971   }
1972 
1973   // We allow fp literals with f16x2 operands assuming that the specified
1974   // literal goes into the lower half and the upper half is zero. We also
1975   // require that the literal may be losslessly converted to f16.
1976   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1977                      (type == MVT::v2i16)? MVT::i16 :
1978                      (type == MVT::v2f32)? MVT::f32 : type;
1979 
1980   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1981   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1982 }
1983 
1984 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1985   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1986 }
1987 
1988 bool AMDGPUOperand::isVRegWithInputMods() const {
1989   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1990          // GFX90A allows DPP on 64-bit operands.
1991          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1992           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1993 }
1994 
1995 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1996   if (AsmParser->isVI())
1997     return isVReg32();
1998   else if (AsmParser->isGFX9Plus())
1999     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2000   else
2001     return false;
2002 }
2003 
2004 bool AMDGPUOperand::isSDWAFP16Operand() const {
2005   return isSDWAOperand(MVT::f16);
2006 }
2007 
2008 bool AMDGPUOperand::isSDWAFP32Operand() const {
2009   return isSDWAOperand(MVT::f32);
2010 }
2011 
2012 bool AMDGPUOperand::isSDWAInt16Operand() const {
2013   return isSDWAOperand(MVT::i16);
2014 }
2015 
2016 bool AMDGPUOperand::isSDWAInt32Operand() const {
2017   return isSDWAOperand(MVT::i32);
2018 }
2019 
2020 bool AMDGPUOperand::isBoolReg() const {
2021   auto FB = AsmParser->getFeatureBits();
2022   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2023                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2024 }
2025 
2026 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2027 {
2028   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2029   assert(Size == 2 || Size == 4 || Size == 8);
2030 
2031   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2032 
2033   if (Imm.Mods.Abs) {
2034     Val &= ~FpSignMask;
2035   }
2036   if (Imm.Mods.Neg) {
2037     Val ^= FpSignMask;
2038   }
2039 
2040   return Val;
2041 }
2042 
2043 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2044   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2045                              Inst.getNumOperands())) {
2046     addLiteralImmOperand(Inst, Imm.Val,
2047                          ApplyModifiers &
2048                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2049   } else {
2050     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2051     Inst.addOperand(MCOperand::createImm(Imm.Val));
2052     setImmKindNone();
2053   }
2054 }
2055 
2056 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2057   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2058   auto OpNum = Inst.getNumOperands();
2059   // Check that this operand accepts literals
2060   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2061 
2062   if (ApplyModifiers) {
2063     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2064     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2065     Val = applyInputFPModifiers(Val, Size);
2066   }
2067 
2068   APInt Literal(64, Val);
2069   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2070 
2071   if (Imm.IsFPImm) { // We got fp literal token
2072     switch (OpTy) {
2073     case AMDGPU::OPERAND_REG_IMM_INT64:
2074     case AMDGPU::OPERAND_REG_IMM_FP64:
2075     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2076     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2077     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2078       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2079                                        AsmParser->hasInv2PiInlineImm())) {
2080         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2081         setImmKindConst();
2082         return;
2083       }
2084 
2085       // Non-inlineable
2086       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2087         // For fp operands we check if low 32 bits are zeros
2088         if (Literal.getLoBits(32) != 0) {
2089           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2090           "Can't encode literal as exact 64-bit floating-point operand. "
2091           "Low 32-bits will be set to zero");
2092         }
2093 
2094         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2095         setImmKindLiteral();
2096         return;
2097       }
2098 
2099       // We don't allow fp literals in 64-bit integer instructions. It is
2100       // unclear how we should encode them. This case should be checked earlier
2101       // in predicate methods (isLiteralImm())
2102       llvm_unreachable("fp literal in 64-bit integer instruction.");
2103 
2104     case AMDGPU::OPERAND_REG_IMM_INT32:
2105     case AMDGPU::OPERAND_REG_IMM_FP32:
2106     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2107     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2108     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2109     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2110     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2111     case AMDGPU::OPERAND_REG_IMM_INT16:
2112     case AMDGPU::OPERAND_REG_IMM_FP16:
2113     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2114     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2115     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2116     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2117     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2118     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2119     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2120     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2121     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2122     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2123     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2124     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2125     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2126     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2127     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2128     case AMDGPU::OPERAND_KIMM32:
2129     case AMDGPU::OPERAND_KIMM16: {
2130       bool lost;
2131       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2132       // Convert literal to single precision
2133       FPLiteral.convert(*getOpFltSemantics(OpTy),
2134                         APFloat::rmNearestTiesToEven, &lost);
2135       // We allow precision lost but not overflow or underflow. This should be
2136       // checked earlier in isLiteralImm()
2137 
2138       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2139       Inst.addOperand(MCOperand::createImm(ImmVal));
2140       setImmKindLiteral();
2141       return;
2142     }
2143     default:
2144       llvm_unreachable("invalid operand size");
2145     }
2146 
2147     return;
2148   }
2149 
2150   // We got int literal token.
2151   // Only sign extend inline immediates.
2152   switch (OpTy) {
2153   case AMDGPU::OPERAND_REG_IMM_INT32:
2154   case AMDGPU::OPERAND_REG_IMM_FP32:
2155   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2156   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2157   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2158   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2159   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2160   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2161   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2162   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2163   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2164   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2165   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2166     if (isSafeTruncation(Val, 32) &&
2167         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2168                                      AsmParser->hasInv2PiInlineImm())) {
2169       Inst.addOperand(MCOperand::createImm(Val));
2170       setImmKindConst();
2171       return;
2172     }
2173 
2174     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2175     setImmKindLiteral();
2176     return;
2177 
2178   case AMDGPU::OPERAND_REG_IMM_INT64:
2179   case AMDGPU::OPERAND_REG_IMM_FP64:
2180   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2181   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2182   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2183     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2184       Inst.addOperand(MCOperand::createImm(Val));
2185       setImmKindConst();
2186       return;
2187     }
2188 
2189     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2190     setImmKindLiteral();
2191     return;
2192 
2193   case AMDGPU::OPERAND_REG_IMM_INT16:
2194   case AMDGPU::OPERAND_REG_IMM_FP16:
2195   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2196   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2197   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2198   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2199   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2200     if (isSafeTruncation(Val, 16) &&
2201         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2202                                      AsmParser->hasInv2PiInlineImm())) {
2203       Inst.addOperand(MCOperand::createImm(Val));
2204       setImmKindConst();
2205       return;
2206     }
2207 
2208     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2209     setImmKindLiteral();
2210     return;
2211 
2212   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2213   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2214   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2215   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2216     assert(isSafeTruncation(Val, 16));
2217     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2218                                         AsmParser->hasInv2PiInlineImm()));
2219 
2220     Inst.addOperand(MCOperand::createImm(Val));
2221     return;
2222   }
2223   case AMDGPU::OPERAND_KIMM32:
2224     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2225     setImmKindNone();
2226     return;
2227   case AMDGPU::OPERAND_KIMM16:
2228     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2229     setImmKindNone();
2230     return;
2231   default:
2232     llvm_unreachable("invalid operand size");
2233   }
2234 }
2235 
2236 template <unsigned Bitwidth>
2237 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2238   APInt Literal(64, Imm.Val);
2239   setImmKindNone();
2240 
2241   if (!Imm.IsFPImm) {
2242     // We got int literal token.
2243     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2244     return;
2245   }
2246 
2247   bool Lost;
2248   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2249   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2250                     APFloat::rmNearestTiesToEven, &Lost);
2251   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2252 }
2253 
2254 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2255   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2256 }
2257 
2258 static bool isInlineValue(unsigned Reg) {
2259   switch (Reg) {
2260   case AMDGPU::SRC_SHARED_BASE:
2261   case AMDGPU::SRC_SHARED_LIMIT:
2262   case AMDGPU::SRC_PRIVATE_BASE:
2263   case AMDGPU::SRC_PRIVATE_LIMIT:
2264   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2265     return true;
2266   case AMDGPU::SRC_VCCZ:
2267   case AMDGPU::SRC_EXECZ:
2268   case AMDGPU::SRC_SCC:
2269     return true;
2270   case AMDGPU::SGPR_NULL:
2271     return true;
2272   default:
2273     return false;
2274   }
2275 }
2276 
2277 bool AMDGPUOperand::isInlineValue() const {
2278   return isRegKind() && ::isInlineValue(getReg());
2279 }
2280 
2281 //===----------------------------------------------------------------------===//
2282 // AsmParser
2283 //===----------------------------------------------------------------------===//
2284 
2285 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2286   if (Is == IS_VGPR) {
2287     switch (RegWidth) {
2288       default: return -1;
2289       case 32:
2290         return AMDGPU::VGPR_32RegClassID;
2291       case 64:
2292         return AMDGPU::VReg_64RegClassID;
2293       case 96:
2294         return AMDGPU::VReg_96RegClassID;
2295       case 128:
2296         return AMDGPU::VReg_128RegClassID;
2297       case 160:
2298         return AMDGPU::VReg_160RegClassID;
2299       case 192:
2300         return AMDGPU::VReg_192RegClassID;
2301       case 224:
2302         return AMDGPU::VReg_224RegClassID;
2303       case 256:
2304         return AMDGPU::VReg_256RegClassID;
2305       case 512:
2306         return AMDGPU::VReg_512RegClassID;
2307       case 1024:
2308         return AMDGPU::VReg_1024RegClassID;
2309     }
2310   } else if (Is == IS_TTMP) {
2311     switch (RegWidth) {
2312       default: return -1;
2313       case 32:
2314         return AMDGPU::TTMP_32RegClassID;
2315       case 64:
2316         return AMDGPU::TTMP_64RegClassID;
2317       case 128:
2318         return AMDGPU::TTMP_128RegClassID;
2319       case 256:
2320         return AMDGPU::TTMP_256RegClassID;
2321       case 512:
2322         return AMDGPU::TTMP_512RegClassID;
2323     }
2324   } else if (Is == IS_SGPR) {
2325     switch (RegWidth) {
2326       default: return -1;
2327       case 32:
2328         return AMDGPU::SGPR_32RegClassID;
2329       case 64:
2330         return AMDGPU::SGPR_64RegClassID;
2331       case 96:
2332         return AMDGPU::SGPR_96RegClassID;
2333       case 128:
2334         return AMDGPU::SGPR_128RegClassID;
2335       case 160:
2336         return AMDGPU::SGPR_160RegClassID;
2337       case 192:
2338         return AMDGPU::SGPR_192RegClassID;
2339       case 224:
2340         return AMDGPU::SGPR_224RegClassID;
2341       case 256:
2342         return AMDGPU::SGPR_256RegClassID;
2343       case 512:
2344         return AMDGPU::SGPR_512RegClassID;
2345     }
2346   } else if (Is == IS_AGPR) {
2347     switch (RegWidth) {
2348       default: return -1;
2349       case 32:
2350         return AMDGPU::AGPR_32RegClassID;
2351       case 64:
2352         return AMDGPU::AReg_64RegClassID;
2353       case 96:
2354         return AMDGPU::AReg_96RegClassID;
2355       case 128:
2356         return AMDGPU::AReg_128RegClassID;
2357       case 160:
2358         return AMDGPU::AReg_160RegClassID;
2359       case 192:
2360         return AMDGPU::AReg_192RegClassID;
2361       case 224:
2362         return AMDGPU::AReg_224RegClassID;
2363       case 256:
2364         return AMDGPU::AReg_256RegClassID;
2365       case 512:
2366         return AMDGPU::AReg_512RegClassID;
2367       case 1024:
2368         return AMDGPU::AReg_1024RegClassID;
2369     }
2370   }
2371   return -1;
2372 }
2373 
2374 static unsigned getSpecialRegForName(StringRef RegName) {
2375   return StringSwitch<unsigned>(RegName)
2376     .Case("exec", AMDGPU::EXEC)
2377     .Case("vcc", AMDGPU::VCC)
2378     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2379     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2380     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2381     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2382     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2383     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2384     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2385     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2386     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2387     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2388     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2389     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2390     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2391     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2392     .Case("m0", AMDGPU::M0)
2393     .Case("vccz", AMDGPU::SRC_VCCZ)
2394     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2395     .Case("execz", AMDGPU::SRC_EXECZ)
2396     .Case("src_execz", AMDGPU::SRC_EXECZ)
2397     .Case("scc", AMDGPU::SRC_SCC)
2398     .Case("src_scc", AMDGPU::SRC_SCC)
2399     .Case("tba", AMDGPU::TBA)
2400     .Case("tma", AMDGPU::TMA)
2401     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2402     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2403     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2404     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2405     .Case("vcc_lo", AMDGPU::VCC_LO)
2406     .Case("vcc_hi", AMDGPU::VCC_HI)
2407     .Case("exec_lo", AMDGPU::EXEC_LO)
2408     .Case("exec_hi", AMDGPU::EXEC_HI)
2409     .Case("tma_lo", AMDGPU::TMA_LO)
2410     .Case("tma_hi", AMDGPU::TMA_HI)
2411     .Case("tba_lo", AMDGPU::TBA_LO)
2412     .Case("tba_hi", AMDGPU::TBA_HI)
2413     .Case("pc", AMDGPU::PC_REG)
2414     .Case("null", AMDGPU::SGPR_NULL)
2415     .Default(AMDGPU::NoRegister);
2416 }
2417 
2418 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2419                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2420   auto R = parseRegister();
2421   if (!R) return true;
2422   assert(R->isReg());
2423   RegNo = R->getReg();
2424   StartLoc = R->getStartLoc();
2425   EndLoc = R->getEndLoc();
2426   return false;
2427 }
2428 
2429 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2430                                     SMLoc &EndLoc) {
2431   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2432 }
2433 
2434 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2435                                                        SMLoc &StartLoc,
2436                                                        SMLoc &EndLoc) {
2437   bool Result =
2438       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2439   bool PendingErrors = getParser().hasPendingError();
2440   getParser().clearPendingErrors();
2441   if (PendingErrors)
2442     return MatchOperand_ParseFail;
2443   if (Result)
2444     return MatchOperand_NoMatch;
2445   return MatchOperand_Success;
2446 }
2447 
2448 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2449                                             RegisterKind RegKind, unsigned Reg1,
2450                                             SMLoc Loc) {
2451   switch (RegKind) {
2452   case IS_SPECIAL:
2453     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2454       Reg = AMDGPU::EXEC;
2455       RegWidth = 64;
2456       return true;
2457     }
2458     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2459       Reg = AMDGPU::FLAT_SCR;
2460       RegWidth = 64;
2461       return true;
2462     }
2463     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2464       Reg = AMDGPU::XNACK_MASK;
2465       RegWidth = 64;
2466       return true;
2467     }
2468     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2469       Reg = AMDGPU::VCC;
2470       RegWidth = 64;
2471       return true;
2472     }
2473     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2474       Reg = AMDGPU::TBA;
2475       RegWidth = 64;
2476       return true;
2477     }
2478     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2479       Reg = AMDGPU::TMA;
2480       RegWidth = 64;
2481       return true;
2482     }
2483     Error(Loc, "register does not fit in the list");
2484     return false;
2485   case IS_VGPR:
2486   case IS_SGPR:
2487   case IS_AGPR:
2488   case IS_TTMP:
2489     if (Reg1 != Reg + RegWidth / 32) {
2490       Error(Loc, "registers in a list must have consecutive indices");
2491       return false;
2492     }
2493     RegWidth += 32;
2494     return true;
2495   default:
2496     llvm_unreachable("unexpected register kind");
2497   }
2498 }
2499 
2500 struct RegInfo {
2501   StringLiteral Name;
2502   RegisterKind Kind;
2503 };
2504 
2505 static constexpr RegInfo RegularRegisters[] = {
2506   {{"v"},    IS_VGPR},
2507   {{"s"},    IS_SGPR},
2508   {{"ttmp"}, IS_TTMP},
2509   {{"acc"},  IS_AGPR},
2510   {{"a"},    IS_AGPR},
2511 };
2512 
2513 static bool isRegularReg(RegisterKind Kind) {
2514   return Kind == IS_VGPR ||
2515          Kind == IS_SGPR ||
2516          Kind == IS_TTMP ||
2517          Kind == IS_AGPR;
2518 }
2519 
2520 static const RegInfo* getRegularRegInfo(StringRef Str) {
2521   for (const RegInfo &Reg : RegularRegisters)
2522     if (Str.startswith(Reg.Name))
2523       return &Reg;
2524   return nullptr;
2525 }
2526 
2527 static bool getRegNum(StringRef Str, unsigned& Num) {
2528   return !Str.getAsInteger(10, Num);
2529 }
2530 
2531 bool
2532 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2533                             const AsmToken &NextToken) const {
2534 
2535   // A list of consecutive registers: [s0,s1,s2,s3]
2536   if (Token.is(AsmToken::LBrac))
2537     return true;
2538 
2539   if (!Token.is(AsmToken::Identifier))
2540     return false;
2541 
2542   // A single register like s0 or a range of registers like s[0:1]
2543 
2544   StringRef Str = Token.getString();
2545   const RegInfo *Reg = getRegularRegInfo(Str);
2546   if (Reg) {
2547     StringRef RegName = Reg->Name;
2548     StringRef RegSuffix = Str.substr(RegName.size());
2549     if (!RegSuffix.empty()) {
2550       unsigned Num;
2551       // A single register with an index: rXX
2552       if (getRegNum(RegSuffix, Num))
2553         return true;
2554     } else {
2555       // A range of registers: r[XX:YY].
2556       if (NextToken.is(AsmToken::LBrac))
2557         return true;
2558     }
2559   }
2560 
2561   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2562 }
2563 
2564 bool
2565 AMDGPUAsmParser::isRegister()
2566 {
2567   return isRegister(getToken(), peekToken());
2568 }
2569 
2570 unsigned
2571 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2572                                unsigned RegNum,
2573                                unsigned RegWidth,
2574                                SMLoc Loc) {
2575 
2576   assert(isRegularReg(RegKind));
2577 
2578   unsigned AlignSize = 1;
2579   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2580     // SGPR and TTMP registers must be aligned.
2581     // Max required alignment is 4 dwords.
2582     AlignSize = std::min(RegWidth / 32, 4u);
2583   }
2584 
2585   if (RegNum % AlignSize != 0) {
2586     Error(Loc, "invalid register alignment");
2587     return AMDGPU::NoRegister;
2588   }
2589 
2590   unsigned RegIdx = RegNum / AlignSize;
2591   int RCID = getRegClass(RegKind, RegWidth);
2592   if (RCID == -1) {
2593     Error(Loc, "invalid or unsupported register size");
2594     return AMDGPU::NoRegister;
2595   }
2596 
2597   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2598   const MCRegisterClass RC = TRI->getRegClass(RCID);
2599   if (RegIdx >= RC.getNumRegs()) {
2600     Error(Loc, "register index is out of range");
2601     return AMDGPU::NoRegister;
2602   }
2603 
2604   return RC.getRegister(RegIdx);
2605 }
2606 
2607 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2608   int64_t RegLo, RegHi;
2609   if (!skipToken(AsmToken::LBrac, "missing register index"))
2610     return false;
2611 
2612   SMLoc FirstIdxLoc = getLoc();
2613   SMLoc SecondIdxLoc;
2614 
2615   if (!parseExpr(RegLo))
2616     return false;
2617 
2618   if (trySkipToken(AsmToken::Colon)) {
2619     SecondIdxLoc = getLoc();
2620     if (!parseExpr(RegHi))
2621       return false;
2622   } else {
2623     RegHi = RegLo;
2624   }
2625 
2626   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2627     return false;
2628 
2629   if (!isUInt<32>(RegLo)) {
2630     Error(FirstIdxLoc, "invalid register index");
2631     return false;
2632   }
2633 
2634   if (!isUInt<32>(RegHi)) {
2635     Error(SecondIdxLoc, "invalid register index");
2636     return false;
2637   }
2638 
2639   if (RegLo > RegHi) {
2640     Error(FirstIdxLoc, "first register index should not exceed second index");
2641     return false;
2642   }
2643 
2644   Num = static_cast<unsigned>(RegLo);
2645   RegWidth = 32 * ((RegHi - RegLo) + 1);
2646   return true;
2647 }
2648 
2649 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2650                                           unsigned &RegNum, unsigned &RegWidth,
2651                                           SmallVectorImpl<AsmToken> &Tokens) {
2652   assert(isToken(AsmToken::Identifier));
2653   unsigned Reg = getSpecialRegForName(getTokenStr());
2654   if (Reg) {
2655     RegNum = 0;
2656     RegWidth = 32;
2657     RegKind = IS_SPECIAL;
2658     Tokens.push_back(getToken());
2659     lex(); // skip register name
2660   }
2661   return Reg;
2662 }
2663 
2664 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2665                                           unsigned &RegNum, unsigned &RegWidth,
2666                                           SmallVectorImpl<AsmToken> &Tokens) {
2667   assert(isToken(AsmToken::Identifier));
2668   StringRef RegName = getTokenStr();
2669   auto Loc = getLoc();
2670 
2671   const RegInfo *RI = getRegularRegInfo(RegName);
2672   if (!RI) {
2673     Error(Loc, "invalid register name");
2674     return AMDGPU::NoRegister;
2675   }
2676 
2677   Tokens.push_back(getToken());
2678   lex(); // skip register name
2679 
2680   RegKind = RI->Kind;
2681   StringRef RegSuffix = RegName.substr(RI->Name.size());
2682   if (!RegSuffix.empty()) {
2683     // Single 32-bit register: vXX.
2684     if (!getRegNum(RegSuffix, RegNum)) {
2685       Error(Loc, "invalid register index");
2686       return AMDGPU::NoRegister;
2687     }
2688     RegWidth = 32;
2689   } else {
2690     // Range of registers: v[XX:YY]. ":YY" is optional.
2691     if (!ParseRegRange(RegNum, RegWidth))
2692       return AMDGPU::NoRegister;
2693   }
2694 
2695   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2696 }
2697 
2698 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2699                                        unsigned &RegWidth,
2700                                        SmallVectorImpl<AsmToken> &Tokens) {
2701   unsigned Reg = AMDGPU::NoRegister;
2702   auto ListLoc = getLoc();
2703 
2704   if (!skipToken(AsmToken::LBrac,
2705                  "expected a register or a list of registers")) {
2706     return AMDGPU::NoRegister;
2707   }
2708 
2709   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2710 
2711   auto Loc = getLoc();
2712   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2713     return AMDGPU::NoRegister;
2714   if (RegWidth != 32) {
2715     Error(Loc, "expected a single 32-bit register");
2716     return AMDGPU::NoRegister;
2717   }
2718 
2719   for (; trySkipToken(AsmToken::Comma); ) {
2720     RegisterKind NextRegKind;
2721     unsigned NextReg, NextRegNum, NextRegWidth;
2722     Loc = getLoc();
2723 
2724     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2725                              NextRegNum, NextRegWidth,
2726                              Tokens)) {
2727       return AMDGPU::NoRegister;
2728     }
2729     if (NextRegWidth != 32) {
2730       Error(Loc, "expected a single 32-bit register");
2731       return AMDGPU::NoRegister;
2732     }
2733     if (NextRegKind != RegKind) {
2734       Error(Loc, "registers in a list must be of the same kind");
2735       return AMDGPU::NoRegister;
2736     }
2737     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2738       return AMDGPU::NoRegister;
2739   }
2740 
2741   if (!skipToken(AsmToken::RBrac,
2742                  "expected a comma or a closing square bracket")) {
2743     return AMDGPU::NoRegister;
2744   }
2745 
2746   if (isRegularReg(RegKind))
2747     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2748 
2749   return Reg;
2750 }
2751 
2752 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2753                                           unsigned &RegNum, unsigned &RegWidth,
2754                                           SmallVectorImpl<AsmToken> &Tokens) {
2755   auto Loc = getLoc();
2756   Reg = AMDGPU::NoRegister;
2757 
2758   if (isToken(AsmToken::Identifier)) {
2759     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2760     if (Reg == AMDGPU::NoRegister)
2761       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2762   } else {
2763     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2764   }
2765 
2766   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2767   if (Reg == AMDGPU::NoRegister) {
2768     assert(Parser.hasPendingError());
2769     return false;
2770   }
2771 
2772   if (!subtargetHasRegister(*TRI, Reg)) {
2773     if (Reg == AMDGPU::SGPR_NULL) {
2774       Error(Loc, "'null' operand is not supported on this GPU");
2775     } else {
2776       Error(Loc, "register not available on this GPU");
2777     }
2778     return false;
2779   }
2780 
2781   return true;
2782 }
2783 
2784 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2785                                           unsigned &RegNum, unsigned &RegWidth,
2786                                           bool RestoreOnFailure /*=false*/) {
2787   Reg = AMDGPU::NoRegister;
2788 
2789   SmallVector<AsmToken, 1> Tokens;
2790   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2791     if (RestoreOnFailure) {
2792       while (!Tokens.empty()) {
2793         getLexer().UnLex(Tokens.pop_back_val());
2794       }
2795     }
2796     return true;
2797   }
2798   return false;
2799 }
2800 
2801 Optional<StringRef>
2802 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2803   switch (RegKind) {
2804   case IS_VGPR:
2805     return StringRef(".amdgcn.next_free_vgpr");
2806   case IS_SGPR:
2807     return StringRef(".amdgcn.next_free_sgpr");
2808   default:
2809     return None;
2810   }
2811 }
2812 
2813 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2814   auto SymbolName = getGprCountSymbolName(RegKind);
2815   assert(SymbolName && "initializing invalid register kind");
2816   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2817   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2818 }
2819 
2820 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2821                                             unsigned DwordRegIndex,
2822                                             unsigned RegWidth) {
2823   // Symbols are only defined for GCN targets
2824   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2825     return true;
2826 
2827   auto SymbolName = getGprCountSymbolName(RegKind);
2828   if (!SymbolName)
2829     return true;
2830   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2831 
2832   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2833   int64_t OldCount;
2834 
2835   if (!Sym->isVariable())
2836     return !Error(getLoc(),
2837                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2838   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2839     return !Error(
2840         getLoc(),
2841         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2842 
2843   if (OldCount <= NewMax)
2844     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2845 
2846   return true;
2847 }
2848 
2849 std::unique_ptr<AMDGPUOperand>
2850 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2851   const auto &Tok = getToken();
2852   SMLoc StartLoc = Tok.getLoc();
2853   SMLoc EndLoc = Tok.getEndLoc();
2854   RegisterKind RegKind;
2855   unsigned Reg, RegNum, RegWidth;
2856 
2857   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2858     return nullptr;
2859   }
2860   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2861     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2862       return nullptr;
2863   } else
2864     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2865   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2866 }
2867 
2868 OperandMatchResultTy
2869 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2870   // TODO: add syntactic sugar for 1/(2*PI)
2871 
2872   assert(!isRegister());
2873   assert(!isModifier());
2874 
2875   const auto& Tok = getToken();
2876   const auto& NextTok = peekToken();
2877   bool IsReal = Tok.is(AsmToken::Real);
2878   SMLoc S = getLoc();
2879   bool Negate = false;
2880 
2881   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2882     lex();
2883     IsReal = true;
2884     Negate = true;
2885   }
2886 
2887   if (IsReal) {
2888     // Floating-point expressions are not supported.
2889     // Can only allow floating-point literals with an
2890     // optional sign.
2891 
2892     StringRef Num = getTokenStr();
2893     lex();
2894 
2895     APFloat RealVal(APFloat::IEEEdouble());
2896     auto roundMode = APFloat::rmNearestTiesToEven;
2897     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2898       return MatchOperand_ParseFail;
2899     }
2900     if (Negate)
2901       RealVal.changeSign();
2902 
2903     Operands.push_back(
2904       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2905                                AMDGPUOperand::ImmTyNone, true));
2906 
2907     return MatchOperand_Success;
2908 
2909   } else {
2910     int64_t IntVal;
2911     const MCExpr *Expr;
2912     SMLoc S = getLoc();
2913 
2914     if (HasSP3AbsModifier) {
2915       // This is a workaround for handling expressions
2916       // as arguments of SP3 'abs' modifier, for example:
2917       //     |1.0|
2918       //     |-1|
2919       //     |1+x|
2920       // This syntax is not compatible with syntax of standard
2921       // MC expressions (due to the trailing '|').
2922       SMLoc EndLoc;
2923       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2924         return MatchOperand_ParseFail;
2925     } else {
2926       if (Parser.parseExpression(Expr))
2927         return MatchOperand_ParseFail;
2928     }
2929 
2930     if (Expr->evaluateAsAbsolute(IntVal)) {
2931       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2932     } else {
2933       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2934     }
2935 
2936     return MatchOperand_Success;
2937   }
2938 
2939   return MatchOperand_NoMatch;
2940 }
2941 
2942 OperandMatchResultTy
2943 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2944   if (!isRegister())
2945     return MatchOperand_NoMatch;
2946 
2947   if (auto R = parseRegister()) {
2948     assert(R->isReg());
2949     Operands.push_back(std::move(R));
2950     return MatchOperand_Success;
2951   }
2952   return MatchOperand_ParseFail;
2953 }
2954 
2955 OperandMatchResultTy
2956 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2957   auto res = parseReg(Operands);
2958   if (res != MatchOperand_NoMatch) {
2959     return res;
2960   } else if (isModifier()) {
2961     return MatchOperand_NoMatch;
2962   } else {
2963     return parseImm(Operands, HasSP3AbsMod);
2964   }
2965 }
2966 
2967 bool
2968 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2969   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2970     const auto &str = Token.getString();
2971     return str == "abs" || str == "neg" || str == "sext";
2972   }
2973   return false;
2974 }
2975 
2976 bool
2977 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2978   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2979 }
2980 
2981 bool
2982 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2983   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2984 }
2985 
2986 bool
2987 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2988   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2989 }
2990 
2991 // Check if this is an operand modifier or an opcode modifier
2992 // which may look like an expression but it is not. We should
2993 // avoid parsing these modifiers as expressions. Currently
2994 // recognized sequences are:
2995 //   |...|
2996 //   abs(...)
2997 //   neg(...)
2998 //   sext(...)
2999 //   -reg
3000 //   -|...|
3001 //   -abs(...)
3002 //   name:...
3003 // Note that simple opcode modifiers like 'gds' may be parsed as
3004 // expressions; this is a special case. See getExpressionAsToken.
3005 //
3006 bool
3007 AMDGPUAsmParser::isModifier() {
3008 
3009   AsmToken Tok = getToken();
3010   AsmToken NextToken[2];
3011   peekTokens(NextToken);
3012 
3013   return isOperandModifier(Tok, NextToken[0]) ||
3014          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3015          isOpcodeModifierWithVal(Tok, NextToken[0]);
3016 }
3017 
3018 // Check if the current token is an SP3 'neg' modifier.
3019 // Currently this modifier is allowed in the following context:
3020 //
3021 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3022 // 2. Before an 'abs' modifier: -abs(...)
3023 // 3. Before an SP3 'abs' modifier: -|...|
3024 //
3025 // In all other cases "-" is handled as a part
3026 // of an expression that follows the sign.
3027 //
3028 // Note: When "-" is followed by an integer literal,
3029 // this is interpreted as integer negation rather
3030 // than a floating-point NEG modifier applied to N.
3031 // Beside being contr-intuitive, such use of floating-point
3032 // NEG modifier would have resulted in different meaning
3033 // of integer literals used with VOP1/2/C and VOP3,
3034 // for example:
3035 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3036 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3037 // Negative fp literals with preceding "-" are
3038 // handled likewise for uniformity
3039 //
3040 bool
3041 AMDGPUAsmParser::parseSP3NegModifier() {
3042 
3043   AsmToken NextToken[2];
3044   peekTokens(NextToken);
3045 
3046   if (isToken(AsmToken::Minus) &&
3047       (isRegister(NextToken[0], NextToken[1]) ||
3048        NextToken[0].is(AsmToken::Pipe) ||
3049        isId(NextToken[0], "abs"))) {
3050     lex();
3051     return true;
3052   }
3053 
3054   return false;
3055 }
3056 
3057 OperandMatchResultTy
3058 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3059                                               bool AllowImm) {
3060   bool Neg, SP3Neg;
3061   bool Abs, SP3Abs;
3062   SMLoc Loc;
3063 
3064   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3065   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3066     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3067     return MatchOperand_ParseFail;
3068   }
3069 
3070   SP3Neg = parseSP3NegModifier();
3071 
3072   Loc = getLoc();
3073   Neg = trySkipId("neg");
3074   if (Neg && SP3Neg) {
3075     Error(Loc, "expected register or immediate");
3076     return MatchOperand_ParseFail;
3077   }
3078   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3079     return MatchOperand_ParseFail;
3080 
3081   Abs = trySkipId("abs");
3082   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3083     return MatchOperand_ParseFail;
3084 
3085   Loc = getLoc();
3086   SP3Abs = trySkipToken(AsmToken::Pipe);
3087   if (Abs && SP3Abs) {
3088     Error(Loc, "expected register or immediate");
3089     return MatchOperand_ParseFail;
3090   }
3091 
3092   OperandMatchResultTy Res;
3093   if (AllowImm) {
3094     Res = parseRegOrImm(Operands, SP3Abs);
3095   } else {
3096     Res = parseReg(Operands);
3097   }
3098   if (Res != MatchOperand_Success) {
3099     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3100   }
3101 
3102   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3103     return MatchOperand_ParseFail;
3104   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3105     return MatchOperand_ParseFail;
3106   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3107     return MatchOperand_ParseFail;
3108 
3109   AMDGPUOperand::Modifiers Mods;
3110   Mods.Abs = Abs || SP3Abs;
3111   Mods.Neg = Neg || SP3Neg;
3112 
3113   if (Mods.hasFPModifiers()) {
3114     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3115     if (Op.isExpr()) {
3116       Error(Op.getStartLoc(), "expected an absolute expression");
3117       return MatchOperand_ParseFail;
3118     }
3119     Op.setModifiers(Mods);
3120   }
3121   return MatchOperand_Success;
3122 }
3123 
3124 OperandMatchResultTy
3125 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3126                                                bool AllowImm) {
3127   bool Sext = trySkipId("sext");
3128   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3129     return MatchOperand_ParseFail;
3130 
3131   OperandMatchResultTy Res;
3132   if (AllowImm) {
3133     Res = parseRegOrImm(Operands);
3134   } else {
3135     Res = parseReg(Operands);
3136   }
3137   if (Res != MatchOperand_Success) {
3138     return Sext? MatchOperand_ParseFail : Res;
3139   }
3140 
3141   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3142     return MatchOperand_ParseFail;
3143 
3144   AMDGPUOperand::Modifiers Mods;
3145   Mods.Sext = Sext;
3146 
3147   if (Mods.hasIntModifiers()) {
3148     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3149     if (Op.isExpr()) {
3150       Error(Op.getStartLoc(), "expected an absolute expression");
3151       return MatchOperand_ParseFail;
3152     }
3153     Op.setModifiers(Mods);
3154   }
3155 
3156   return MatchOperand_Success;
3157 }
3158 
3159 OperandMatchResultTy
3160 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3161   return parseRegOrImmWithFPInputMods(Operands, false);
3162 }
3163 
3164 OperandMatchResultTy
3165 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3166   return parseRegOrImmWithIntInputMods(Operands, false);
3167 }
3168 
3169 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3170   auto Loc = getLoc();
3171   if (trySkipId("off")) {
3172     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3173                                                 AMDGPUOperand::ImmTyOff, false));
3174     return MatchOperand_Success;
3175   }
3176 
3177   if (!isRegister())
3178     return MatchOperand_NoMatch;
3179 
3180   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3181   if (Reg) {
3182     Operands.push_back(std::move(Reg));
3183     return MatchOperand_Success;
3184   }
3185 
3186   return MatchOperand_ParseFail;
3187 
3188 }
3189 
3190 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3191   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3192 
3193   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3194       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3195       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3196       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3197     return Match_InvalidOperand;
3198 
3199   if ((TSFlags & SIInstrFlags::VOP3) &&
3200       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3201       getForcedEncodingSize() != 64)
3202     return Match_PreferE32;
3203 
3204   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3205       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3206     // v_mac_f32/16 allow only dst_sel == DWORD;
3207     auto OpNum =
3208         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3209     const auto &Op = Inst.getOperand(OpNum);
3210     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3211       return Match_InvalidOperand;
3212     }
3213   }
3214 
3215   return Match_Success;
3216 }
3217 
3218 static ArrayRef<unsigned> getAllVariants() {
3219   static const unsigned Variants[] = {
3220     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3221     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3222   };
3223 
3224   return makeArrayRef(Variants);
3225 }
3226 
3227 // What asm variants we should check
3228 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3229   if (getForcedEncodingSize() == 32) {
3230     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3231     return makeArrayRef(Variants);
3232   }
3233 
3234   if (isForcedVOP3()) {
3235     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3236     return makeArrayRef(Variants);
3237   }
3238 
3239   if (isForcedSDWA()) {
3240     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3241                                         AMDGPUAsmVariants::SDWA9};
3242     return makeArrayRef(Variants);
3243   }
3244 
3245   if (isForcedDPP()) {
3246     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3247     return makeArrayRef(Variants);
3248   }
3249 
3250   return getAllVariants();
3251 }
3252 
3253 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3254   if (getForcedEncodingSize() == 32)
3255     return "e32";
3256 
3257   if (isForcedVOP3())
3258     return "e64";
3259 
3260   if (isForcedSDWA())
3261     return "sdwa";
3262 
3263   if (isForcedDPP())
3264     return "dpp";
3265 
3266   return "";
3267 }
3268 
3269 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3270   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3271   const unsigned Num = Desc.getNumImplicitUses();
3272   for (unsigned i = 0; i < Num; ++i) {
3273     unsigned Reg = Desc.ImplicitUses[i];
3274     switch (Reg) {
3275     case AMDGPU::FLAT_SCR:
3276     case AMDGPU::VCC:
3277     case AMDGPU::VCC_LO:
3278     case AMDGPU::VCC_HI:
3279     case AMDGPU::M0:
3280       return Reg;
3281     default:
3282       break;
3283     }
3284   }
3285   return AMDGPU::NoRegister;
3286 }
3287 
3288 // NB: This code is correct only when used to check constant
3289 // bus limitations because GFX7 support no f16 inline constants.
3290 // Note that there are no cases when a GFX7 opcode violates
3291 // constant bus limitations due to the use of an f16 constant.
3292 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3293                                        unsigned OpIdx) const {
3294   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3295 
3296   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3297     return false;
3298   }
3299 
3300   const MCOperand &MO = Inst.getOperand(OpIdx);
3301 
3302   int64_t Val = MO.getImm();
3303   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3304 
3305   switch (OpSize) { // expected operand size
3306   case 8:
3307     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3308   case 4:
3309     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3310   case 2: {
3311     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3312     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3313         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3314         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3315       return AMDGPU::isInlinableIntLiteral(Val);
3316 
3317     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3318         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3319         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3320       return AMDGPU::isInlinableIntLiteralV216(Val);
3321 
3322     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3323         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3324         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3325       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3326 
3327     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3328   }
3329   default:
3330     llvm_unreachable("invalid operand size");
3331   }
3332 }
3333 
3334 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3335   if (!isGFX10Plus())
3336     return 1;
3337 
3338   switch (Opcode) {
3339   // 64-bit shift instructions can use only one scalar value input
3340   case AMDGPU::V_LSHLREV_B64_e64:
3341   case AMDGPU::V_LSHLREV_B64_gfx10:
3342   case AMDGPU::V_LSHRREV_B64_e64:
3343   case AMDGPU::V_LSHRREV_B64_gfx10:
3344   case AMDGPU::V_ASHRREV_I64_e64:
3345   case AMDGPU::V_ASHRREV_I64_gfx10:
3346   case AMDGPU::V_LSHL_B64_e64:
3347   case AMDGPU::V_LSHR_B64_e64:
3348   case AMDGPU::V_ASHR_I64_e64:
3349     return 1;
3350   default:
3351     return 2;
3352   }
3353 }
3354 
3355 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3356   const MCOperand &MO = Inst.getOperand(OpIdx);
3357   if (MO.isImm()) {
3358     return !isInlineConstant(Inst, OpIdx);
3359   } else if (MO.isReg()) {
3360     auto Reg = MO.getReg();
3361     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3362     auto PReg = mc2PseudoReg(Reg);
3363     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3364   } else {
3365     return true;
3366   }
3367 }
3368 
3369 bool
3370 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3371                                                 const OperandVector &Operands) {
3372   const unsigned Opcode = Inst.getOpcode();
3373   const MCInstrDesc &Desc = MII.get(Opcode);
3374   unsigned LastSGPR = AMDGPU::NoRegister;
3375   unsigned ConstantBusUseCount = 0;
3376   unsigned NumLiterals = 0;
3377   unsigned LiteralSize;
3378 
3379   if (Desc.TSFlags &
3380       (SIInstrFlags::VOPC |
3381        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3382        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3383        SIInstrFlags::SDWA)) {
3384     // Check special imm operands (used by madmk, etc)
3385     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3386       ++NumLiterals;
3387       LiteralSize = 4;
3388     }
3389 
3390     SmallDenseSet<unsigned> SGPRsUsed;
3391     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3392     if (SGPRUsed != AMDGPU::NoRegister) {
3393       SGPRsUsed.insert(SGPRUsed);
3394       ++ConstantBusUseCount;
3395     }
3396 
3397     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3398     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3399     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3400 
3401     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3402 
3403     for (int OpIdx : OpIndices) {
3404       if (OpIdx == -1) break;
3405 
3406       const MCOperand &MO = Inst.getOperand(OpIdx);
3407       if (usesConstantBus(Inst, OpIdx)) {
3408         if (MO.isReg()) {
3409           LastSGPR = mc2PseudoReg(MO.getReg());
3410           // Pairs of registers with a partial intersections like these
3411           //   s0, s[0:1]
3412           //   flat_scratch_lo, flat_scratch
3413           //   flat_scratch_lo, flat_scratch_hi
3414           // are theoretically valid but they are disabled anyway.
3415           // Note that this code mimics SIInstrInfo::verifyInstruction
3416           if (!SGPRsUsed.count(LastSGPR)) {
3417             SGPRsUsed.insert(LastSGPR);
3418             ++ConstantBusUseCount;
3419           }
3420         } else { // Expression or a literal
3421 
3422           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3423             continue; // special operand like VINTERP attr_chan
3424 
3425           // An instruction may use only one literal.
3426           // This has been validated on the previous step.
3427           // See validateVOPLiteral.
3428           // This literal may be used as more than one operand.
3429           // If all these operands are of the same size,
3430           // this literal counts as one scalar value.
3431           // Otherwise it counts as 2 scalar values.
3432           // See "GFX10 Shader Programming", section 3.6.2.3.
3433 
3434           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3435           if (Size < 4) Size = 4;
3436 
3437           if (NumLiterals == 0) {
3438             NumLiterals = 1;
3439             LiteralSize = Size;
3440           } else if (LiteralSize != Size) {
3441             NumLiterals = 2;
3442           }
3443         }
3444       }
3445     }
3446   }
3447   ConstantBusUseCount += NumLiterals;
3448 
3449   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3450     return true;
3451 
3452   SMLoc LitLoc = getLitLoc(Operands);
3453   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3454   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3455   Error(Loc, "invalid operand (violates constant bus restrictions)");
3456   return false;
3457 }
3458 
3459 bool
3460 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3461                                                  const OperandVector &Operands) {
3462   const unsigned Opcode = Inst.getOpcode();
3463   const MCInstrDesc &Desc = MII.get(Opcode);
3464 
3465   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3466   if (DstIdx == -1 ||
3467       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3468     return true;
3469   }
3470 
3471   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3472 
3473   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3474   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3475   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3476 
3477   assert(DstIdx != -1);
3478   const MCOperand &Dst = Inst.getOperand(DstIdx);
3479   assert(Dst.isReg());
3480 
3481   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3482 
3483   for (int SrcIdx : SrcIndices) {
3484     if (SrcIdx == -1) break;
3485     const MCOperand &Src = Inst.getOperand(SrcIdx);
3486     if (Src.isReg()) {
3487       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3488         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3489         Error(getRegLoc(SrcReg, Operands),
3490           "destination must be different than all sources");
3491         return false;
3492       }
3493     }
3494   }
3495 
3496   return true;
3497 }
3498 
3499 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3500 
3501   const unsigned Opc = Inst.getOpcode();
3502   const MCInstrDesc &Desc = MII.get(Opc);
3503 
3504   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3505     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3506     assert(ClampIdx != -1);
3507     return Inst.getOperand(ClampIdx).getImm() == 0;
3508   }
3509 
3510   return true;
3511 }
3512 
3513 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3514 
3515   const unsigned Opc = Inst.getOpcode();
3516   const MCInstrDesc &Desc = MII.get(Opc);
3517 
3518   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3519     return None;
3520 
3521   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3522   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3523   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3524 
3525   assert(VDataIdx != -1);
3526 
3527   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3528     return None;
3529 
3530   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3531   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3532   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3533   if (DMask == 0)
3534     DMask = 1;
3535 
3536   bool isPackedD16 = false;
3537   unsigned DataSize =
3538     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3539   if (hasPackedD16()) {
3540     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3541     isPackedD16 = D16Idx >= 0;
3542     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3543       DataSize = (DataSize + 1) / 2;
3544   }
3545 
3546   if ((VDataSize / 4) == DataSize + TFESize)
3547     return None;
3548 
3549   return StringRef(isPackedD16
3550                        ? "image data size does not match dmask, d16 and tfe"
3551                        : "image data size does not match dmask and tfe");
3552 }
3553 
3554 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3555   const unsigned Opc = Inst.getOpcode();
3556   const MCInstrDesc &Desc = MII.get(Opc);
3557 
3558   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3559     return true;
3560 
3561   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3562 
3563   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3564       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3565   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3566   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3567   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3568   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3569 
3570   assert(VAddr0Idx != -1);
3571   assert(SrsrcIdx != -1);
3572   assert(SrsrcIdx > VAddr0Idx);
3573 
3574   if (DimIdx == -1)
3575     return true; // intersect_ray
3576 
3577   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3578   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3579   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3580   unsigned ActualAddrSize =
3581       IsNSA ? SrsrcIdx - VAddr0Idx
3582             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3583   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3584 
3585   unsigned ExpectedAddrSize =
3586       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3587 
3588   if (!IsNSA) {
3589     if (ExpectedAddrSize > 8)
3590       ExpectedAddrSize = 16;
3591 
3592     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3593     // This provides backward compatibility for assembly created
3594     // before 160b/192b/224b types were directly supported.
3595     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3596       return true;
3597   }
3598 
3599   return ActualAddrSize == ExpectedAddrSize;
3600 }
3601 
3602 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3603 
3604   const unsigned Opc = Inst.getOpcode();
3605   const MCInstrDesc &Desc = MII.get(Opc);
3606 
3607   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3608     return true;
3609   if (!Desc.mayLoad() || !Desc.mayStore())
3610     return true; // Not atomic
3611 
3612   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3613   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3614 
3615   // This is an incomplete check because image_atomic_cmpswap
3616   // may only use 0x3 and 0xf while other atomic operations
3617   // may use 0x1 and 0x3. However these limitations are
3618   // verified when we check that dmask matches dst size.
3619   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3620 }
3621 
3622 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3623 
3624   const unsigned Opc = Inst.getOpcode();
3625   const MCInstrDesc &Desc = MII.get(Opc);
3626 
3627   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3628     return true;
3629 
3630   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3631   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3632 
3633   // GATHER4 instructions use dmask in a different fashion compared to
3634   // other MIMG instructions. The only useful DMASK values are
3635   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3636   // (red,red,red,red) etc.) The ISA document doesn't mention
3637   // this.
3638   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3639 }
3640 
3641 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3642   const unsigned Opc = Inst.getOpcode();
3643   const MCInstrDesc &Desc = MII.get(Opc);
3644 
3645   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3646     return true;
3647 
3648   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3649   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3650       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3651 
3652   if (!BaseOpcode->MSAA)
3653     return true;
3654 
3655   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3656   assert(DimIdx != -1);
3657 
3658   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3659   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3660 
3661   return DimInfo->MSAA;
3662 }
3663 
3664 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3665 {
3666   switch (Opcode) {
3667   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3668   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3669   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3670     return true;
3671   default:
3672     return false;
3673   }
3674 }
3675 
3676 // movrels* opcodes should only allow VGPRS as src0.
3677 // This is specified in .td description for vop1/vop3,
3678 // but sdwa is handled differently. See isSDWAOperand.
3679 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3680                                       const OperandVector &Operands) {
3681 
3682   const unsigned Opc = Inst.getOpcode();
3683   const MCInstrDesc &Desc = MII.get(Opc);
3684 
3685   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3686     return true;
3687 
3688   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3689   assert(Src0Idx != -1);
3690 
3691   SMLoc ErrLoc;
3692   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3693   if (Src0.isReg()) {
3694     auto Reg = mc2PseudoReg(Src0.getReg());
3695     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3696     if (!isSGPR(Reg, TRI))
3697       return true;
3698     ErrLoc = getRegLoc(Reg, Operands);
3699   } else {
3700     ErrLoc = getConstLoc(Operands);
3701   }
3702 
3703   Error(ErrLoc, "source operand must be a VGPR");
3704   return false;
3705 }
3706 
3707 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3708                                           const OperandVector &Operands) {
3709 
3710   const unsigned Opc = Inst.getOpcode();
3711 
3712   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3713     return true;
3714 
3715   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3716   assert(Src0Idx != -1);
3717 
3718   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3719   if (!Src0.isReg())
3720     return true;
3721 
3722   auto Reg = mc2PseudoReg(Src0.getReg());
3723   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3724   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3725     Error(getRegLoc(Reg, Operands),
3726           "source operand must be either a VGPR or an inline constant");
3727     return false;
3728   }
3729 
3730   return true;
3731 }
3732 
3733 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3734                                    const OperandVector &Operands) {
3735   const unsigned Opc = Inst.getOpcode();
3736   const MCInstrDesc &Desc = MII.get(Opc);
3737 
3738   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3739     return true;
3740 
3741   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3742   if (Src2Idx == -1)
3743     return true;
3744 
3745   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3746   if (!Src2.isReg())
3747     return true;
3748 
3749   MCRegister Src2Reg = Src2.getReg();
3750   MCRegister DstReg = Inst.getOperand(0).getReg();
3751   if (Src2Reg == DstReg)
3752     return true;
3753 
3754   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3755   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3756     return true;
3757 
3758   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3759     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3760           "source 2 operand must not partially overlap with dst");
3761     return false;
3762   }
3763 
3764   return true;
3765 }
3766 
3767 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3768   switch (Inst.getOpcode()) {
3769   default:
3770     return true;
3771   case V_DIV_SCALE_F32_gfx6_gfx7:
3772   case V_DIV_SCALE_F32_vi:
3773   case V_DIV_SCALE_F32_gfx10:
3774   case V_DIV_SCALE_F64_gfx6_gfx7:
3775   case V_DIV_SCALE_F64_vi:
3776   case V_DIV_SCALE_F64_gfx10:
3777     break;
3778   }
3779 
3780   // TODO: Check that src0 = src1 or src2.
3781 
3782   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3783                     AMDGPU::OpName::src2_modifiers,
3784                     AMDGPU::OpName::src2_modifiers}) {
3785     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3786             .getImm() &
3787         SISrcMods::ABS) {
3788       return false;
3789     }
3790   }
3791 
3792   return true;
3793 }
3794 
3795 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3796 
3797   const unsigned Opc = Inst.getOpcode();
3798   const MCInstrDesc &Desc = MII.get(Opc);
3799 
3800   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3801     return true;
3802 
3803   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3804   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3805     if (isCI() || isSI())
3806       return false;
3807   }
3808 
3809   return true;
3810 }
3811 
3812 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3813   const unsigned Opc = Inst.getOpcode();
3814   const MCInstrDesc &Desc = MII.get(Opc);
3815 
3816   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3817     return true;
3818 
3819   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3820   if (DimIdx < 0)
3821     return true;
3822 
3823   long Imm = Inst.getOperand(DimIdx).getImm();
3824   if (Imm < 0 || Imm >= 8)
3825     return false;
3826 
3827   return true;
3828 }
3829 
3830 static bool IsRevOpcode(const unsigned Opcode)
3831 {
3832   switch (Opcode) {
3833   case AMDGPU::V_SUBREV_F32_e32:
3834   case AMDGPU::V_SUBREV_F32_e64:
3835   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3836   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3837   case AMDGPU::V_SUBREV_F32_e32_vi:
3838   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3839   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3840   case AMDGPU::V_SUBREV_F32_e64_vi:
3841 
3842   case AMDGPU::V_SUBREV_CO_U32_e32:
3843   case AMDGPU::V_SUBREV_CO_U32_e64:
3844   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3845   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3846 
3847   case AMDGPU::V_SUBBREV_U32_e32:
3848   case AMDGPU::V_SUBBREV_U32_e64:
3849   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3850   case AMDGPU::V_SUBBREV_U32_e32_vi:
3851   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3852   case AMDGPU::V_SUBBREV_U32_e64_vi:
3853 
3854   case AMDGPU::V_SUBREV_U32_e32:
3855   case AMDGPU::V_SUBREV_U32_e64:
3856   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3857   case AMDGPU::V_SUBREV_U32_e32_vi:
3858   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3859   case AMDGPU::V_SUBREV_U32_e64_vi:
3860 
3861   case AMDGPU::V_SUBREV_F16_e32:
3862   case AMDGPU::V_SUBREV_F16_e64:
3863   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3864   case AMDGPU::V_SUBREV_F16_e32_vi:
3865   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3866   case AMDGPU::V_SUBREV_F16_e64_vi:
3867 
3868   case AMDGPU::V_SUBREV_U16_e32:
3869   case AMDGPU::V_SUBREV_U16_e64:
3870   case AMDGPU::V_SUBREV_U16_e32_vi:
3871   case AMDGPU::V_SUBREV_U16_e64_vi:
3872 
3873   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3874   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3875   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3876 
3877   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3878   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3879 
3880   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3881   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3882 
3883   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3884   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3885 
3886   case AMDGPU::V_LSHRREV_B32_e32:
3887   case AMDGPU::V_LSHRREV_B32_e64:
3888   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3889   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3890   case AMDGPU::V_LSHRREV_B32_e32_vi:
3891   case AMDGPU::V_LSHRREV_B32_e64_vi:
3892   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3893   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3894 
3895   case AMDGPU::V_ASHRREV_I32_e32:
3896   case AMDGPU::V_ASHRREV_I32_e64:
3897   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3898   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3899   case AMDGPU::V_ASHRREV_I32_e32_vi:
3900   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3901   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3902   case AMDGPU::V_ASHRREV_I32_e64_vi:
3903 
3904   case AMDGPU::V_LSHLREV_B32_e32:
3905   case AMDGPU::V_LSHLREV_B32_e64:
3906   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3907   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3908   case AMDGPU::V_LSHLREV_B32_e32_vi:
3909   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3910   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3911   case AMDGPU::V_LSHLREV_B32_e64_vi:
3912 
3913   case AMDGPU::V_LSHLREV_B16_e32:
3914   case AMDGPU::V_LSHLREV_B16_e64:
3915   case AMDGPU::V_LSHLREV_B16_e32_vi:
3916   case AMDGPU::V_LSHLREV_B16_e64_vi:
3917   case AMDGPU::V_LSHLREV_B16_gfx10:
3918 
3919   case AMDGPU::V_LSHRREV_B16_e32:
3920   case AMDGPU::V_LSHRREV_B16_e64:
3921   case AMDGPU::V_LSHRREV_B16_e32_vi:
3922   case AMDGPU::V_LSHRREV_B16_e64_vi:
3923   case AMDGPU::V_LSHRREV_B16_gfx10:
3924 
3925   case AMDGPU::V_ASHRREV_I16_e32:
3926   case AMDGPU::V_ASHRREV_I16_e64:
3927   case AMDGPU::V_ASHRREV_I16_e32_vi:
3928   case AMDGPU::V_ASHRREV_I16_e64_vi:
3929   case AMDGPU::V_ASHRREV_I16_gfx10:
3930 
3931   case AMDGPU::V_LSHLREV_B64_e64:
3932   case AMDGPU::V_LSHLREV_B64_gfx10:
3933   case AMDGPU::V_LSHLREV_B64_vi:
3934 
3935   case AMDGPU::V_LSHRREV_B64_e64:
3936   case AMDGPU::V_LSHRREV_B64_gfx10:
3937   case AMDGPU::V_LSHRREV_B64_vi:
3938 
3939   case AMDGPU::V_ASHRREV_I64_e64:
3940   case AMDGPU::V_ASHRREV_I64_gfx10:
3941   case AMDGPU::V_ASHRREV_I64_vi:
3942 
3943   case AMDGPU::V_PK_LSHLREV_B16:
3944   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3945   case AMDGPU::V_PK_LSHLREV_B16_vi:
3946 
3947   case AMDGPU::V_PK_LSHRREV_B16:
3948   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3949   case AMDGPU::V_PK_LSHRREV_B16_vi:
3950   case AMDGPU::V_PK_ASHRREV_I16:
3951   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3952   case AMDGPU::V_PK_ASHRREV_I16_vi:
3953     return true;
3954   default:
3955     return false;
3956   }
3957 }
3958 
3959 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3960 
3961   using namespace SIInstrFlags;
3962   const unsigned Opcode = Inst.getOpcode();
3963   const MCInstrDesc &Desc = MII.get(Opcode);
3964 
3965   // lds_direct register is defined so that it can be used
3966   // with 9-bit operands only. Ignore encodings which do not accept these.
3967   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3968   if ((Desc.TSFlags & Enc) == 0)
3969     return None;
3970 
3971   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3972     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3973     if (SrcIdx == -1)
3974       break;
3975     const auto &Src = Inst.getOperand(SrcIdx);
3976     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3977 
3978       if (isGFX90A() || isGFX11Plus())
3979         return StringRef("lds_direct is not supported on this GPU");
3980 
3981       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3982         return StringRef("lds_direct cannot be used with this instruction");
3983 
3984       if (SrcName != OpName::src0)
3985         return StringRef("lds_direct may be used as src0 only");
3986     }
3987   }
3988 
3989   return None;
3990 }
3991 
3992 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3993   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3994     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3995     if (Op.isFlatOffset())
3996       return Op.getStartLoc();
3997   }
3998   return getLoc();
3999 }
4000 
4001 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4002                                          const OperandVector &Operands) {
4003   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4004   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4005     return true;
4006 
4007   auto Opcode = Inst.getOpcode();
4008   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4009   assert(OpNum != -1);
4010 
4011   const auto &Op = Inst.getOperand(OpNum);
4012   if (!hasFlatOffsets() && Op.getImm() != 0) {
4013     Error(getFlatOffsetLoc(Operands),
4014           "flat offset modifier is not supported on this GPU");
4015     return false;
4016   }
4017 
4018   // For FLAT segment the offset must be positive;
4019   // MSB is ignored and forced to zero.
4020   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4021     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4022     if (!isIntN(OffsetSize, Op.getImm())) {
4023       Error(getFlatOffsetLoc(Operands),
4024             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4025       return false;
4026     }
4027   } else {
4028     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4029     if (!isUIntN(OffsetSize, Op.getImm())) {
4030       Error(getFlatOffsetLoc(Operands),
4031             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4032       return false;
4033     }
4034   }
4035 
4036   return true;
4037 }
4038 
4039 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4040   // Start with second operand because SMEM Offset cannot be dst or src0.
4041   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4042     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4043     if (Op.isSMEMOffset())
4044       return Op.getStartLoc();
4045   }
4046   return getLoc();
4047 }
4048 
4049 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4050                                          const OperandVector &Operands) {
4051   if (isCI() || isSI())
4052     return true;
4053 
4054   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4055   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4056     return true;
4057 
4058   auto Opcode = Inst.getOpcode();
4059   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4060   if (OpNum == -1)
4061     return true;
4062 
4063   const auto &Op = Inst.getOperand(OpNum);
4064   if (!Op.isImm())
4065     return true;
4066 
4067   uint64_t Offset = Op.getImm();
4068   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4069   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4070       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4071     return true;
4072 
4073   Error(getSMEMOffsetLoc(Operands),
4074         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4075                                "expected a 21-bit signed offset");
4076 
4077   return false;
4078 }
4079 
4080 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4081   unsigned Opcode = Inst.getOpcode();
4082   const MCInstrDesc &Desc = MII.get(Opcode);
4083   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4084     return true;
4085 
4086   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4087   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4088 
4089   const int OpIndices[] = { Src0Idx, Src1Idx };
4090 
4091   unsigned NumExprs = 0;
4092   unsigned NumLiterals = 0;
4093   uint32_t LiteralValue;
4094 
4095   for (int OpIdx : OpIndices) {
4096     if (OpIdx == -1) break;
4097 
4098     const MCOperand &MO = Inst.getOperand(OpIdx);
4099     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4100     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4101       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4102         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4103         if (NumLiterals == 0 || LiteralValue != Value) {
4104           LiteralValue = Value;
4105           ++NumLiterals;
4106         }
4107       } else if (MO.isExpr()) {
4108         ++NumExprs;
4109       }
4110     }
4111   }
4112 
4113   return NumLiterals + NumExprs <= 1;
4114 }
4115 
4116 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4117   const unsigned Opc = Inst.getOpcode();
4118   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4119       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4120     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4121     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4122 
4123     if (OpSel & ~3)
4124       return false;
4125   }
4126 
4127   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4128     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4129     if (OpSelIdx != -1) {
4130       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4131         return false;
4132     }
4133     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4134     if (OpSelHiIdx != -1) {
4135       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4136         return false;
4137     }
4138   }
4139 
4140   return true;
4141 }
4142 
4143 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4144                                   const OperandVector &Operands) {
4145   const unsigned Opc = Inst.getOpcode();
4146   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4147   if (DppCtrlIdx < 0)
4148     return true;
4149   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4150 
4151   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4152     // DPP64 is supported for row_newbcast only.
4153     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4154     if (Src0Idx >= 0 &&
4155         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4156       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4157       Error(S, "64 bit dpp only supports row_newbcast");
4158       return false;
4159     }
4160   }
4161 
4162   return true;
4163 }
4164 
4165 // Check if VCC register matches wavefront size
4166 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4167   auto FB = getFeatureBits();
4168   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4169     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4170 }
4171 
4172 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4173 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4174                                          const OperandVector &Operands) {
4175   unsigned Opcode = Inst.getOpcode();
4176   const MCInstrDesc &Desc = MII.get(Opcode);
4177   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4178   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4179       ImmIdx == -1)
4180     return true;
4181 
4182   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4183   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4184   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4185 
4186   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4187 
4188   unsigned NumExprs = 0;
4189   unsigned NumLiterals = 0;
4190   uint32_t LiteralValue;
4191 
4192   for (int OpIdx : OpIndices) {
4193     if (OpIdx == -1)
4194       continue;
4195 
4196     const MCOperand &MO = Inst.getOperand(OpIdx);
4197     if (!MO.isImm() && !MO.isExpr())
4198       continue;
4199     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4200       continue;
4201 
4202     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4203         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4204       Error(getConstLoc(Operands),
4205             "inline constants are not allowed for this operand");
4206       return false;
4207     }
4208 
4209     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4210       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4211       if (NumLiterals == 0 || LiteralValue != Value) {
4212         LiteralValue = Value;
4213         ++NumLiterals;
4214       }
4215     } else if (MO.isExpr()) {
4216       ++NumExprs;
4217     }
4218   }
4219   NumLiterals += NumExprs;
4220 
4221   if (!NumLiterals)
4222     return true;
4223 
4224   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4225     Error(getLitLoc(Operands), "literal operands are not supported");
4226     return false;
4227   }
4228 
4229   if (NumLiterals > 1) {
4230     Error(getLitLoc(Operands), "only one literal operand is allowed");
4231     return false;
4232   }
4233 
4234   return true;
4235 }
4236 
4237 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4238 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4239                          const MCRegisterInfo *MRI) {
4240   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4241   if (OpIdx < 0)
4242     return -1;
4243 
4244   const MCOperand &Op = Inst.getOperand(OpIdx);
4245   if (!Op.isReg())
4246     return -1;
4247 
4248   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4249   auto Reg = Sub ? Sub : Op.getReg();
4250   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4251   return AGPR32.contains(Reg) ? 1 : 0;
4252 }
4253 
4254 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4255   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4256   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4257                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4258                   SIInstrFlags::DS)) == 0)
4259     return true;
4260 
4261   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4262                                                       : AMDGPU::OpName::vdata;
4263 
4264   const MCRegisterInfo *MRI = getMRI();
4265   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4266   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4267 
4268   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4269     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4270     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4271       return false;
4272   }
4273 
4274   auto FB = getFeatureBits();
4275   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4276     if (DataAreg < 0 || DstAreg < 0)
4277       return true;
4278     return DstAreg == DataAreg;
4279   }
4280 
4281   return DstAreg < 1 && DataAreg < 1;
4282 }
4283 
4284 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4285   auto FB = getFeatureBits();
4286   if (!FB[AMDGPU::FeatureGFX90AInsts])
4287     return true;
4288 
4289   const MCRegisterInfo *MRI = getMRI();
4290   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4291   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4292   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4293     const MCOperand &Op = Inst.getOperand(I);
4294     if (!Op.isReg())
4295       continue;
4296 
4297     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4298     if (!Sub)
4299       continue;
4300 
4301     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4302       return false;
4303     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4304       return false;
4305   }
4306 
4307   return true;
4308 }
4309 
4310 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4311   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4312     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4313     if (Op.isBLGP())
4314       return Op.getStartLoc();
4315   }
4316   return SMLoc();
4317 }
4318 
4319 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4320                                    const OperandVector &Operands) {
4321   unsigned Opc = Inst.getOpcode();
4322   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4323   if (BlgpIdx == -1)
4324     return true;
4325   SMLoc BLGPLoc = getBLGPLoc(Operands);
4326   if (!BLGPLoc.isValid())
4327     return true;
4328   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4329   auto FB = getFeatureBits();
4330   bool UsesNeg = false;
4331   if (FB[AMDGPU::FeatureGFX940Insts]) {
4332     switch (Opc) {
4333     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4334     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4335     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4336     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4337       UsesNeg = true;
4338     }
4339   }
4340 
4341   if (IsNeg == UsesNeg)
4342     return true;
4343 
4344   Error(BLGPLoc,
4345         UsesNeg ? "invalid modifier: blgp is not supported"
4346                 : "invalid modifier: neg is not supported");
4347 
4348   return false;
4349 }
4350 
4351 // gfx90a has an undocumented limitation:
4352 // DS_GWS opcodes must use even aligned registers.
4353 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4354                                   const OperandVector &Operands) {
4355   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4356     return true;
4357 
4358   int Opc = Inst.getOpcode();
4359   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4360       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4361     return true;
4362 
4363   const MCRegisterInfo *MRI = getMRI();
4364   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4365   int Data0Pos =
4366       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4367   assert(Data0Pos != -1);
4368   auto Reg = Inst.getOperand(Data0Pos).getReg();
4369   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4370   if (RegIdx & 1) {
4371     SMLoc RegLoc = getRegLoc(Reg, Operands);
4372     Error(RegLoc, "vgpr must be even aligned");
4373     return false;
4374   }
4375 
4376   return true;
4377 }
4378 
4379 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4380                                             const OperandVector &Operands,
4381                                             const SMLoc &IDLoc) {
4382   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4383                                            AMDGPU::OpName::cpol);
4384   if (CPolPos == -1)
4385     return true;
4386 
4387   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4388 
4389   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4390   if (TSFlags & SIInstrFlags::SMRD) {
4391     if (CPol && (isSI() || isCI())) {
4392       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4393       Error(S, "cache policy is not supported for SMRD instructions");
4394       return false;
4395     }
4396     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4397       Error(IDLoc, "invalid cache policy for SMEM instruction");
4398       return false;
4399     }
4400   }
4401 
4402   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4403     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4404     StringRef CStr(S.getPointer());
4405     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4406     Error(S, "scc is not supported on this GPU");
4407     return false;
4408   }
4409 
4410   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4411     return true;
4412 
4413   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4414     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4415       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4416                               : "instruction must use glc");
4417       return false;
4418     }
4419   } else {
4420     if (CPol & CPol::GLC) {
4421       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4422       StringRef CStr(S.getPointer());
4423       S = SMLoc::getFromPointer(
4424           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4425       Error(S, isGFX940() ? "instruction must not use sc0"
4426                           : "instruction must not use glc");
4427       return false;
4428     }
4429   }
4430 
4431   return true;
4432 }
4433 
4434 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4435                                          const OperandVector &Operands,
4436                                          const SMLoc &IDLoc) {
4437   if (isGFX940())
4438     return true;
4439 
4440   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4441   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4442       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4443     return true;
4444   // This is FLAT LDS DMA.
4445 
4446   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4447   StringRef CStr(S.getPointer());
4448   if (!CStr.startswith("lds")) {
4449     // This is incorrectly selected LDS DMA version of a FLAT load opcode.
4450     // And LDS version should have 'lds' modifier, but it follows optional
4451     // operands so its absense is ignored by the matcher.
4452     Error(IDLoc, "invalid operands for instruction");
4453     return false;
4454   }
4455 
4456   return true;
4457 }
4458 
4459 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4460                                           const SMLoc &IDLoc,
4461                                           const OperandVector &Operands) {
4462   if (auto ErrMsg = validateLdsDirect(Inst)) {
4463     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4464     return false;
4465   }
4466   if (!validateSOPLiteral(Inst)) {
4467     Error(getLitLoc(Operands),
4468       "only one literal operand is allowed");
4469     return false;
4470   }
4471   if (!validateVOPLiteral(Inst, Operands)) {
4472     return false;
4473   }
4474   if (!validateConstantBusLimitations(Inst, Operands)) {
4475     return false;
4476   }
4477   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4478     return false;
4479   }
4480   if (!validateIntClampSupported(Inst)) {
4481     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4482       "integer clamping is not supported on this GPU");
4483     return false;
4484   }
4485   if (!validateOpSel(Inst)) {
4486     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4487       "invalid op_sel operand");
4488     return false;
4489   }
4490   if (!validateDPP(Inst, Operands)) {
4491     return false;
4492   }
4493   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4494   if (!validateMIMGD16(Inst)) {
4495     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4496       "d16 modifier is not supported on this GPU");
4497     return false;
4498   }
4499   if (!validateMIMGDim(Inst)) {
4500     Error(IDLoc, "dim modifier is required on this GPU");
4501     return false;
4502   }
4503   if (!validateMIMGMSAA(Inst)) {
4504     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4505           "invalid dim; must be MSAA type");
4506     return false;
4507   }
4508   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4509     Error(IDLoc, *ErrMsg);
4510     return false;
4511   }
4512   if (!validateMIMGAddrSize(Inst)) {
4513     Error(IDLoc,
4514       "image address size does not match dim and a16");
4515     return false;
4516   }
4517   if (!validateMIMGAtomicDMask(Inst)) {
4518     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4519       "invalid atomic image dmask");
4520     return false;
4521   }
4522   if (!validateMIMGGatherDMask(Inst)) {
4523     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4524       "invalid image_gather dmask: only one bit must be set");
4525     return false;
4526   }
4527   if (!validateMovrels(Inst, Operands)) {
4528     return false;
4529   }
4530   if (!validateFlatOffset(Inst, Operands)) {
4531     return false;
4532   }
4533   if (!validateSMEMOffset(Inst, Operands)) {
4534     return false;
4535   }
4536   if (!validateMAIAccWrite(Inst, Operands)) {
4537     return false;
4538   }
4539   if (!validateMFMA(Inst, Operands)) {
4540     return false;
4541   }
4542   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4543     return false;
4544   }
4545 
4546   if (!validateAGPRLdSt(Inst)) {
4547     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4548     ? "invalid register class: data and dst should be all VGPR or AGPR"
4549     : "invalid register class: agpr loads and stores not supported on this GPU"
4550     );
4551     return false;
4552   }
4553   if (!validateVGPRAlign(Inst)) {
4554     Error(IDLoc,
4555       "invalid register class: vgpr tuples must be 64 bit aligned");
4556     return false;
4557   }
4558   if (!validateGWS(Inst, Operands)) {
4559     return false;
4560   }
4561 
4562   if (!validateBLGP(Inst, Operands)) {
4563     return false;
4564   }
4565 
4566   if (!validateDivScale(Inst)) {
4567     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4568     return false;
4569   }
4570   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4571     return false;
4572   }
4573 
4574   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4575     return false;
4576   }
4577 
4578   return true;
4579 }
4580 
4581 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4582                                             const FeatureBitset &FBS,
4583                                             unsigned VariantID = 0);
4584 
4585 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4586                                 const FeatureBitset &AvailableFeatures,
4587                                 unsigned VariantID);
4588 
4589 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4590                                        const FeatureBitset &FBS) {
4591   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4592 }
4593 
4594 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4595                                        const FeatureBitset &FBS,
4596                                        ArrayRef<unsigned> Variants) {
4597   for (auto Variant : Variants) {
4598     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4599       return true;
4600   }
4601 
4602   return false;
4603 }
4604 
4605 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4606                                                   const SMLoc &IDLoc) {
4607   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4608 
4609   // Check if requested instruction variant is supported.
4610   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4611     return false;
4612 
4613   // This instruction is not supported.
4614   // Clear any other pending errors because they are no longer relevant.
4615   getParser().clearPendingErrors();
4616 
4617   // Requested instruction variant is not supported.
4618   // Check if any other variants are supported.
4619   StringRef VariantName = getMatchedVariantName();
4620   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4621     return Error(IDLoc,
4622                  Twine(VariantName,
4623                        " variant of this instruction is not supported"));
4624   }
4625 
4626   // Finally check if this instruction is supported on any other GPU.
4627   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4628     return Error(IDLoc, "instruction not supported on this GPU");
4629   }
4630 
4631   // Instruction not supported on any GPU. Probably a typo.
4632   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4633   return Error(IDLoc, "invalid instruction" + Suggestion);
4634 }
4635 
4636 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4637                                               OperandVector &Operands,
4638                                               MCStreamer &Out,
4639                                               uint64_t &ErrorInfo,
4640                                               bool MatchingInlineAsm) {
4641   MCInst Inst;
4642   unsigned Result = Match_Success;
4643   for (auto Variant : getMatchedVariants()) {
4644     uint64_t EI;
4645     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4646                                   Variant);
4647     // We order match statuses from least to most specific. We use most specific
4648     // status as resulting
4649     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4650     if ((R == Match_Success) ||
4651         (R == Match_PreferE32) ||
4652         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4653         (R == Match_InvalidOperand && Result != Match_MissingFeature
4654                                    && Result != Match_PreferE32) ||
4655         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4656                                    && Result != Match_MissingFeature
4657                                    && Result != Match_PreferE32)) {
4658       Result = R;
4659       ErrorInfo = EI;
4660     }
4661     if (R == Match_Success)
4662       break;
4663   }
4664 
4665   if (Result == Match_Success) {
4666     if (!validateInstruction(Inst, IDLoc, Operands)) {
4667       return true;
4668     }
4669     Inst.setLoc(IDLoc);
4670     Out.emitInstruction(Inst, getSTI());
4671     return false;
4672   }
4673 
4674   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4675   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4676     return true;
4677   }
4678 
4679   switch (Result) {
4680   default: break;
4681   case Match_MissingFeature:
4682     // It has been verified that the specified instruction
4683     // mnemonic is valid. A match was found but it requires
4684     // features which are not supported on this GPU.
4685     return Error(IDLoc, "operands are not valid for this GPU or mode");
4686 
4687   case Match_InvalidOperand: {
4688     SMLoc ErrorLoc = IDLoc;
4689     if (ErrorInfo != ~0ULL) {
4690       if (ErrorInfo >= Operands.size()) {
4691         return Error(IDLoc, "too few operands for instruction");
4692       }
4693       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4694       if (ErrorLoc == SMLoc())
4695         ErrorLoc = IDLoc;
4696     }
4697     return Error(ErrorLoc, "invalid operand for instruction");
4698   }
4699 
4700   case Match_PreferE32:
4701     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4702                         "should be encoded as e32");
4703   case Match_MnemonicFail:
4704     llvm_unreachable("Invalid instructions should have been handled already");
4705   }
4706   llvm_unreachable("Implement any new match types added!");
4707 }
4708 
4709 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4710   int64_t Tmp = -1;
4711   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4712     return true;
4713   }
4714   if (getParser().parseAbsoluteExpression(Tmp)) {
4715     return true;
4716   }
4717   Ret = static_cast<uint32_t>(Tmp);
4718   return false;
4719 }
4720 
4721 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4722                                                uint32_t &Minor) {
4723   if (ParseAsAbsoluteExpression(Major))
4724     return TokError("invalid major version");
4725 
4726   if (!trySkipToken(AsmToken::Comma))
4727     return TokError("minor version number required, comma expected");
4728 
4729   if (ParseAsAbsoluteExpression(Minor))
4730     return TokError("invalid minor version");
4731 
4732   return false;
4733 }
4734 
4735 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4736   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4737     return TokError("directive only supported for amdgcn architecture");
4738 
4739   std::string TargetIDDirective;
4740   SMLoc TargetStart = getTok().getLoc();
4741   if (getParser().parseEscapedString(TargetIDDirective))
4742     return true;
4743 
4744   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4745   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4746     return getParser().Error(TargetRange.Start,
4747         (Twine(".amdgcn_target directive's target id ") +
4748          Twine(TargetIDDirective) +
4749          Twine(" does not match the specified target id ") +
4750          Twine(getTargetStreamer().getTargetID()->toString())).str());
4751 
4752   return false;
4753 }
4754 
4755 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4756   return Error(Range.Start, "value out of range", Range);
4757 }
4758 
4759 bool AMDGPUAsmParser::calculateGPRBlocks(
4760     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4761     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4762     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4763     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4764   // TODO(scott.linder): These calculations are duplicated from
4765   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4766   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4767 
4768   unsigned NumVGPRs = NextFreeVGPR;
4769   unsigned NumSGPRs = NextFreeSGPR;
4770 
4771   if (Version.Major >= 10)
4772     NumSGPRs = 0;
4773   else {
4774     unsigned MaxAddressableNumSGPRs =
4775         IsaInfo::getAddressableNumSGPRs(&getSTI());
4776 
4777     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4778         NumSGPRs > MaxAddressableNumSGPRs)
4779       return OutOfRangeError(SGPRRange);
4780 
4781     NumSGPRs +=
4782         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4783 
4784     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4785         NumSGPRs > MaxAddressableNumSGPRs)
4786       return OutOfRangeError(SGPRRange);
4787 
4788     if (Features.test(FeatureSGPRInitBug))
4789       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4790   }
4791 
4792   VGPRBlocks =
4793       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4794   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4795 
4796   return false;
4797 }
4798 
4799 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4800   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4801     return TokError("directive only supported for amdgcn architecture");
4802 
4803   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4804     return TokError("directive only supported for amdhsa OS");
4805 
4806   StringRef KernelName;
4807   if (getParser().parseIdentifier(KernelName))
4808     return true;
4809 
4810   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4811 
4812   StringSet<> Seen;
4813 
4814   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4815 
4816   SMRange VGPRRange;
4817   uint64_t NextFreeVGPR = 0;
4818   uint64_t AccumOffset = 0;
4819   uint64_t SharedVGPRCount = 0;
4820   SMRange SGPRRange;
4821   uint64_t NextFreeSGPR = 0;
4822 
4823   // Count the number of user SGPRs implied from the enabled feature bits.
4824   unsigned ImpliedUserSGPRCount = 0;
4825 
4826   // Track if the asm explicitly contains the directive for the user SGPR
4827   // count.
4828   Optional<unsigned> ExplicitUserSGPRCount;
4829   bool ReserveVCC = true;
4830   bool ReserveFlatScr = true;
4831   Optional<bool> EnableWavefrontSize32;
4832 
4833   while (true) {
4834     while (trySkipToken(AsmToken::EndOfStatement));
4835 
4836     StringRef ID;
4837     SMRange IDRange = getTok().getLocRange();
4838     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4839       return true;
4840 
4841     if (ID == ".end_amdhsa_kernel")
4842       break;
4843 
4844     if (Seen.find(ID) != Seen.end())
4845       return TokError(".amdhsa_ directives cannot be repeated");
4846     Seen.insert(ID);
4847 
4848     SMLoc ValStart = getLoc();
4849     int64_t IVal;
4850     if (getParser().parseAbsoluteExpression(IVal))
4851       return true;
4852     SMLoc ValEnd = getLoc();
4853     SMRange ValRange = SMRange(ValStart, ValEnd);
4854 
4855     if (IVal < 0)
4856       return OutOfRangeError(ValRange);
4857 
4858     uint64_t Val = IVal;
4859 
4860 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4861   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4862     return OutOfRangeError(RANGE);                                             \
4863   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4864 
4865     if (ID == ".amdhsa_group_segment_fixed_size") {
4866       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4867         return OutOfRangeError(ValRange);
4868       KD.group_segment_fixed_size = Val;
4869     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4870       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4871         return OutOfRangeError(ValRange);
4872       KD.private_segment_fixed_size = Val;
4873     } else if (ID == ".amdhsa_kernarg_size") {
4874       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4875         return OutOfRangeError(ValRange);
4876       KD.kernarg_size = Val;
4877     } else if (ID == ".amdhsa_user_sgpr_count") {
4878       ExplicitUserSGPRCount = Val;
4879     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4880       if (hasArchitectedFlatScratch())
4881         return Error(IDRange.Start,
4882                      "directive is not supported with architected flat scratch",
4883                      IDRange);
4884       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4885                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4886                        Val, ValRange);
4887       if (Val)
4888         ImpliedUserSGPRCount += 4;
4889     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4890       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4891                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4892                        ValRange);
4893       if (Val)
4894         ImpliedUserSGPRCount += 2;
4895     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4896       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4897                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4898                        ValRange);
4899       if (Val)
4900         ImpliedUserSGPRCount += 2;
4901     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4902       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4903                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4904                        Val, ValRange);
4905       if (Val)
4906         ImpliedUserSGPRCount += 2;
4907     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4908       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4909                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4910                        ValRange);
4911       if (Val)
4912         ImpliedUserSGPRCount += 2;
4913     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4914       if (hasArchitectedFlatScratch())
4915         return Error(IDRange.Start,
4916                      "directive is not supported with architected flat scratch",
4917                      IDRange);
4918       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4919                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4920                        ValRange);
4921       if (Val)
4922         ImpliedUserSGPRCount += 2;
4923     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4924       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4925                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4926                        Val, ValRange);
4927       if (Val)
4928         ImpliedUserSGPRCount += 1;
4929     } else if (ID == ".amdhsa_wavefront_size32") {
4930       if (IVersion.Major < 10)
4931         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4932       EnableWavefrontSize32 = Val;
4933       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4934                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4935                        Val, ValRange);
4936     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4937       if (hasArchitectedFlatScratch())
4938         return Error(IDRange.Start,
4939                      "directive is not supported with architected flat scratch",
4940                      IDRange);
4941       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4942                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4943     } else if (ID == ".amdhsa_enable_private_segment") {
4944       if (!hasArchitectedFlatScratch())
4945         return Error(
4946             IDRange.Start,
4947             "directive is not supported without architected flat scratch",
4948             IDRange);
4949       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4950                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4951     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4952       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4953                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4954                        ValRange);
4955     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4956       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4957                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4958                        ValRange);
4959     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4960       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4961                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4962                        ValRange);
4963     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4964       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4965                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4966                        ValRange);
4967     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4968       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4969                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4970                        ValRange);
4971     } else if (ID == ".amdhsa_next_free_vgpr") {
4972       VGPRRange = ValRange;
4973       NextFreeVGPR = Val;
4974     } else if (ID == ".amdhsa_next_free_sgpr") {
4975       SGPRRange = ValRange;
4976       NextFreeSGPR = Val;
4977     } else if (ID == ".amdhsa_accum_offset") {
4978       if (!isGFX90A())
4979         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4980       AccumOffset = Val;
4981     } else if (ID == ".amdhsa_reserve_vcc") {
4982       if (!isUInt<1>(Val))
4983         return OutOfRangeError(ValRange);
4984       ReserveVCC = Val;
4985     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4986       if (IVersion.Major < 7)
4987         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4988       if (hasArchitectedFlatScratch())
4989         return Error(IDRange.Start,
4990                      "directive is not supported with architected flat scratch",
4991                      IDRange);
4992       if (!isUInt<1>(Val))
4993         return OutOfRangeError(ValRange);
4994       ReserveFlatScr = Val;
4995     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4996       if (IVersion.Major < 8)
4997         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4998       if (!isUInt<1>(Val))
4999         return OutOfRangeError(ValRange);
5000       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5001         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5002                                  IDRange);
5003     } else if (ID == ".amdhsa_float_round_mode_32") {
5004       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5005                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5006     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5007       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5008                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5009     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5010       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5011                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5012     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5013       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5014                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5015                        ValRange);
5016     } else if (ID == ".amdhsa_dx10_clamp") {
5017       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5018                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5019     } else if (ID == ".amdhsa_ieee_mode") {
5020       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5021                        Val, ValRange);
5022     } else if (ID == ".amdhsa_fp16_overflow") {
5023       if (IVersion.Major < 9)
5024         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5025       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5026                        ValRange);
5027     } else if (ID == ".amdhsa_tg_split") {
5028       if (!isGFX90A())
5029         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5030       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5031                        ValRange);
5032     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5033       if (IVersion.Major < 10)
5034         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5035       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5036                        ValRange);
5037     } else if (ID == ".amdhsa_memory_ordered") {
5038       if (IVersion.Major < 10)
5039         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5040       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5041                        ValRange);
5042     } else if (ID == ".amdhsa_forward_progress") {
5043       if (IVersion.Major < 10)
5044         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5045       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5046                        ValRange);
5047     } else if (ID == ".amdhsa_shared_vgpr_count") {
5048       if (IVersion.Major < 10)
5049         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5050       SharedVGPRCount = Val;
5051       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5052                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
5053                        ValRange);
5054     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5055       PARSE_BITS_ENTRY(
5056           KD.compute_pgm_rsrc2,
5057           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5058           ValRange);
5059     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5060       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5061                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5062                        Val, ValRange);
5063     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5064       PARSE_BITS_ENTRY(
5065           KD.compute_pgm_rsrc2,
5066           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5067           ValRange);
5068     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5069       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5070                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5071                        Val, ValRange);
5072     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5073       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5074                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5075                        Val, ValRange);
5076     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5077       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5078                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5079                        Val, ValRange);
5080     } else if (ID == ".amdhsa_exception_int_div_zero") {
5081       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5082                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5083                        Val, ValRange);
5084     } else {
5085       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5086     }
5087 
5088 #undef PARSE_BITS_ENTRY
5089   }
5090 
5091   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5092     return TokError(".amdhsa_next_free_vgpr directive is required");
5093 
5094   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5095     return TokError(".amdhsa_next_free_sgpr directive is required");
5096 
5097   unsigned VGPRBlocks;
5098   unsigned SGPRBlocks;
5099   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5100                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5101                          EnableWavefrontSize32, NextFreeVGPR,
5102                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5103                          SGPRBlocks))
5104     return true;
5105 
5106   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5107           VGPRBlocks))
5108     return OutOfRangeError(VGPRRange);
5109   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5110                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5111 
5112   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5113           SGPRBlocks))
5114     return OutOfRangeError(SGPRRange);
5115   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5116                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5117                   SGPRBlocks);
5118 
5119   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5120     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5121                     "enabled user SGPRs");
5122 
5123   unsigned UserSGPRCount =
5124       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5125 
5126   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5127     return TokError("too many user SGPRs enabled");
5128   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5129                   UserSGPRCount);
5130 
5131   if (isGFX90A()) {
5132     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5133       return TokError(".amdhsa_accum_offset directive is required");
5134     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5135       return TokError("accum_offset should be in range [4..256] in "
5136                       "increments of 4");
5137     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5138       return TokError("accum_offset exceeds total VGPR allocation");
5139     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5140                     (AccumOffset / 4 - 1));
5141   }
5142 
5143   if (IVersion.Major == 10) {
5144     // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5145     if (SharedVGPRCount && EnableWavefrontSize32) {
5146       return TokError("shared_vgpr_count directive not valid on "
5147                       "wavefront size 32");
5148     }
5149     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5150       return TokError("shared_vgpr_count*2 + "
5151                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5152                       "exceed 63\n");
5153     }
5154   }
5155 
5156   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5157       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5158       ReserveFlatScr);
5159   return false;
5160 }
5161 
5162 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5163   uint32_t Major;
5164   uint32_t Minor;
5165 
5166   if (ParseDirectiveMajorMinor(Major, Minor))
5167     return true;
5168 
5169   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5170   return false;
5171 }
5172 
5173 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5174   uint32_t Major;
5175   uint32_t Minor;
5176   uint32_t Stepping;
5177   StringRef VendorName;
5178   StringRef ArchName;
5179 
5180   // If this directive has no arguments, then use the ISA version for the
5181   // targeted GPU.
5182   if (isToken(AsmToken::EndOfStatement)) {
5183     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5184     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5185                                                         ISA.Stepping,
5186                                                         "AMD", "AMDGPU");
5187     return false;
5188   }
5189 
5190   if (ParseDirectiveMajorMinor(Major, Minor))
5191     return true;
5192 
5193   if (!trySkipToken(AsmToken::Comma))
5194     return TokError("stepping version number required, comma expected");
5195 
5196   if (ParseAsAbsoluteExpression(Stepping))
5197     return TokError("invalid stepping version");
5198 
5199   if (!trySkipToken(AsmToken::Comma))
5200     return TokError("vendor name required, comma expected");
5201 
5202   if (!parseString(VendorName, "invalid vendor name"))
5203     return true;
5204 
5205   if (!trySkipToken(AsmToken::Comma))
5206     return TokError("arch name required, comma expected");
5207 
5208   if (!parseString(ArchName, "invalid arch name"))
5209     return true;
5210 
5211   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5212                                                       VendorName, ArchName);
5213   return false;
5214 }
5215 
5216 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5217                                                amd_kernel_code_t &Header) {
5218   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5219   // assembly for backwards compatibility.
5220   if (ID == "max_scratch_backing_memory_byte_size") {
5221     Parser.eatToEndOfStatement();
5222     return false;
5223   }
5224 
5225   SmallString<40> ErrStr;
5226   raw_svector_ostream Err(ErrStr);
5227   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5228     return TokError(Err.str());
5229   }
5230   Lex();
5231 
5232   if (ID == "enable_wavefront_size32") {
5233     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5234       if (!isGFX10Plus())
5235         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5236       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5237         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5238     } else {
5239       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5240         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5241     }
5242   }
5243 
5244   if (ID == "wavefront_size") {
5245     if (Header.wavefront_size == 5) {
5246       if (!isGFX10Plus())
5247         return TokError("wavefront_size=5 is only allowed on GFX10+");
5248       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5249         return TokError("wavefront_size=5 requires +WavefrontSize32");
5250     } else if (Header.wavefront_size == 6) {
5251       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5252         return TokError("wavefront_size=6 requires +WavefrontSize64");
5253     }
5254   }
5255 
5256   if (ID == "enable_wgp_mode") {
5257     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5258         !isGFX10Plus())
5259       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5260   }
5261 
5262   if (ID == "enable_mem_ordered") {
5263     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5264         !isGFX10Plus())
5265       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5266   }
5267 
5268   if (ID == "enable_fwd_progress") {
5269     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5270         !isGFX10Plus())
5271       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5272   }
5273 
5274   return false;
5275 }
5276 
5277 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5278   amd_kernel_code_t Header;
5279   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5280 
5281   while (true) {
5282     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5283     // will set the current token to EndOfStatement.
5284     while(trySkipToken(AsmToken::EndOfStatement));
5285 
5286     StringRef ID;
5287     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5288       return true;
5289 
5290     if (ID == ".end_amd_kernel_code_t")
5291       break;
5292 
5293     if (ParseAMDKernelCodeTValue(ID, Header))
5294       return true;
5295   }
5296 
5297   getTargetStreamer().EmitAMDKernelCodeT(Header);
5298 
5299   return false;
5300 }
5301 
5302 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5303   StringRef KernelName;
5304   if (!parseId(KernelName, "expected symbol name"))
5305     return true;
5306 
5307   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5308                                            ELF::STT_AMDGPU_HSA_KERNEL);
5309 
5310   KernelScope.initialize(getContext());
5311   return false;
5312 }
5313 
5314 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5315   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5316     return Error(getLoc(),
5317                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5318                  "architectures");
5319   }
5320 
5321   auto TargetIDDirective = getLexer().getTok().getStringContents();
5322   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5323     return Error(getParser().getTok().getLoc(), "target id must match options");
5324 
5325   getTargetStreamer().EmitISAVersion();
5326   Lex();
5327 
5328   return false;
5329 }
5330 
5331 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5332   const char *AssemblerDirectiveBegin;
5333   const char *AssemblerDirectiveEnd;
5334   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5335       isHsaAbiVersion3AndAbove(&getSTI())
5336           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5337                             HSAMD::V3::AssemblerDirectiveEnd)
5338           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5339                             HSAMD::AssemblerDirectiveEnd);
5340 
5341   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5342     return Error(getLoc(),
5343                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5344                  "not available on non-amdhsa OSes")).str());
5345   }
5346 
5347   std::string HSAMetadataString;
5348   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5349                           HSAMetadataString))
5350     return true;
5351 
5352   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5353     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5354       return Error(getLoc(), "invalid HSA metadata");
5355   } else {
5356     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5357       return Error(getLoc(), "invalid HSA metadata");
5358   }
5359 
5360   return false;
5361 }
5362 
5363 /// Common code to parse out a block of text (typically YAML) between start and
5364 /// end directives.
5365 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5366                                           const char *AssemblerDirectiveEnd,
5367                                           std::string &CollectString) {
5368 
5369   raw_string_ostream CollectStream(CollectString);
5370 
5371   getLexer().setSkipSpace(false);
5372 
5373   bool FoundEnd = false;
5374   while (!isToken(AsmToken::Eof)) {
5375     while (isToken(AsmToken::Space)) {
5376       CollectStream << getTokenStr();
5377       Lex();
5378     }
5379 
5380     if (trySkipId(AssemblerDirectiveEnd)) {
5381       FoundEnd = true;
5382       break;
5383     }
5384 
5385     CollectStream << Parser.parseStringToEndOfStatement()
5386                   << getContext().getAsmInfo()->getSeparatorString();
5387 
5388     Parser.eatToEndOfStatement();
5389   }
5390 
5391   getLexer().setSkipSpace(true);
5392 
5393   if (isToken(AsmToken::Eof) && !FoundEnd) {
5394     return TokError(Twine("expected directive ") +
5395                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5396   }
5397 
5398   CollectStream.flush();
5399   return false;
5400 }
5401 
5402 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5403 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5404   std::string String;
5405   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5406                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5407     return true;
5408 
5409   auto PALMetadata = getTargetStreamer().getPALMetadata();
5410   if (!PALMetadata->setFromString(String))
5411     return Error(getLoc(), "invalid PAL metadata");
5412   return false;
5413 }
5414 
5415 /// Parse the assembler directive for old linear-format PAL metadata.
5416 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5417   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5418     return Error(getLoc(),
5419                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5420                  "not available on non-amdpal OSes")).str());
5421   }
5422 
5423   auto PALMetadata = getTargetStreamer().getPALMetadata();
5424   PALMetadata->setLegacy();
5425   for (;;) {
5426     uint32_t Key, Value;
5427     if (ParseAsAbsoluteExpression(Key)) {
5428       return TokError(Twine("invalid value in ") +
5429                       Twine(PALMD::AssemblerDirective));
5430     }
5431     if (!trySkipToken(AsmToken::Comma)) {
5432       return TokError(Twine("expected an even number of values in ") +
5433                       Twine(PALMD::AssemblerDirective));
5434     }
5435     if (ParseAsAbsoluteExpression(Value)) {
5436       return TokError(Twine("invalid value in ") +
5437                       Twine(PALMD::AssemblerDirective));
5438     }
5439     PALMetadata->setRegister(Key, Value);
5440     if (!trySkipToken(AsmToken::Comma))
5441       break;
5442   }
5443   return false;
5444 }
5445 
5446 /// ParseDirectiveAMDGPULDS
5447 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5448 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5449   if (getParser().checkForValidSection())
5450     return true;
5451 
5452   StringRef Name;
5453   SMLoc NameLoc = getLoc();
5454   if (getParser().parseIdentifier(Name))
5455     return TokError("expected identifier in directive");
5456 
5457   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5458   if (parseToken(AsmToken::Comma, "expected ','"))
5459     return true;
5460 
5461   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5462 
5463   int64_t Size;
5464   SMLoc SizeLoc = getLoc();
5465   if (getParser().parseAbsoluteExpression(Size))
5466     return true;
5467   if (Size < 0)
5468     return Error(SizeLoc, "size must be non-negative");
5469   if (Size > LocalMemorySize)
5470     return Error(SizeLoc, "size is too large");
5471 
5472   int64_t Alignment = 4;
5473   if (trySkipToken(AsmToken::Comma)) {
5474     SMLoc AlignLoc = getLoc();
5475     if (getParser().parseAbsoluteExpression(Alignment))
5476       return true;
5477     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5478       return Error(AlignLoc, "alignment must be a power of two");
5479 
5480     // Alignment larger than the size of LDS is possible in theory, as long
5481     // as the linker manages to place to symbol at address 0, but we do want
5482     // to make sure the alignment fits nicely into a 32-bit integer.
5483     if (Alignment >= 1u << 31)
5484       return Error(AlignLoc, "alignment is too large");
5485   }
5486 
5487   if (parseToken(AsmToken::EndOfStatement,
5488                  "unexpected token in '.amdgpu_lds' directive"))
5489     return true;
5490 
5491   Symbol->redefineIfPossible();
5492   if (!Symbol->isUndefined())
5493     return Error(NameLoc, "invalid symbol redefinition");
5494 
5495   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5496   return false;
5497 }
5498 
5499 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5500   StringRef IDVal = DirectiveID.getString();
5501 
5502   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5503     if (IDVal == ".amdhsa_kernel")
5504      return ParseDirectiveAMDHSAKernel();
5505 
5506     // TODO: Restructure/combine with PAL metadata directive.
5507     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5508       return ParseDirectiveHSAMetadata();
5509   } else {
5510     if (IDVal == ".hsa_code_object_version")
5511       return ParseDirectiveHSACodeObjectVersion();
5512 
5513     if (IDVal == ".hsa_code_object_isa")
5514       return ParseDirectiveHSACodeObjectISA();
5515 
5516     if (IDVal == ".amd_kernel_code_t")
5517       return ParseDirectiveAMDKernelCodeT();
5518 
5519     if (IDVal == ".amdgpu_hsa_kernel")
5520       return ParseDirectiveAMDGPUHsaKernel();
5521 
5522     if (IDVal == ".amd_amdgpu_isa")
5523       return ParseDirectiveISAVersion();
5524 
5525     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5526       return ParseDirectiveHSAMetadata();
5527   }
5528 
5529   if (IDVal == ".amdgcn_target")
5530     return ParseDirectiveAMDGCNTarget();
5531 
5532   if (IDVal == ".amdgpu_lds")
5533     return ParseDirectiveAMDGPULDS();
5534 
5535   if (IDVal == PALMD::AssemblerDirectiveBegin)
5536     return ParseDirectivePALMetadataBegin();
5537 
5538   if (IDVal == PALMD::AssemblerDirective)
5539     return ParseDirectivePALMetadata();
5540 
5541   return true;
5542 }
5543 
5544 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5545                                            unsigned RegNo) {
5546 
5547   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5548     return isGFX9Plus();
5549 
5550   // GFX10 has 2 more SGPRs 104 and 105.
5551   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5552     return hasSGPR104_SGPR105();
5553 
5554   switch (RegNo) {
5555   case AMDGPU::SRC_SHARED_BASE:
5556   case AMDGPU::SRC_SHARED_LIMIT:
5557   case AMDGPU::SRC_PRIVATE_BASE:
5558   case AMDGPU::SRC_PRIVATE_LIMIT:
5559   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5560     return isGFX9Plus();
5561   case AMDGPU::TBA:
5562   case AMDGPU::TBA_LO:
5563   case AMDGPU::TBA_HI:
5564   case AMDGPU::TMA:
5565   case AMDGPU::TMA_LO:
5566   case AMDGPU::TMA_HI:
5567     return !isGFX9Plus();
5568   case AMDGPU::XNACK_MASK:
5569   case AMDGPU::XNACK_MASK_LO:
5570   case AMDGPU::XNACK_MASK_HI:
5571     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5572   case AMDGPU::SGPR_NULL:
5573     return isGFX10Plus();
5574   default:
5575     break;
5576   }
5577 
5578   if (isCI())
5579     return true;
5580 
5581   if (isSI() || isGFX10Plus()) {
5582     // No flat_scr on SI.
5583     // On GFX10 flat scratch is not a valid register operand and can only be
5584     // accessed with s_setreg/s_getreg.
5585     switch (RegNo) {
5586     case AMDGPU::FLAT_SCR:
5587     case AMDGPU::FLAT_SCR_LO:
5588     case AMDGPU::FLAT_SCR_HI:
5589       return false;
5590     default:
5591       return true;
5592     }
5593   }
5594 
5595   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5596   // SI/CI have.
5597   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5598     return hasSGPR102_SGPR103();
5599 
5600   return true;
5601 }
5602 
5603 OperandMatchResultTy
5604 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5605                               OperandMode Mode) {
5606   // Try to parse with a custom parser
5607   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5608 
5609   // If we successfully parsed the operand or if there as an error parsing,
5610   // we are done.
5611   //
5612   // If we are parsing after we reach EndOfStatement then this means we
5613   // are appending default values to the Operands list.  This is only done
5614   // by custom parser, so we shouldn't continue on to the generic parsing.
5615   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5616       isToken(AsmToken::EndOfStatement))
5617     return ResTy;
5618 
5619   SMLoc RBraceLoc;
5620   SMLoc LBraceLoc = getLoc();
5621   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5622     unsigned Prefix = Operands.size();
5623 
5624     for (;;) {
5625       auto Loc = getLoc();
5626       ResTy = parseReg(Operands);
5627       if (ResTy == MatchOperand_NoMatch)
5628         Error(Loc, "expected a register");
5629       if (ResTy != MatchOperand_Success)
5630         return MatchOperand_ParseFail;
5631 
5632       RBraceLoc = getLoc();
5633       if (trySkipToken(AsmToken::RBrac))
5634         break;
5635 
5636       if (!skipToken(AsmToken::Comma,
5637                      "expected a comma or a closing square bracket")) {
5638         return MatchOperand_ParseFail;
5639       }
5640     }
5641 
5642     if (Operands.size() - Prefix > 1) {
5643       Operands.insert(Operands.begin() + Prefix,
5644                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5645       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5646     }
5647 
5648     return MatchOperand_Success;
5649   }
5650 
5651   return parseRegOrImm(Operands);
5652 }
5653 
5654 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5655   // Clear any forced encodings from the previous instruction.
5656   setForcedEncodingSize(0);
5657   setForcedDPP(false);
5658   setForcedSDWA(false);
5659 
5660   if (Name.endswith("_e64")) {
5661     setForcedEncodingSize(64);
5662     return Name.substr(0, Name.size() - 4);
5663   } else if (Name.endswith("_e32")) {
5664     setForcedEncodingSize(32);
5665     return Name.substr(0, Name.size() - 4);
5666   } else if (Name.endswith("_dpp")) {
5667     setForcedDPP(true);
5668     return Name.substr(0, Name.size() - 4);
5669   } else if (Name.endswith("_sdwa")) {
5670     setForcedSDWA(true);
5671     return Name.substr(0, Name.size() - 5);
5672   }
5673   return Name;
5674 }
5675 
5676 static void applyMnemonicAliases(StringRef &Mnemonic,
5677                                  const FeatureBitset &Features,
5678                                  unsigned VariantID);
5679 
5680 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5681                                        StringRef Name,
5682                                        SMLoc NameLoc, OperandVector &Operands) {
5683   // Add the instruction mnemonic
5684   Name = parseMnemonicSuffix(Name);
5685 
5686   // If the target architecture uses MnemonicAlias, call it here to parse
5687   // operands correctly.
5688   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5689 
5690   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5691 
5692   bool IsMIMG = Name.startswith("image_");
5693 
5694   while (!trySkipToken(AsmToken::EndOfStatement)) {
5695     OperandMode Mode = OperandMode_Default;
5696     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5697       Mode = OperandMode_NSA;
5698     CPolSeen = 0;
5699     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5700 
5701     if (Res != MatchOperand_Success) {
5702       checkUnsupportedInstruction(Name, NameLoc);
5703       if (!Parser.hasPendingError()) {
5704         // FIXME: use real operand location rather than the current location.
5705         StringRef Msg =
5706           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5707                                             "not a valid operand.";
5708         Error(getLoc(), Msg);
5709       }
5710       while (!trySkipToken(AsmToken::EndOfStatement)) {
5711         lex();
5712       }
5713       return true;
5714     }
5715 
5716     // Eat the comma or space if there is one.
5717     trySkipToken(AsmToken::Comma);
5718   }
5719 
5720   return false;
5721 }
5722 
5723 //===----------------------------------------------------------------------===//
5724 // Utility functions
5725 //===----------------------------------------------------------------------===//
5726 
5727 OperandMatchResultTy
5728 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5729 
5730   if (!trySkipId(Prefix, AsmToken::Colon))
5731     return MatchOperand_NoMatch;
5732 
5733   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5734 }
5735 
5736 OperandMatchResultTy
5737 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5738                                     AMDGPUOperand::ImmTy ImmTy,
5739                                     bool (*ConvertResult)(int64_t&)) {
5740   SMLoc S = getLoc();
5741   int64_t Value = 0;
5742 
5743   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5744   if (Res != MatchOperand_Success)
5745     return Res;
5746 
5747   if (ConvertResult && !ConvertResult(Value)) {
5748     Error(S, "invalid " + StringRef(Prefix) + " value.");
5749   }
5750 
5751   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5752   return MatchOperand_Success;
5753 }
5754 
5755 OperandMatchResultTy
5756 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5757                                              OperandVector &Operands,
5758                                              AMDGPUOperand::ImmTy ImmTy,
5759                                              bool (*ConvertResult)(int64_t&)) {
5760   SMLoc S = getLoc();
5761   if (!trySkipId(Prefix, AsmToken::Colon))
5762     return MatchOperand_NoMatch;
5763 
5764   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5765     return MatchOperand_ParseFail;
5766 
5767   unsigned Val = 0;
5768   const unsigned MaxSize = 4;
5769 
5770   // FIXME: How to verify the number of elements matches the number of src
5771   // operands?
5772   for (int I = 0; ; ++I) {
5773     int64_t Op;
5774     SMLoc Loc = getLoc();
5775     if (!parseExpr(Op))
5776       return MatchOperand_ParseFail;
5777 
5778     if (Op != 0 && Op != 1) {
5779       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5780       return MatchOperand_ParseFail;
5781     }
5782 
5783     Val |= (Op << I);
5784 
5785     if (trySkipToken(AsmToken::RBrac))
5786       break;
5787 
5788     if (I + 1 == MaxSize) {
5789       Error(getLoc(), "expected a closing square bracket");
5790       return MatchOperand_ParseFail;
5791     }
5792 
5793     if (!skipToken(AsmToken::Comma, "expected a comma"))
5794       return MatchOperand_ParseFail;
5795   }
5796 
5797   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5798   return MatchOperand_Success;
5799 }
5800 
5801 OperandMatchResultTy
5802 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5803                                AMDGPUOperand::ImmTy ImmTy) {
5804   int64_t Bit;
5805   SMLoc S = getLoc();
5806 
5807   if (trySkipId(Name)) {
5808     Bit = 1;
5809   } else if (trySkipId("no", Name)) {
5810     Bit = 0;
5811   } else {
5812     return MatchOperand_NoMatch;
5813   }
5814 
5815   if (Name == "r128" && !hasMIMG_R128()) {
5816     Error(S, "r128 modifier is not supported on this GPU");
5817     return MatchOperand_ParseFail;
5818   }
5819   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5820     Error(S, "a16 modifier is not supported on this GPU");
5821     return MatchOperand_ParseFail;
5822   }
5823 
5824   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5825     ImmTy = AMDGPUOperand::ImmTyR128A16;
5826 
5827   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5828   return MatchOperand_Success;
5829 }
5830 
5831 OperandMatchResultTy
5832 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5833   unsigned CPolOn = 0;
5834   unsigned CPolOff = 0;
5835   SMLoc S = getLoc();
5836 
5837   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5838   if (isGFX940() && !Mnemo.startswith("s_")) {
5839     if (trySkipId("sc0"))
5840       CPolOn = AMDGPU::CPol::SC0;
5841     else if (trySkipId("nosc0"))
5842       CPolOff = AMDGPU::CPol::SC0;
5843     else if (trySkipId("nt"))
5844       CPolOn = AMDGPU::CPol::NT;
5845     else if (trySkipId("nont"))
5846       CPolOff = AMDGPU::CPol::NT;
5847     else if (trySkipId("sc1"))
5848       CPolOn = AMDGPU::CPol::SC1;
5849     else if (trySkipId("nosc1"))
5850       CPolOff = AMDGPU::CPol::SC1;
5851     else
5852       return MatchOperand_NoMatch;
5853   }
5854   else if (trySkipId("glc"))
5855     CPolOn = AMDGPU::CPol::GLC;
5856   else if (trySkipId("noglc"))
5857     CPolOff = AMDGPU::CPol::GLC;
5858   else if (trySkipId("slc"))
5859     CPolOn = AMDGPU::CPol::SLC;
5860   else if (trySkipId("noslc"))
5861     CPolOff = AMDGPU::CPol::SLC;
5862   else if (trySkipId("dlc"))
5863     CPolOn = AMDGPU::CPol::DLC;
5864   else if (trySkipId("nodlc"))
5865     CPolOff = AMDGPU::CPol::DLC;
5866   else if (trySkipId("scc"))
5867     CPolOn = AMDGPU::CPol::SCC;
5868   else if (trySkipId("noscc"))
5869     CPolOff = AMDGPU::CPol::SCC;
5870   else
5871     return MatchOperand_NoMatch;
5872 
5873   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5874     Error(S, "dlc modifier is not supported on this GPU");
5875     return MatchOperand_ParseFail;
5876   }
5877 
5878   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5879     Error(S, "scc modifier is not supported on this GPU");
5880     return MatchOperand_ParseFail;
5881   }
5882 
5883   if (CPolSeen & (CPolOn | CPolOff)) {
5884     Error(S, "duplicate cache policy modifier");
5885     return MatchOperand_ParseFail;
5886   }
5887 
5888   CPolSeen |= (CPolOn | CPolOff);
5889 
5890   for (unsigned I = 1; I != Operands.size(); ++I) {
5891     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5892     if (Op.isCPol()) {
5893       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5894       return MatchOperand_Success;
5895     }
5896   }
5897 
5898   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5899                                               AMDGPUOperand::ImmTyCPol));
5900 
5901   return MatchOperand_Success;
5902 }
5903 
5904 static void addOptionalImmOperand(
5905   MCInst& Inst, const OperandVector& Operands,
5906   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5907   AMDGPUOperand::ImmTy ImmT,
5908   int64_t Default = 0) {
5909   auto i = OptionalIdx.find(ImmT);
5910   if (i != OptionalIdx.end()) {
5911     unsigned Idx = i->second;
5912     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5913   } else {
5914     Inst.addOperand(MCOperand::createImm(Default));
5915   }
5916 }
5917 
5918 OperandMatchResultTy
5919 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5920                                        StringRef &Value,
5921                                        SMLoc &StringLoc) {
5922   if (!trySkipId(Prefix, AsmToken::Colon))
5923     return MatchOperand_NoMatch;
5924 
5925   StringLoc = getLoc();
5926   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5927                                                   : MatchOperand_ParseFail;
5928 }
5929 
5930 //===----------------------------------------------------------------------===//
5931 // MTBUF format
5932 //===----------------------------------------------------------------------===//
5933 
5934 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5935                                   int64_t MaxVal,
5936                                   int64_t &Fmt) {
5937   int64_t Val;
5938   SMLoc Loc = getLoc();
5939 
5940   auto Res = parseIntWithPrefix(Pref, Val);
5941   if (Res == MatchOperand_ParseFail)
5942     return false;
5943   if (Res == MatchOperand_NoMatch)
5944     return true;
5945 
5946   if (Val < 0 || Val > MaxVal) {
5947     Error(Loc, Twine("out of range ", StringRef(Pref)));
5948     return false;
5949   }
5950 
5951   Fmt = Val;
5952   return true;
5953 }
5954 
5955 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5956 // values to live in a joint format operand in the MCInst encoding.
5957 OperandMatchResultTy
5958 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5959   using namespace llvm::AMDGPU::MTBUFFormat;
5960 
5961   int64_t Dfmt = DFMT_UNDEF;
5962   int64_t Nfmt = NFMT_UNDEF;
5963 
5964   // dfmt and nfmt can appear in either order, and each is optional.
5965   for (int I = 0; I < 2; ++I) {
5966     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5967       return MatchOperand_ParseFail;
5968 
5969     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5970       return MatchOperand_ParseFail;
5971     }
5972     // Skip optional comma between dfmt/nfmt
5973     // but guard against 2 commas following each other.
5974     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5975         !peekToken().is(AsmToken::Comma)) {
5976       trySkipToken(AsmToken::Comma);
5977     }
5978   }
5979 
5980   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5981     return MatchOperand_NoMatch;
5982 
5983   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5984   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5985 
5986   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5987   return MatchOperand_Success;
5988 }
5989 
5990 OperandMatchResultTy
5991 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5992   using namespace llvm::AMDGPU::MTBUFFormat;
5993 
5994   int64_t Fmt = UFMT_UNDEF;
5995 
5996   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5997     return MatchOperand_ParseFail;
5998 
5999   if (Fmt == UFMT_UNDEF)
6000     return MatchOperand_NoMatch;
6001 
6002   Format = Fmt;
6003   return MatchOperand_Success;
6004 }
6005 
6006 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6007                                     int64_t &Nfmt,
6008                                     StringRef FormatStr,
6009                                     SMLoc Loc) {
6010   using namespace llvm::AMDGPU::MTBUFFormat;
6011   int64_t Format;
6012 
6013   Format = getDfmt(FormatStr);
6014   if (Format != DFMT_UNDEF) {
6015     Dfmt = Format;
6016     return true;
6017   }
6018 
6019   Format = getNfmt(FormatStr, getSTI());
6020   if (Format != NFMT_UNDEF) {
6021     Nfmt = Format;
6022     return true;
6023   }
6024 
6025   Error(Loc, "unsupported format");
6026   return false;
6027 }
6028 
6029 OperandMatchResultTy
6030 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6031                                           SMLoc FormatLoc,
6032                                           int64_t &Format) {
6033   using namespace llvm::AMDGPU::MTBUFFormat;
6034 
6035   int64_t Dfmt = DFMT_UNDEF;
6036   int64_t Nfmt = NFMT_UNDEF;
6037   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6038     return MatchOperand_ParseFail;
6039 
6040   if (trySkipToken(AsmToken::Comma)) {
6041     StringRef Str;
6042     SMLoc Loc = getLoc();
6043     if (!parseId(Str, "expected a format string") ||
6044         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6045       return MatchOperand_ParseFail;
6046     }
6047     if (Dfmt == DFMT_UNDEF) {
6048       Error(Loc, "duplicate numeric format");
6049       return MatchOperand_ParseFail;
6050     } else if (Nfmt == NFMT_UNDEF) {
6051       Error(Loc, "duplicate data format");
6052       return MatchOperand_ParseFail;
6053     }
6054   }
6055 
6056   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6057   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6058 
6059   if (isGFX10Plus()) {
6060     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6061     if (Ufmt == UFMT_UNDEF) {
6062       Error(FormatLoc, "unsupported format");
6063       return MatchOperand_ParseFail;
6064     }
6065     Format = Ufmt;
6066   } else {
6067     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6068   }
6069 
6070   return MatchOperand_Success;
6071 }
6072 
6073 OperandMatchResultTy
6074 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6075                                             SMLoc Loc,
6076                                             int64_t &Format) {
6077   using namespace llvm::AMDGPU::MTBUFFormat;
6078 
6079   auto Id = getUnifiedFormat(FormatStr, getSTI());
6080   if (Id == UFMT_UNDEF)
6081     return MatchOperand_NoMatch;
6082 
6083   if (!isGFX10Plus()) {
6084     Error(Loc, "unified format is not supported on this GPU");
6085     return MatchOperand_ParseFail;
6086   }
6087 
6088   Format = Id;
6089   return MatchOperand_Success;
6090 }
6091 
6092 OperandMatchResultTy
6093 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6094   using namespace llvm::AMDGPU::MTBUFFormat;
6095   SMLoc Loc = getLoc();
6096 
6097   if (!parseExpr(Format))
6098     return MatchOperand_ParseFail;
6099   if (!isValidFormatEncoding(Format, getSTI())) {
6100     Error(Loc, "out of range format");
6101     return MatchOperand_ParseFail;
6102   }
6103 
6104   return MatchOperand_Success;
6105 }
6106 
6107 OperandMatchResultTy
6108 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6109   using namespace llvm::AMDGPU::MTBUFFormat;
6110 
6111   if (!trySkipId("format", AsmToken::Colon))
6112     return MatchOperand_NoMatch;
6113 
6114   if (trySkipToken(AsmToken::LBrac)) {
6115     StringRef FormatStr;
6116     SMLoc Loc = getLoc();
6117     if (!parseId(FormatStr, "expected a format string"))
6118       return MatchOperand_ParseFail;
6119 
6120     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6121     if (Res == MatchOperand_NoMatch)
6122       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6123     if (Res != MatchOperand_Success)
6124       return Res;
6125 
6126     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6127       return MatchOperand_ParseFail;
6128 
6129     return MatchOperand_Success;
6130   }
6131 
6132   return parseNumericFormat(Format);
6133 }
6134 
6135 OperandMatchResultTy
6136 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6137   using namespace llvm::AMDGPU::MTBUFFormat;
6138 
6139   int64_t Format = getDefaultFormatEncoding(getSTI());
6140   OperandMatchResultTy Res;
6141   SMLoc Loc = getLoc();
6142 
6143   // Parse legacy format syntax.
6144   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6145   if (Res == MatchOperand_ParseFail)
6146     return Res;
6147 
6148   bool FormatFound = (Res == MatchOperand_Success);
6149 
6150   Operands.push_back(
6151     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6152 
6153   if (FormatFound)
6154     trySkipToken(AsmToken::Comma);
6155 
6156   if (isToken(AsmToken::EndOfStatement)) {
6157     // We are expecting an soffset operand,
6158     // but let matcher handle the error.
6159     return MatchOperand_Success;
6160   }
6161 
6162   // Parse soffset.
6163   Res = parseRegOrImm(Operands);
6164   if (Res != MatchOperand_Success)
6165     return Res;
6166 
6167   trySkipToken(AsmToken::Comma);
6168 
6169   if (!FormatFound) {
6170     Res = parseSymbolicOrNumericFormat(Format);
6171     if (Res == MatchOperand_ParseFail)
6172       return Res;
6173     if (Res == MatchOperand_Success) {
6174       auto Size = Operands.size();
6175       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6176       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6177       Op.setImm(Format);
6178     }
6179     return MatchOperand_Success;
6180   }
6181 
6182   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6183     Error(getLoc(), "duplicate format");
6184     return MatchOperand_ParseFail;
6185   }
6186   return MatchOperand_Success;
6187 }
6188 
6189 //===----------------------------------------------------------------------===//
6190 // ds
6191 //===----------------------------------------------------------------------===//
6192 
6193 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6194                                     const OperandVector &Operands) {
6195   OptionalImmIndexMap OptionalIdx;
6196 
6197   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6198     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6199 
6200     // Add the register arguments
6201     if (Op.isReg()) {
6202       Op.addRegOperands(Inst, 1);
6203       continue;
6204     }
6205 
6206     // Handle optional arguments
6207     OptionalIdx[Op.getImmTy()] = i;
6208   }
6209 
6210   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6211   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6212   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6213 
6214   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6215 }
6216 
6217 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6218                                 bool IsGdsHardcoded) {
6219   OptionalImmIndexMap OptionalIdx;
6220 
6221   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6222     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6223 
6224     // Add the register arguments
6225     if (Op.isReg()) {
6226       Op.addRegOperands(Inst, 1);
6227       continue;
6228     }
6229 
6230     if (Op.isToken() && Op.getToken() == "gds") {
6231       IsGdsHardcoded = true;
6232       continue;
6233     }
6234 
6235     // Handle optional arguments
6236     OptionalIdx[Op.getImmTy()] = i;
6237   }
6238 
6239   AMDGPUOperand::ImmTy OffsetType =
6240     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6241      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6242      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6243                                                       AMDGPUOperand::ImmTyOffset;
6244 
6245   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6246 
6247   if (!IsGdsHardcoded) {
6248     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6249   }
6250   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6251 }
6252 
6253 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6254   OptionalImmIndexMap OptionalIdx;
6255 
6256   unsigned OperandIdx[4];
6257   unsigned EnMask = 0;
6258   int SrcIdx = 0;
6259 
6260   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6261     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6262 
6263     // Add the register arguments
6264     if (Op.isReg()) {
6265       assert(SrcIdx < 4);
6266       OperandIdx[SrcIdx] = Inst.size();
6267       Op.addRegOperands(Inst, 1);
6268       ++SrcIdx;
6269       continue;
6270     }
6271 
6272     if (Op.isOff()) {
6273       assert(SrcIdx < 4);
6274       OperandIdx[SrcIdx] = Inst.size();
6275       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6276       ++SrcIdx;
6277       continue;
6278     }
6279 
6280     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6281       Op.addImmOperands(Inst, 1);
6282       continue;
6283     }
6284 
6285     if (Op.isToken() && Op.getToken() == "done")
6286       continue;
6287 
6288     // Handle optional arguments
6289     OptionalIdx[Op.getImmTy()] = i;
6290   }
6291 
6292   assert(SrcIdx == 4);
6293 
6294   bool Compr = false;
6295   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6296     Compr = true;
6297     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6298     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6299     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6300   }
6301 
6302   for (auto i = 0; i < SrcIdx; ++i) {
6303     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6304       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6305     }
6306   }
6307 
6308   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6309   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6310 
6311   Inst.addOperand(MCOperand::createImm(EnMask));
6312 }
6313 
6314 //===----------------------------------------------------------------------===//
6315 // s_waitcnt
6316 //===----------------------------------------------------------------------===//
6317 
6318 static bool
6319 encodeCnt(
6320   const AMDGPU::IsaVersion ISA,
6321   int64_t &IntVal,
6322   int64_t CntVal,
6323   bool Saturate,
6324   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6325   unsigned (*decode)(const IsaVersion &Version, unsigned))
6326 {
6327   bool Failed = false;
6328 
6329   IntVal = encode(ISA, IntVal, CntVal);
6330   if (CntVal != decode(ISA, IntVal)) {
6331     if (Saturate) {
6332       IntVal = encode(ISA, IntVal, -1);
6333     } else {
6334       Failed = true;
6335     }
6336   }
6337   return Failed;
6338 }
6339 
6340 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6341 
6342   SMLoc CntLoc = getLoc();
6343   StringRef CntName = getTokenStr();
6344 
6345   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6346       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6347     return false;
6348 
6349   int64_t CntVal;
6350   SMLoc ValLoc = getLoc();
6351   if (!parseExpr(CntVal))
6352     return false;
6353 
6354   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6355 
6356   bool Failed = true;
6357   bool Sat = CntName.endswith("_sat");
6358 
6359   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6360     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6361   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6362     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6363   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6364     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6365   } else {
6366     Error(CntLoc, "invalid counter name " + CntName);
6367     return false;
6368   }
6369 
6370   if (Failed) {
6371     Error(ValLoc, "too large value for " + CntName);
6372     return false;
6373   }
6374 
6375   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6376     return false;
6377 
6378   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6379     if (isToken(AsmToken::EndOfStatement)) {
6380       Error(getLoc(), "expected a counter name");
6381       return false;
6382     }
6383   }
6384 
6385   return true;
6386 }
6387 
6388 OperandMatchResultTy
6389 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6390   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6391   int64_t Waitcnt = getWaitcntBitMask(ISA);
6392   SMLoc S = getLoc();
6393 
6394   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6395     while (!isToken(AsmToken::EndOfStatement)) {
6396       if (!parseCnt(Waitcnt))
6397         return MatchOperand_ParseFail;
6398     }
6399   } else {
6400     if (!parseExpr(Waitcnt))
6401       return MatchOperand_ParseFail;
6402   }
6403 
6404   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6405   return MatchOperand_Success;
6406 }
6407 
6408 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6409   SMLoc FieldLoc = getLoc();
6410   StringRef FieldName = getTokenStr();
6411   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6412       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6413     return false;
6414 
6415   SMLoc ValueLoc = getLoc();
6416   StringRef ValueName = getTokenStr();
6417   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6418       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6419     return false;
6420 
6421   unsigned Shift;
6422   if (FieldName == "instid0") {
6423     Shift = 0;
6424   } else if (FieldName == "instskip") {
6425     Shift = 4;
6426   } else if (FieldName == "instid1") {
6427     Shift = 7;
6428   } else {
6429     Error(FieldLoc, "invalid field name " + FieldName);
6430     return false;
6431   }
6432 
6433   int Value;
6434   if (Shift == 4) {
6435     // Parse values for instskip.
6436     Value = StringSwitch<int>(ValueName)
6437                 .Case("SAME", 0)
6438                 .Case("NEXT", 1)
6439                 .Case("SKIP_1", 2)
6440                 .Case("SKIP_2", 3)
6441                 .Case("SKIP_3", 4)
6442                 .Case("SKIP_4", 5)
6443                 .Default(-1);
6444   } else {
6445     // Parse values for instid0 and instid1.
6446     Value = StringSwitch<int>(ValueName)
6447                 .Case("NO_DEP", 0)
6448                 .Case("VALU_DEP_1", 1)
6449                 .Case("VALU_DEP_2", 2)
6450                 .Case("VALU_DEP_3", 3)
6451                 .Case("VALU_DEP_4", 4)
6452                 .Case("TRANS32_DEP_1", 5)
6453                 .Case("TRANS32_DEP_2", 6)
6454                 .Case("TRANS32_DEP_3", 7)
6455                 .Case("FMA_ACCUM_CYCLE_1", 8)
6456                 .Case("SALU_CYCLE_1", 9)
6457                 .Case("SALU_CYCLE_2", 10)
6458                 .Case("SALU_CYCLE_3", 11)
6459                 .Default(-1);
6460   }
6461   if (Value < 0) {
6462     Error(ValueLoc, "invalid value name " + ValueName);
6463     return false;
6464   }
6465 
6466   Delay |= Value << Shift;
6467   return true;
6468 }
6469 
6470 OperandMatchResultTy
6471 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6472   int64_t Delay = 0;
6473   SMLoc S = getLoc();
6474 
6475   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6476     do {
6477       if (!parseDelay(Delay))
6478         return MatchOperand_ParseFail;
6479     } while (trySkipToken(AsmToken::Pipe));
6480   } else {
6481     if (!parseExpr(Delay))
6482       return MatchOperand_ParseFail;
6483   }
6484 
6485   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6486   return MatchOperand_Success;
6487 }
6488 
6489 bool
6490 AMDGPUOperand::isSWaitCnt() const {
6491   return isImm();
6492 }
6493 
6494 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6495 
6496 //===----------------------------------------------------------------------===//
6497 // DepCtr
6498 //===----------------------------------------------------------------------===//
6499 
6500 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6501                                   StringRef DepCtrName) {
6502   switch (ErrorId) {
6503   case OPR_ID_UNKNOWN:
6504     Error(Loc, Twine("invalid counter name ", DepCtrName));
6505     return;
6506   case OPR_ID_UNSUPPORTED:
6507     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6508     return;
6509   case OPR_ID_DUPLICATE:
6510     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6511     return;
6512   case OPR_VAL_INVALID:
6513     Error(Loc, Twine("invalid value for ", DepCtrName));
6514     return;
6515   default:
6516     assert(false);
6517   }
6518 }
6519 
6520 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6521 
6522   using namespace llvm::AMDGPU::DepCtr;
6523 
6524   SMLoc DepCtrLoc = getLoc();
6525   StringRef DepCtrName = getTokenStr();
6526 
6527   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6528       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6529     return false;
6530 
6531   int64_t ExprVal;
6532   if (!parseExpr(ExprVal))
6533     return false;
6534 
6535   unsigned PrevOprMask = UsedOprMask;
6536   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6537 
6538   if (CntVal < 0) {
6539     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6540     return false;
6541   }
6542 
6543   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6544     return false;
6545 
6546   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6547     if (isToken(AsmToken::EndOfStatement)) {
6548       Error(getLoc(), "expected a counter name");
6549       return false;
6550     }
6551   }
6552 
6553   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6554   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6555   return true;
6556 }
6557 
6558 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6559   using namespace llvm::AMDGPU::DepCtr;
6560 
6561   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6562   SMLoc Loc = getLoc();
6563 
6564   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6565     unsigned UsedOprMask = 0;
6566     while (!isToken(AsmToken::EndOfStatement)) {
6567       if (!parseDepCtr(DepCtr, UsedOprMask))
6568         return MatchOperand_ParseFail;
6569     }
6570   } else {
6571     if (!parseExpr(DepCtr))
6572       return MatchOperand_ParseFail;
6573   }
6574 
6575   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6576   return MatchOperand_Success;
6577 }
6578 
6579 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6580 
6581 //===----------------------------------------------------------------------===//
6582 // hwreg
6583 //===----------------------------------------------------------------------===//
6584 
6585 bool
6586 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6587                                 OperandInfoTy &Offset,
6588                                 OperandInfoTy &Width) {
6589   using namespace llvm::AMDGPU::Hwreg;
6590 
6591   // The register may be specified by name or using a numeric code
6592   HwReg.Loc = getLoc();
6593   if (isToken(AsmToken::Identifier) &&
6594       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6595     HwReg.IsSymbolic = true;
6596     lex(); // skip register name
6597   } else if (!parseExpr(HwReg.Id, "a register name")) {
6598     return false;
6599   }
6600 
6601   if (trySkipToken(AsmToken::RParen))
6602     return true;
6603 
6604   // parse optional params
6605   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6606     return false;
6607 
6608   Offset.Loc = getLoc();
6609   if (!parseExpr(Offset.Id))
6610     return false;
6611 
6612   if (!skipToken(AsmToken::Comma, "expected a comma"))
6613     return false;
6614 
6615   Width.Loc = getLoc();
6616   return parseExpr(Width.Id) &&
6617          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6618 }
6619 
6620 bool
6621 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6622                                const OperandInfoTy &Offset,
6623                                const OperandInfoTy &Width) {
6624 
6625   using namespace llvm::AMDGPU::Hwreg;
6626 
6627   if (HwReg.IsSymbolic) {
6628     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6629       Error(HwReg.Loc,
6630             "specified hardware register is not supported on this GPU");
6631       return false;
6632     }
6633   } else {
6634     if (!isValidHwreg(HwReg.Id)) {
6635       Error(HwReg.Loc,
6636             "invalid code of hardware register: only 6-bit values are legal");
6637       return false;
6638     }
6639   }
6640   if (!isValidHwregOffset(Offset.Id)) {
6641     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6642     return false;
6643   }
6644   if (!isValidHwregWidth(Width.Id)) {
6645     Error(Width.Loc,
6646           "invalid bitfield width: only values from 1 to 32 are legal");
6647     return false;
6648   }
6649   return true;
6650 }
6651 
6652 OperandMatchResultTy
6653 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6654   using namespace llvm::AMDGPU::Hwreg;
6655 
6656   int64_t ImmVal = 0;
6657   SMLoc Loc = getLoc();
6658 
6659   if (trySkipId("hwreg", AsmToken::LParen)) {
6660     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6661     OperandInfoTy Offset(OFFSET_DEFAULT_);
6662     OperandInfoTy Width(WIDTH_DEFAULT_);
6663     if (parseHwregBody(HwReg, Offset, Width) &&
6664         validateHwreg(HwReg, Offset, Width)) {
6665       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6666     } else {
6667       return MatchOperand_ParseFail;
6668     }
6669   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6670     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6671       Error(Loc, "invalid immediate: only 16-bit values are legal");
6672       return MatchOperand_ParseFail;
6673     }
6674   } else {
6675     return MatchOperand_ParseFail;
6676   }
6677 
6678   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6679   return MatchOperand_Success;
6680 }
6681 
6682 bool AMDGPUOperand::isHwreg() const {
6683   return isImmTy(ImmTyHwreg);
6684 }
6685 
6686 //===----------------------------------------------------------------------===//
6687 // sendmsg
6688 //===----------------------------------------------------------------------===//
6689 
6690 bool
6691 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6692                                   OperandInfoTy &Op,
6693                                   OperandInfoTy &Stream) {
6694   using namespace llvm::AMDGPU::SendMsg;
6695 
6696   Msg.Loc = getLoc();
6697   if (isToken(AsmToken::Identifier) &&
6698       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6699     Msg.IsSymbolic = true;
6700     lex(); // skip message name
6701   } else if (!parseExpr(Msg.Id, "a message name")) {
6702     return false;
6703   }
6704 
6705   if (trySkipToken(AsmToken::Comma)) {
6706     Op.IsDefined = true;
6707     Op.Loc = getLoc();
6708     if (isToken(AsmToken::Identifier) &&
6709         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6710       lex(); // skip operation name
6711     } else if (!parseExpr(Op.Id, "an operation name")) {
6712       return false;
6713     }
6714 
6715     if (trySkipToken(AsmToken::Comma)) {
6716       Stream.IsDefined = true;
6717       Stream.Loc = getLoc();
6718       if (!parseExpr(Stream.Id))
6719         return false;
6720     }
6721   }
6722 
6723   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6724 }
6725 
6726 bool
6727 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6728                                  const OperandInfoTy &Op,
6729                                  const OperandInfoTy &Stream) {
6730   using namespace llvm::AMDGPU::SendMsg;
6731 
6732   // Validation strictness depends on whether message is specified
6733   // in a symbolic or in a numeric form. In the latter case
6734   // only encoding possibility is checked.
6735   bool Strict = Msg.IsSymbolic;
6736 
6737   if (Strict) {
6738     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6739       Error(Msg.Loc, "specified message id is not supported on this GPU");
6740       return false;
6741     }
6742   } else {
6743     if (!isValidMsgId(Msg.Id, getSTI())) {
6744       Error(Msg.Loc, "invalid message id");
6745       return false;
6746     }
6747   }
6748   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6749     if (Op.IsDefined) {
6750       Error(Op.Loc, "message does not support operations");
6751     } else {
6752       Error(Msg.Loc, "missing message operation");
6753     }
6754     return false;
6755   }
6756   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6757     Error(Op.Loc, "invalid operation id");
6758     return false;
6759   }
6760   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6761       Stream.IsDefined) {
6762     Error(Stream.Loc, "message operation does not support streams");
6763     return false;
6764   }
6765   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6766     Error(Stream.Loc, "invalid message stream id");
6767     return false;
6768   }
6769   return true;
6770 }
6771 
6772 OperandMatchResultTy
6773 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6774   using namespace llvm::AMDGPU::SendMsg;
6775 
6776   int64_t ImmVal = 0;
6777   SMLoc Loc = getLoc();
6778 
6779   if (trySkipId("sendmsg", AsmToken::LParen)) {
6780     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6781     OperandInfoTy Op(OP_NONE_);
6782     OperandInfoTy Stream(STREAM_ID_NONE_);
6783     if (parseSendMsgBody(Msg, Op, Stream) &&
6784         validateSendMsg(Msg, Op, Stream)) {
6785       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6786     } else {
6787       return MatchOperand_ParseFail;
6788     }
6789   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6790     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6791       Error(Loc, "invalid immediate: only 16-bit values are legal");
6792       return MatchOperand_ParseFail;
6793     }
6794   } else {
6795     return MatchOperand_ParseFail;
6796   }
6797 
6798   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6799   return MatchOperand_Success;
6800 }
6801 
6802 bool AMDGPUOperand::isSendMsg() const {
6803   return isImmTy(ImmTySendMsg);
6804 }
6805 
6806 //===----------------------------------------------------------------------===//
6807 // v_interp
6808 //===----------------------------------------------------------------------===//
6809 
6810 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6811   StringRef Str;
6812   SMLoc S = getLoc();
6813 
6814   if (!parseId(Str))
6815     return MatchOperand_NoMatch;
6816 
6817   int Slot = StringSwitch<int>(Str)
6818     .Case("p10", 0)
6819     .Case("p20", 1)
6820     .Case("p0", 2)
6821     .Default(-1);
6822 
6823   if (Slot == -1) {
6824     Error(S, "invalid interpolation slot");
6825     return MatchOperand_ParseFail;
6826   }
6827 
6828   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6829                                               AMDGPUOperand::ImmTyInterpSlot));
6830   return MatchOperand_Success;
6831 }
6832 
6833 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6834   StringRef Str;
6835   SMLoc S = getLoc();
6836 
6837   if (!parseId(Str))
6838     return MatchOperand_NoMatch;
6839 
6840   if (!Str.startswith("attr")) {
6841     Error(S, "invalid interpolation attribute");
6842     return MatchOperand_ParseFail;
6843   }
6844 
6845   StringRef Chan = Str.take_back(2);
6846   int AttrChan = StringSwitch<int>(Chan)
6847     .Case(".x", 0)
6848     .Case(".y", 1)
6849     .Case(".z", 2)
6850     .Case(".w", 3)
6851     .Default(-1);
6852   if (AttrChan == -1) {
6853     Error(S, "invalid or missing interpolation attribute channel");
6854     return MatchOperand_ParseFail;
6855   }
6856 
6857   Str = Str.drop_back(2).drop_front(4);
6858 
6859   uint8_t Attr;
6860   if (Str.getAsInteger(10, Attr)) {
6861     Error(S, "invalid or missing interpolation attribute number");
6862     return MatchOperand_ParseFail;
6863   }
6864 
6865   if (Attr > 63) {
6866     Error(S, "out of bounds interpolation attribute number");
6867     return MatchOperand_ParseFail;
6868   }
6869 
6870   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6871 
6872   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6873                                               AMDGPUOperand::ImmTyInterpAttr));
6874   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6875                                               AMDGPUOperand::ImmTyAttrChan));
6876   return MatchOperand_Success;
6877 }
6878 
6879 //===----------------------------------------------------------------------===//
6880 // exp
6881 //===----------------------------------------------------------------------===//
6882 
6883 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6884   using namespace llvm::AMDGPU::Exp;
6885 
6886   StringRef Str;
6887   SMLoc S = getLoc();
6888 
6889   if (!parseId(Str))
6890     return MatchOperand_NoMatch;
6891 
6892   unsigned Id = getTgtId(Str);
6893   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6894     Error(S, (Id == ET_INVALID) ?
6895                 "invalid exp target" :
6896                 "exp target is not supported on this GPU");
6897     return MatchOperand_ParseFail;
6898   }
6899 
6900   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6901                                               AMDGPUOperand::ImmTyExpTgt));
6902   return MatchOperand_Success;
6903 }
6904 
6905 //===----------------------------------------------------------------------===//
6906 // parser helpers
6907 //===----------------------------------------------------------------------===//
6908 
6909 bool
6910 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6911   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6912 }
6913 
6914 bool
6915 AMDGPUAsmParser::isId(const StringRef Id) const {
6916   return isId(getToken(), Id);
6917 }
6918 
6919 bool
6920 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6921   return getTokenKind() == Kind;
6922 }
6923 
6924 bool
6925 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6926   if (isId(Id)) {
6927     lex();
6928     return true;
6929   }
6930   return false;
6931 }
6932 
6933 bool
6934 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6935   if (isToken(AsmToken::Identifier)) {
6936     StringRef Tok = getTokenStr();
6937     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6938       lex();
6939       return true;
6940     }
6941   }
6942   return false;
6943 }
6944 
6945 bool
6946 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6947   if (isId(Id) && peekToken().is(Kind)) {
6948     lex();
6949     lex();
6950     return true;
6951   }
6952   return false;
6953 }
6954 
6955 bool
6956 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6957   if (isToken(Kind)) {
6958     lex();
6959     return true;
6960   }
6961   return false;
6962 }
6963 
6964 bool
6965 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6966                            const StringRef ErrMsg) {
6967   if (!trySkipToken(Kind)) {
6968     Error(getLoc(), ErrMsg);
6969     return false;
6970   }
6971   return true;
6972 }
6973 
6974 bool
6975 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6976   SMLoc S = getLoc();
6977 
6978   const MCExpr *Expr;
6979   if (Parser.parseExpression(Expr))
6980     return false;
6981 
6982   if (Expr->evaluateAsAbsolute(Imm))
6983     return true;
6984 
6985   if (Expected.empty()) {
6986     Error(S, "expected absolute expression");
6987   } else {
6988     Error(S, Twine("expected ", Expected) +
6989              Twine(" or an absolute expression"));
6990   }
6991   return false;
6992 }
6993 
6994 bool
6995 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6996   SMLoc S = getLoc();
6997 
6998   const MCExpr *Expr;
6999   if (Parser.parseExpression(Expr))
7000     return false;
7001 
7002   int64_t IntVal;
7003   if (Expr->evaluateAsAbsolute(IntVal)) {
7004     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7005   } else {
7006     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7007   }
7008   return true;
7009 }
7010 
7011 bool
7012 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7013   if (isToken(AsmToken::String)) {
7014     Val = getToken().getStringContents();
7015     lex();
7016     return true;
7017   } else {
7018     Error(getLoc(), ErrMsg);
7019     return false;
7020   }
7021 }
7022 
7023 bool
7024 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7025   if (isToken(AsmToken::Identifier)) {
7026     Val = getTokenStr();
7027     lex();
7028     return true;
7029   } else {
7030     if (!ErrMsg.empty())
7031       Error(getLoc(), ErrMsg);
7032     return false;
7033   }
7034 }
7035 
7036 AsmToken
7037 AMDGPUAsmParser::getToken() const {
7038   return Parser.getTok();
7039 }
7040 
7041 AsmToken
7042 AMDGPUAsmParser::peekToken() {
7043   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
7044 }
7045 
7046 void
7047 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7048   auto TokCount = getLexer().peekTokens(Tokens);
7049 
7050   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7051     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7052 }
7053 
7054 AsmToken::TokenKind
7055 AMDGPUAsmParser::getTokenKind() const {
7056   return getLexer().getKind();
7057 }
7058 
7059 SMLoc
7060 AMDGPUAsmParser::getLoc() const {
7061   return getToken().getLoc();
7062 }
7063 
7064 StringRef
7065 AMDGPUAsmParser::getTokenStr() const {
7066   return getToken().getString();
7067 }
7068 
7069 void
7070 AMDGPUAsmParser::lex() {
7071   Parser.Lex();
7072 }
7073 
7074 SMLoc
7075 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7076                                const OperandVector &Operands) const {
7077   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7078     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7079     if (Test(Op))
7080       return Op.getStartLoc();
7081   }
7082   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7083 }
7084 
7085 SMLoc
7086 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7087                            const OperandVector &Operands) const {
7088   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7089   return getOperandLoc(Test, Operands);
7090 }
7091 
7092 SMLoc
7093 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7094                            const OperandVector &Operands) const {
7095   auto Test = [=](const AMDGPUOperand& Op) {
7096     return Op.isRegKind() && Op.getReg() == Reg;
7097   };
7098   return getOperandLoc(Test, Operands);
7099 }
7100 
7101 SMLoc
7102 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7103   auto Test = [](const AMDGPUOperand& Op) {
7104     return Op.IsImmKindLiteral() || Op.isExpr();
7105   };
7106   return getOperandLoc(Test, Operands);
7107 }
7108 
7109 SMLoc
7110 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7111   auto Test = [](const AMDGPUOperand& Op) {
7112     return Op.isImmKindConst();
7113   };
7114   return getOperandLoc(Test, Operands);
7115 }
7116 
7117 //===----------------------------------------------------------------------===//
7118 // swizzle
7119 //===----------------------------------------------------------------------===//
7120 
7121 LLVM_READNONE
7122 static unsigned
7123 encodeBitmaskPerm(const unsigned AndMask,
7124                   const unsigned OrMask,
7125                   const unsigned XorMask) {
7126   using namespace llvm::AMDGPU::Swizzle;
7127 
7128   return BITMASK_PERM_ENC |
7129          (AndMask << BITMASK_AND_SHIFT) |
7130          (OrMask  << BITMASK_OR_SHIFT)  |
7131          (XorMask << BITMASK_XOR_SHIFT);
7132 }
7133 
7134 bool
7135 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7136                                      const unsigned MinVal,
7137                                      const unsigned MaxVal,
7138                                      const StringRef ErrMsg,
7139                                      SMLoc &Loc) {
7140   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7141     return false;
7142   }
7143   Loc = getLoc();
7144   if (!parseExpr(Op)) {
7145     return false;
7146   }
7147   if (Op < MinVal || Op > MaxVal) {
7148     Error(Loc, ErrMsg);
7149     return false;
7150   }
7151 
7152   return true;
7153 }
7154 
7155 bool
7156 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7157                                       const unsigned MinVal,
7158                                       const unsigned MaxVal,
7159                                       const StringRef ErrMsg) {
7160   SMLoc Loc;
7161   for (unsigned i = 0; i < OpNum; ++i) {
7162     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7163       return false;
7164   }
7165 
7166   return true;
7167 }
7168 
7169 bool
7170 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7171   using namespace llvm::AMDGPU::Swizzle;
7172 
7173   int64_t Lane[LANE_NUM];
7174   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7175                            "expected a 2-bit lane id")) {
7176     Imm = QUAD_PERM_ENC;
7177     for (unsigned I = 0; I < LANE_NUM; ++I) {
7178       Imm |= Lane[I] << (LANE_SHIFT * I);
7179     }
7180     return true;
7181   }
7182   return false;
7183 }
7184 
7185 bool
7186 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7187   using namespace llvm::AMDGPU::Swizzle;
7188 
7189   SMLoc Loc;
7190   int64_t GroupSize;
7191   int64_t LaneIdx;
7192 
7193   if (!parseSwizzleOperand(GroupSize,
7194                            2, 32,
7195                            "group size must be in the interval [2,32]",
7196                            Loc)) {
7197     return false;
7198   }
7199   if (!isPowerOf2_64(GroupSize)) {
7200     Error(Loc, "group size must be a power of two");
7201     return false;
7202   }
7203   if (parseSwizzleOperand(LaneIdx,
7204                           0, GroupSize - 1,
7205                           "lane id must be in the interval [0,group size - 1]",
7206                           Loc)) {
7207     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7208     return true;
7209   }
7210   return false;
7211 }
7212 
7213 bool
7214 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7215   using namespace llvm::AMDGPU::Swizzle;
7216 
7217   SMLoc Loc;
7218   int64_t GroupSize;
7219 
7220   if (!parseSwizzleOperand(GroupSize,
7221                            2, 32,
7222                            "group size must be in the interval [2,32]",
7223                            Loc)) {
7224     return false;
7225   }
7226   if (!isPowerOf2_64(GroupSize)) {
7227     Error(Loc, "group size must be a power of two");
7228     return false;
7229   }
7230 
7231   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7232   return true;
7233 }
7234 
7235 bool
7236 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7237   using namespace llvm::AMDGPU::Swizzle;
7238 
7239   SMLoc Loc;
7240   int64_t GroupSize;
7241 
7242   if (!parseSwizzleOperand(GroupSize,
7243                            1, 16,
7244                            "group size must be in the interval [1,16]",
7245                            Loc)) {
7246     return false;
7247   }
7248   if (!isPowerOf2_64(GroupSize)) {
7249     Error(Loc, "group size must be a power of two");
7250     return false;
7251   }
7252 
7253   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7254   return true;
7255 }
7256 
7257 bool
7258 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7259   using namespace llvm::AMDGPU::Swizzle;
7260 
7261   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7262     return false;
7263   }
7264 
7265   StringRef Ctl;
7266   SMLoc StrLoc = getLoc();
7267   if (!parseString(Ctl)) {
7268     return false;
7269   }
7270   if (Ctl.size() != BITMASK_WIDTH) {
7271     Error(StrLoc, "expected a 5-character mask");
7272     return false;
7273   }
7274 
7275   unsigned AndMask = 0;
7276   unsigned OrMask = 0;
7277   unsigned XorMask = 0;
7278 
7279   for (size_t i = 0; i < Ctl.size(); ++i) {
7280     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7281     switch(Ctl[i]) {
7282     default:
7283       Error(StrLoc, "invalid mask");
7284       return false;
7285     case '0':
7286       break;
7287     case '1':
7288       OrMask |= Mask;
7289       break;
7290     case 'p':
7291       AndMask |= Mask;
7292       break;
7293     case 'i':
7294       AndMask |= Mask;
7295       XorMask |= Mask;
7296       break;
7297     }
7298   }
7299 
7300   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7301   return true;
7302 }
7303 
7304 bool
7305 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7306 
7307   SMLoc OffsetLoc = getLoc();
7308 
7309   if (!parseExpr(Imm, "a swizzle macro")) {
7310     return false;
7311   }
7312   if (!isUInt<16>(Imm)) {
7313     Error(OffsetLoc, "expected a 16-bit offset");
7314     return false;
7315   }
7316   return true;
7317 }
7318 
7319 bool
7320 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7321   using namespace llvm::AMDGPU::Swizzle;
7322 
7323   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
7324 
7325     SMLoc ModeLoc = getLoc();
7326     bool Ok = false;
7327 
7328     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7329       Ok = parseSwizzleQuadPerm(Imm);
7330     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7331       Ok = parseSwizzleBitmaskPerm(Imm);
7332     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7333       Ok = parseSwizzleBroadcast(Imm);
7334     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7335       Ok = parseSwizzleSwap(Imm);
7336     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7337       Ok = parseSwizzleReverse(Imm);
7338     } else {
7339       Error(ModeLoc, "expected a swizzle mode");
7340     }
7341 
7342     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
7343   }
7344 
7345   return false;
7346 }
7347 
7348 OperandMatchResultTy
7349 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7350   SMLoc S = getLoc();
7351   int64_t Imm = 0;
7352 
7353   if (trySkipId("offset")) {
7354 
7355     bool Ok = false;
7356     if (skipToken(AsmToken::Colon, "expected a colon")) {
7357       if (trySkipId("swizzle")) {
7358         Ok = parseSwizzleMacro(Imm);
7359       } else {
7360         Ok = parseSwizzleOffset(Imm);
7361       }
7362     }
7363 
7364     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7365 
7366     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7367   } else {
7368     // Swizzle "offset" operand is optional.
7369     // If it is omitted, try parsing other optional operands.
7370     return parseOptionalOpr(Operands);
7371   }
7372 }
7373 
7374 bool
7375 AMDGPUOperand::isSwizzle() const {
7376   return isImmTy(ImmTySwizzle);
7377 }
7378 
7379 //===----------------------------------------------------------------------===//
7380 // VGPR Index Mode
7381 //===----------------------------------------------------------------------===//
7382 
7383 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7384 
7385   using namespace llvm::AMDGPU::VGPRIndexMode;
7386 
7387   if (trySkipToken(AsmToken::RParen)) {
7388     return OFF;
7389   }
7390 
7391   int64_t Imm = 0;
7392 
7393   while (true) {
7394     unsigned Mode = 0;
7395     SMLoc S = getLoc();
7396 
7397     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7398       if (trySkipId(IdSymbolic[ModeId])) {
7399         Mode = 1 << ModeId;
7400         break;
7401       }
7402     }
7403 
7404     if (Mode == 0) {
7405       Error(S, (Imm == 0)?
7406                "expected a VGPR index mode or a closing parenthesis" :
7407                "expected a VGPR index mode");
7408       return UNDEF;
7409     }
7410 
7411     if (Imm & Mode) {
7412       Error(S, "duplicate VGPR index mode");
7413       return UNDEF;
7414     }
7415     Imm |= Mode;
7416 
7417     if (trySkipToken(AsmToken::RParen))
7418       break;
7419     if (!skipToken(AsmToken::Comma,
7420                    "expected a comma or a closing parenthesis"))
7421       return UNDEF;
7422   }
7423 
7424   return Imm;
7425 }
7426 
7427 OperandMatchResultTy
7428 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7429 
7430   using namespace llvm::AMDGPU::VGPRIndexMode;
7431 
7432   int64_t Imm = 0;
7433   SMLoc S = getLoc();
7434 
7435   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7436     Imm = parseGPRIdxMacro();
7437     if (Imm == UNDEF)
7438       return MatchOperand_ParseFail;
7439   } else {
7440     if (getParser().parseAbsoluteExpression(Imm))
7441       return MatchOperand_ParseFail;
7442     if (Imm < 0 || !isUInt<4>(Imm)) {
7443       Error(S, "invalid immediate: only 4-bit values are legal");
7444       return MatchOperand_ParseFail;
7445     }
7446   }
7447 
7448   Operands.push_back(
7449       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7450   return MatchOperand_Success;
7451 }
7452 
7453 bool AMDGPUOperand::isGPRIdxMode() const {
7454   return isImmTy(ImmTyGprIdxMode);
7455 }
7456 
7457 //===----------------------------------------------------------------------===//
7458 // sopp branch targets
7459 //===----------------------------------------------------------------------===//
7460 
7461 OperandMatchResultTy
7462 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7463 
7464   // Make sure we are not parsing something
7465   // that looks like a label or an expression but is not.
7466   // This will improve error messages.
7467   if (isRegister() || isModifier())
7468     return MatchOperand_NoMatch;
7469 
7470   if (!parseExpr(Operands))
7471     return MatchOperand_ParseFail;
7472 
7473   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7474   assert(Opr.isImm() || Opr.isExpr());
7475   SMLoc Loc = Opr.getStartLoc();
7476 
7477   // Currently we do not support arbitrary expressions as branch targets.
7478   // Only labels and absolute expressions are accepted.
7479   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7480     Error(Loc, "expected an absolute expression or a label");
7481   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7482     Error(Loc, "expected a 16-bit signed jump offset");
7483   }
7484 
7485   return MatchOperand_Success;
7486 }
7487 
7488 //===----------------------------------------------------------------------===//
7489 // Boolean holding registers
7490 //===----------------------------------------------------------------------===//
7491 
7492 OperandMatchResultTy
7493 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7494   return parseReg(Operands);
7495 }
7496 
7497 //===----------------------------------------------------------------------===//
7498 // mubuf
7499 //===----------------------------------------------------------------------===//
7500 
7501 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7502   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7503 }
7504 
7505 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7506                                    const OperandVector &Operands,
7507                                    bool IsAtomic,
7508                                    bool IsLds) {
7509   OptionalImmIndexMap OptionalIdx;
7510   unsigned FirstOperandIdx = 1;
7511   bool IsAtomicReturn = false;
7512 
7513   if (IsAtomic) {
7514     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7515       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7516       if (!Op.isCPol())
7517         continue;
7518       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7519       break;
7520     }
7521 
7522     if (!IsAtomicReturn) {
7523       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7524       if (NewOpc != -1)
7525         Inst.setOpcode(NewOpc);
7526     }
7527 
7528     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7529                       SIInstrFlags::IsAtomicRet;
7530   }
7531 
7532   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7533     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7534 
7535     // Add the register arguments
7536     if (Op.isReg()) {
7537       Op.addRegOperands(Inst, 1);
7538       // Insert a tied src for atomic return dst.
7539       // This cannot be postponed as subsequent calls to
7540       // addImmOperands rely on correct number of MC operands.
7541       if (IsAtomicReturn && i == FirstOperandIdx)
7542         Op.addRegOperands(Inst, 1);
7543       continue;
7544     }
7545 
7546     // Handle the case where soffset is an immediate
7547     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7548       Op.addImmOperands(Inst, 1);
7549       continue;
7550     }
7551 
7552     // Handle tokens like 'offen' which are sometimes hard-coded into the
7553     // asm string.  There are no MCInst operands for these.
7554     if (Op.isToken()) {
7555       continue;
7556     }
7557     assert(Op.isImm());
7558 
7559     // Handle optional arguments
7560     OptionalIdx[Op.getImmTy()] = i;
7561   }
7562 
7563   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7564   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7565 
7566   if (!IsLds) { // tfe is not legal with lds opcodes
7567     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7568   }
7569   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7570 }
7571 
7572 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7573   OptionalImmIndexMap OptionalIdx;
7574 
7575   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7576     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7577 
7578     // Add the register arguments
7579     if (Op.isReg()) {
7580       Op.addRegOperands(Inst, 1);
7581       continue;
7582     }
7583 
7584     // Handle the case where soffset is an immediate
7585     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7586       Op.addImmOperands(Inst, 1);
7587       continue;
7588     }
7589 
7590     // Handle tokens like 'offen' which are sometimes hard-coded into the
7591     // asm string.  There are no MCInst operands for these.
7592     if (Op.isToken()) {
7593       continue;
7594     }
7595     assert(Op.isImm());
7596 
7597     // Handle optional arguments
7598     OptionalIdx[Op.getImmTy()] = i;
7599   }
7600 
7601   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7602                         AMDGPUOperand::ImmTyOffset);
7603   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7604   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7605   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7606   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7607 }
7608 
7609 //===----------------------------------------------------------------------===//
7610 // mimg
7611 //===----------------------------------------------------------------------===//
7612 
7613 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7614                               bool IsAtomic) {
7615   unsigned I = 1;
7616   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7617   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7618     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7619   }
7620 
7621   if (IsAtomic) {
7622     // Add src, same as dst
7623     assert(Desc.getNumDefs() == 1);
7624     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7625   }
7626 
7627   OptionalImmIndexMap OptionalIdx;
7628 
7629   for (unsigned E = Operands.size(); I != E; ++I) {
7630     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7631 
7632     // Add the register arguments
7633     if (Op.isReg()) {
7634       Op.addRegOperands(Inst, 1);
7635     } else if (Op.isImmModifier()) {
7636       OptionalIdx[Op.getImmTy()] = I;
7637     } else if (!Op.isToken()) {
7638       llvm_unreachable("unexpected operand type");
7639     }
7640   }
7641 
7642   bool IsGFX10Plus = isGFX10Plus();
7643 
7644   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7645   if (IsGFX10Plus)
7646     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7647   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7648   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7649   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7650   if (IsGFX10Plus)
7651     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7652   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7653     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7654   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7655   if (!IsGFX10Plus)
7656     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7657   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7658 }
7659 
7660 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7661   cvtMIMG(Inst, Operands, true);
7662 }
7663 
7664 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7665   OptionalImmIndexMap OptionalIdx;
7666   bool IsAtomicReturn = false;
7667 
7668   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7669     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7670     if (!Op.isCPol())
7671       continue;
7672     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7673     break;
7674   }
7675 
7676   if (!IsAtomicReturn) {
7677     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7678     if (NewOpc != -1)
7679       Inst.setOpcode(NewOpc);
7680   }
7681 
7682   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7683                     SIInstrFlags::IsAtomicRet;
7684 
7685   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7686     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7687 
7688     // Add the register arguments
7689     if (Op.isReg()) {
7690       Op.addRegOperands(Inst, 1);
7691       if (IsAtomicReturn && i == 1)
7692         Op.addRegOperands(Inst, 1);
7693       continue;
7694     }
7695 
7696     // Handle the case where soffset is an immediate
7697     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7698       Op.addImmOperands(Inst, 1);
7699       continue;
7700     }
7701 
7702     // Handle tokens like 'offen' which are sometimes hard-coded into the
7703     // asm string.  There are no MCInst operands for these.
7704     if (Op.isToken()) {
7705       continue;
7706     }
7707     assert(Op.isImm());
7708 
7709     // Handle optional arguments
7710     OptionalIdx[Op.getImmTy()] = i;
7711   }
7712 
7713   if ((int)Inst.getNumOperands() <=
7714       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7715     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7716   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7717 }
7718 
7719 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7720                                       const OperandVector &Operands) {
7721   for (unsigned I = 1; I < Operands.size(); ++I) {
7722     auto &Operand = (AMDGPUOperand &)*Operands[I];
7723     if (Operand.isReg())
7724       Operand.addRegOperands(Inst, 1);
7725   }
7726 
7727   Inst.addOperand(MCOperand::createImm(1)); // a16
7728 }
7729 
7730 //===----------------------------------------------------------------------===//
7731 // smrd
7732 //===----------------------------------------------------------------------===//
7733 
7734 bool AMDGPUOperand::isSMRDOffset8() const {
7735   return isImm() && isUInt<8>(getImm());
7736 }
7737 
7738 bool AMDGPUOperand::isSMEMOffset() const {
7739   return isImmTy(ImmTyNone) ||
7740          isImmTy(ImmTyOffset); // Offset range is checked later by validator.
7741 }
7742 
7743 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7744   // 32-bit literals are only supported on CI and we only want to use them
7745   // when the offset is > 8-bits.
7746   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7747 }
7748 
7749 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7750   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7751 }
7752 
7753 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7754   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7755 }
7756 
7757 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7758   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7759 }
7760 
7761 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7762   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7763 }
7764 
7765 //===----------------------------------------------------------------------===//
7766 // vop3
7767 //===----------------------------------------------------------------------===//
7768 
7769 static bool ConvertOmodMul(int64_t &Mul) {
7770   if (Mul != 1 && Mul != 2 && Mul != 4)
7771     return false;
7772 
7773   Mul >>= 1;
7774   return true;
7775 }
7776 
7777 static bool ConvertOmodDiv(int64_t &Div) {
7778   if (Div == 1) {
7779     Div = 0;
7780     return true;
7781   }
7782 
7783   if (Div == 2) {
7784     Div = 3;
7785     return true;
7786   }
7787 
7788   return false;
7789 }
7790 
7791 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7792 // This is intentional and ensures compatibility with sp3.
7793 // See bug 35397 for details.
7794 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7795   if (BoundCtrl == 0 || BoundCtrl == 1) {
7796     BoundCtrl = 1;
7797     return true;
7798   }
7799   return false;
7800 }
7801 
7802 // Note: the order in this table matches the order of operands in AsmString.
7803 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7804   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7805   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7806   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7807   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7808   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7809   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7810   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7811   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7812   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7813   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7814   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7815   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7816   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7817   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7818   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7819   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7820   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7821   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7822   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7823   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7824   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7825   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7826   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7827   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7828   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7829   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7830   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7831   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7832   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7833   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7834   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7835   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7836   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7837   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7838   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7839   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7840   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7841   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7842   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7843   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7844   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7845   {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}
7846 };
7847 
7848 void AMDGPUAsmParser::onBeginOfFile() {
7849   if (!getParser().getStreamer().getTargetStreamer() ||
7850       getSTI().getTargetTriple().getArch() == Triple::r600)
7851     return;
7852 
7853   if (!getTargetStreamer().getTargetID())
7854     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7855 
7856   if (isHsaAbiVersion3AndAbove(&getSTI()))
7857     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7858 }
7859 
7860 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7861 
7862   OperandMatchResultTy res = parseOptionalOpr(Operands);
7863 
7864   // This is a hack to enable hardcoded mandatory operands which follow
7865   // optional operands.
7866   //
7867   // Current design assumes that all operands after the first optional operand
7868   // are also optional. However implementation of some instructions violates
7869   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
7870   //
7871   // To alleviate this problem, we have to (implicitly) parse extra operands
7872   // to make sure autogenerated parser of custom operands never hit hardcoded
7873   // mandatory operands.
7874 
7875   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7876     if (res != MatchOperand_Success ||
7877         isToken(AsmToken::EndOfStatement))
7878       break;
7879 
7880     trySkipToken(AsmToken::Comma);
7881     res = parseOptionalOpr(Operands);
7882   }
7883 
7884   return res;
7885 }
7886 
7887 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7888   OperandMatchResultTy res;
7889   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7890     // try to parse any optional operand here
7891     if (Op.IsBit) {
7892       res = parseNamedBit(Op.Name, Operands, Op.Type);
7893     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7894       res = parseOModOperand(Operands);
7895     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7896                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7897                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7898       res = parseSDWASel(Operands, Op.Name, Op.Type);
7899     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7900       res = parseSDWADstUnused(Operands);
7901     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7902                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7903                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7904                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7905       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7906                                         Op.ConvertResult);
7907     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7908       res = parseDim(Operands);
7909     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7910       res = parseCPol(Operands);
7911     } else {
7912       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7913       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7914         res = parseOperandArrayWithPrefix("neg", Operands,
7915                                           AMDGPUOperand::ImmTyBLGP,
7916                                           nullptr);
7917       }
7918     }
7919     if (res != MatchOperand_NoMatch) {
7920       return res;
7921     }
7922   }
7923   return MatchOperand_NoMatch;
7924 }
7925 
7926 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7927   StringRef Name = getTokenStr();
7928   if (Name == "mul") {
7929     return parseIntWithPrefix("mul", Operands,
7930                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7931   }
7932 
7933   if (Name == "div") {
7934     return parseIntWithPrefix("div", Operands,
7935                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7936   }
7937 
7938   return MatchOperand_NoMatch;
7939 }
7940 
7941 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7942   cvtVOP3P(Inst, Operands);
7943 
7944   int Opc = Inst.getOpcode();
7945 
7946   int SrcNum;
7947   const int Ops[] = { AMDGPU::OpName::src0,
7948                       AMDGPU::OpName::src1,
7949                       AMDGPU::OpName::src2 };
7950   for (SrcNum = 0;
7951        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7952        ++SrcNum);
7953   assert(SrcNum > 0);
7954 
7955   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7956   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7957 
7958   if ((OpSel & (1 << SrcNum)) != 0) {
7959     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7960     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7961     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7962   }
7963 }
7964 
7965 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7966       // 1. This operand is input modifiers
7967   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7968       // 2. This is not last operand
7969       && Desc.NumOperands > (OpNum + 1)
7970       // 3. Next operand is register class
7971       && Desc.OpInfo[OpNum + 1].RegClass != -1
7972       // 4. Next register is not tied to any other operand
7973       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7974 }
7975 
7976 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7977 {
7978   OptionalImmIndexMap OptionalIdx;
7979   unsigned Opc = Inst.getOpcode();
7980 
7981   unsigned I = 1;
7982   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7983   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7984     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7985   }
7986 
7987   for (unsigned E = Operands.size(); I != E; ++I) {
7988     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7989     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7990       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7991     } else if (Op.isInterpSlot() ||
7992                Op.isInterpAttr() ||
7993                Op.isAttrChan()) {
7994       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7995     } else if (Op.isImmModifier()) {
7996       OptionalIdx[Op.getImmTy()] = I;
7997     } else {
7998       llvm_unreachable("unhandled operand type");
7999     }
8000   }
8001 
8002   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
8003     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
8004   }
8005 
8006   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8007     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8008   }
8009 
8010   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8011     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8012   }
8013 }
8014 
8015 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8016                               OptionalImmIndexMap &OptionalIdx) {
8017   unsigned Opc = Inst.getOpcode();
8018 
8019   unsigned I = 1;
8020   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8021   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8022     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8023   }
8024 
8025   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8026     // This instruction has src modifiers
8027     for (unsigned E = Operands.size(); I != E; ++I) {
8028       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8029       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8030         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8031       } else if (Op.isImmModifier()) {
8032         OptionalIdx[Op.getImmTy()] = I;
8033       } else if (Op.isRegOrImm()) {
8034         Op.addRegOrImmOperands(Inst, 1);
8035       } else {
8036         llvm_unreachable("unhandled operand type");
8037       }
8038     }
8039   } else {
8040     // No src modifiers
8041     for (unsigned E = Operands.size(); I != E; ++I) {
8042       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8043       if (Op.isMod()) {
8044         OptionalIdx[Op.getImmTy()] = I;
8045       } else {
8046         Op.addRegOrImmOperands(Inst, 1);
8047       }
8048     }
8049   }
8050 
8051   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8052     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8053   }
8054 
8055   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8056     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8057   }
8058 
8059   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8060   // it has src2 register operand that is tied to dst operand
8061   // we don't allow modifiers for this operand in assembler so src2_modifiers
8062   // should be 0.
8063   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8064       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8065       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8066       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8067       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8068       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8069       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8070       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8071       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8072       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8073       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
8074     auto it = Inst.begin();
8075     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8076     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8077     ++it;
8078     // Copy the operand to ensure it's not invalidated when Inst grows.
8079     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8080   }
8081 }
8082 
8083 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8084   OptionalImmIndexMap OptionalIdx;
8085   cvtVOP3(Inst, Operands, OptionalIdx);
8086 }
8087 
8088 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8089                                OptionalImmIndexMap &OptIdx) {
8090   const int Opc = Inst.getOpcode();
8091   const MCInstrDesc &Desc = MII.get(Opc);
8092 
8093   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8094 
8095   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8096     assert(!IsPacked);
8097     Inst.addOperand(Inst.getOperand(0));
8098   }
8099 
8100   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8101   // instruction, and then figure out where to actually put the modifiers
8102 
8103   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8104   if (OpSelIdx != -1) {
8105     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8106   }
8107 
8108   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8109   if (OpSelHiIdx != -1) {
8110     int DefaultVal = IsPacked ? -1 : 0;
8111     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8112                           DefaultVal);
8113   }
8114 
8115   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8116   if (NegLoIdx != -1) {
8117     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8118     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8119   }
8120 
8121   const int Ops[] = { AMDGPU::OpName::src0,
8122                       AMDGPU::OpName::src1,
8123                       AMDGPU::OpName::src2 };
8124   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8125                          AMDGPU::OpName::src1_modifiers,
8126                          AMDGPU::OpName::src2_modifiers };
8127 
8128   unsigned OpSel = 0;
8129   unsigned OpSelHi = 0;
8130   unsigned NegLo = 0;
8131   unsigned NegHi = 0;
8132 
8133   if (OpSelIdx != -1)
8134     OpSel = Inst.getOperand(OpSelIdx).getImm();
8135 
8136   if (OpSelHiIdx != -1)
8137     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8138 
8139   if (NegLoIdx != -1) {
8140     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8141     NegLo = Inst.getOperand(NegLoIdx).getImm();
8142     NegHi = Inst.getOperand(NegHiIdx).getImm();
8143   }
8144 
8145   for (int J = 0; J < 3; ++J) {
8146     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8147     if (OpIdx == -1)
8148       break;
8149 
8150     uint32_t ModVal = 0;
8151 
8152     if ((OpSel & (1 << J)) != 0)
8153       ModVal |= SISrcMods::OP_SEL_0;
8154 
8155     if ((OpSelHi & (1 << J)) != 0)
8156       ModVal |= SISrcMods::OP_SEL_1;
8157 
8158     if ((NegLo & (1 << J)) != 0)
8159       ModVal |= SISrcMods::NEG;
8160 
8161     if ((NegHi & (1 << J)) != 0)
8162       ModVal |= SISrcMods::NEG_HI;
8163 
8164     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8165 
8166     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8167   }
8168 }
8169 
8170 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8171   OptionalImmIndexMap OptIdx;
8172   cvtVOP3(Inst, Operands, OptIdx);
8173   cvtVOP3P(Inst, Operands, OptIdx);
8174 }
8175 
8176 //===----------------------------------------------------------------------===//
8177 // dpp
8178 //===----------------------------------------------------------------------===//
8179 
8180 bool AMDGPUOperand::isDPP8() const {
8181   return isImmTy(ImmTyDPP8);
8182 }
8183 
8184 bool AMDGPUOperand::isDPPCtrl() const {
8185   using namespace AMDGPU::DPP;
8186 
8187   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8188   if (result) {
8189     int64_t Imm = getImm();
8190     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8191            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8192            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8193            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8194            (Imm == DppCtrl::WAVE_SHL1) ||
8195            (Imm == DppCtrl::WAVE_ROL1) ||
8196            (Imm == DppCtrl::WAVE_SHR1) ||
8197            (Imm == DppCtrl::WAVE_ROR1) ||
8198            (Imm == DppCtrl::ROW_MIRROR) ||
8199            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8200            (Imm == DppCtrl::BCAST15) ||
8201            (Imm == DppCtrl::BCAST31) ||
8202            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8203            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8204   }
8205   return false;
8206 }
8207 
8208 //===----------------------------------------------------------------------===//
8209 // mAI
8210 //===----------------------------------------------------------------------===//
8211 
8212 bool AMDGPUOperand::isBLGP() const {
8213   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8214 }
8215 
8216 bool AMDGPUOperand::isCBSZ() const {
8217   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8218 }
8219 
8220 bool AMDGPUOperand::isABID() const {
8221   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8222 }
8223 
8224 bool AMDGPUOperand::isS16Imm() const {
8225   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8226 }
8227 
8228 bool AMDGPUOperand::isU16Imm() const {
8229   return isImm() && isUInt<16>(getImm());
8230 }
8231 
8232 //===----------------------------------------------------------------------===//
8233 // dim
8234 //===----------------------------------------------------------------------===//
8235 
8236 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8237   // We want to allow "dim:1D" etc.,
8238   // but the initial 1 is tokenized as an integer.
8239   std::string Token;
8240   if (isToken(AsmToken::Integer)) {
8241     SMLoc Loc = getToken().getEndLoc();
8242     Token = std::string(getTokenStr());
8243     lex();
8244     if (getLoc() != Loc)
8245       return false;
8246   }
8247 
8248   StringRef Suffix;
8249   if (!parseId(Suffix))
8250     return false;
8251   Token += Suffix;
8252 
8253   StringRef DimId = Token;
8254   if (DimId.startswith("SQ_RSRC_IMG_"))
8255     DimId = DimId.drop_front(12);
8256 
8257   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8258   if (!DimInfo)
8259     return false;
8260 
8261   Encoding = DimInfo->Encoding;
8262   return true;
8263 }
8264 
8265 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8266   if (!isGFX10Plus())
8267     return MatchOperand_NoMatch;
8268 
8269   SMLoc S = getLoc();
8270 
8271   if (!trySkipId("dim", AsmToken::Colon))
8272     return MatchOperand_NoMatch;
8273 
8274   unsigned Encoding;
8275   SMLoc Loc = getLoc();
8276   if (!parseDimId(Encoding)) {
8277     Error(Loc, "invalid dim value");
8278     return MatchOperand_ParseFail;
8279   }
8280 
8281   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8282                                               AMDGPUOperand::ImmTyDim));
8283   return MatchOperand_Success;
8284 }
8285 
8286 //===----------------------------------------------------------------------===//
8287 // dpp
8288 //===----------------------------------------------------------------------===//
8289 
8290 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8291   SMLoc S = getLoc();
8292 
8293   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8294     return MatchOperand_NoMatch;
8295 
8296   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8297 
8298   int64_t Sels[8];
8299 
8300   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8301     return MatchOperand_ParseFail;
8302 
8303   for (size_t i = 0; i < 8; ++i) {
8304     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8305       return MatchOperand_ParseFail;
8306 
8307     SMLoc Loc = getLoc();
8308     if (getParser().parseAbsoluteExpression(Sels[i]))
8309       return MatchOperand_ParseFail;
8310     if (0 > Sels[i] || 7 < Sels[i]) {
8311       Error(Loc, "expected a 3-bit value");
8312       return MatchOperand_ParseFail;
8313     }
8314   }
8315 
8316   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8317     return MatchOperand_ParseFail;
8318 
8319   unsigned DPP8 = 0;
8320   for (size_t i = 0; i < 8; ++i)
8321     DPP8 |= (Sels[i] << (i * 3));
8322 
8323   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8324   return MatchOperand_Success;
8325 }
8326 
8327 bool
8328 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8329                                     const OperandVector &Operands) {
8330   if (Ctrl == "row_newbcast")
8331     return isGFX90A();
8332 
8333   if (Ctrl == "row_share" ||
8334       Ctrl == "row_xmask")
8335     return isGFX10Plus();
8336 
8337   if (Ctrl == "wave_shl" ||
8338       Ctrl == "wave_shr" ||
8339       Ctrl == "wave_rol" ||
8340       Ctrl == "wave_ror" ||
8341       Ctrl == "row_bcast")
8342     return isVI() || isGFX9();
8343 
8344   return Ctrl == "row_mirror" ||
8345          Ctrl == "row_half_mirror" ||
8346          Ctrl == "quad_perm" ||
8347          Ctrl == "row_shl" ||
8348          Ctrl == "row_shr" ||
8349          Ctrl == "row_ror";
8350 }
8351 
8352 int64_t
8353 AMDGPUAsmParser::parseDPPCtrlPerm() {
8354   // quad_perm:[%d,%d,%d,%d]
8355 
8356   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8357     return -1;
8358 
8359   int64_t Val = 0;
8360   for (int i = 0; i < 4; ++i) {
8361     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8362       return -1;
8363 
8364     int64_t Temp;
8365     SMLoc Loc = getLoc();
8366     if (getParser().parseAbsoluteExpression(Temp))
8367       return -1;
8368     if (Temp < 0 || Temp > 3) {
8369       Error(Loc, "expected a 2-bit value");
8370       return -1;
8371     }
8372 
8373     Val += (Temp << i * 2);
8374   }
8375 
8376   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8377     return -1;
8378 
8379   return Val;
8380 }
8381 
8382 int64_t
8383 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8384   using namespace AMDGPU::DPP;
8385 
8386   // sel:%d
8387 
8388   int64_t Val;
8389   SMLoc Loc = getLoc();
8390 
8391   if (getParser().parseAbsoluteExpression(Val))
8392     return -1;
8393 
8394   struct DppCtrlCheck {
8395     int64_t Ctrl;
8396     int Lo;
8397     int Hi;
8398   };
8399 
8400   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8401     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8402     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8403     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8404     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8405     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8406     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8407     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8408     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8409     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8410     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8411     .Default({-1, 0, 0});
8412 
8413   bool Valid;
8414   if (Check.Ctrl == -1) {
8415     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8416     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8417   } else {
8418     Valid = Check.Lo <= Val && Val <= Check.Hi;
8419     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8420   }
8421 
8422   if (!Valid) {
8423     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8424     return -1;
8425   }
8426 
8427   return Val;
8428 }
8429 
8430 OperandMatchResultTy
8431 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8432   using namespace AMDGPU::DPP;
8433 
8434   if (!isToken(AsmToken::Identifier) ||
8435       !isSupportedDPPCtrl(getTokenStr(), Operands))
8436     return MatchOperand_NoMatch;
8437 
8438   SMLoc S = getLoc();
8439   int64_t Val = -1;
8440   StringRef Ctrl;
8441 
8442   parseId(Ctrl);
8443 
8444   if (Ctrl == "row_mirror") {
8445     Val = DppCtrl::ROW_MIRROR;
8446   } else if (Ctrl == "row_half_mirror") {
8447     Val = DppCtrl::ROW_HALF_MIRROR;
8448   } else {
8449     if (skipToken(AsmToken::Colon, "expected a colon")) {
8450       if (Ctrl == "quad_perm") {
8451         Val = parseDPPCtrlPerm();
8452       } else {
8453         Val = parseDPPCtrlSel(Ctrl);
8454       }
8455     }
8456   }
8457 
8458   if (Val == -1)
8459     return MatchOperand_ParseFail;
8460 
8461   Operands.push_back(
8462     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8463   return MatchOperand_Success;
8464 }
8465 
8466 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8467   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8468 }
8469 
8470 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8471   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8472 }
8473 
8474 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8475   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8476 }
8477 
8478 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8479   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8480 }
8481 
8482 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8483   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8484 }
8485 
8486 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8487   OptionalImmIndexMap OptionalIdx;
8488 
8489   unsigned Opc = Inst.getOpcode();
8490   bool HasModifiers =
8491       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8492   unsigned I = 1;
8493   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8494   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8495     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8496   }
8497 
8498   int Fi = 0;
8499   for (unsigned E = Operands.size(); I != E; ++I) {
8500     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8501                                             MCOI::TIED_TO);
8502     if (TiedTo != -1) {
8503       assert((unsigned)TiedTo < Inst.getNumOperands());
8504       // handle tied old or src2 for MAC instructions
8505       Inst.addOperand(Inst.getOperand(TiedTo));
8506     }
8507     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8508     // Add the register arguments
8509     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8510       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
8511       // Skip it.
8512       continue;
8513     }
8514 
8515     if (IsDPP8) {
8516       if (Op.isDPP8()) {
8517         Op.addImmOperands(Inst, 1);
8518       } else if (HasModifiers &&
8519                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8520         Op.addRegWithFPInputModsOperands(Inst, 2);
8521       } else if (Op.isFI()) {
8522         Fi = Op.getImm();
8523       } else if (Op.isReg()) {
8524         Op.addRegOperands(Inst, 1);
8525       } else {
8526         llvm_unreachable("Invalid operand type");
8527       }
8528     } else {
8529       if (HasModifiers &&
8530           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8531         Op.addRegWithFPInputModsOperands(Inst, 2);
8532       } else if (Op.isReg()) {
8533         Op.addRegOperands(Inst, 1);
8534       } else if (Op.isDPPCtrl()) {
8535         Op.addImmOperands(Inst, 1);
8536       } else if (Op.isImm()) {
8537         // Handle optional arguments
8538         OptionalIdx[Op.getImmTy()] = I;
8539       } else {
8540         llvm_unreachable("Invalid operand type");
8541       }
8542     }
8543   }
8544 
8545   if (IsDPP8) {
8546     using namespace llvm::AMDGPU::DPP;
8547     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8548   } else {
8549     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8550     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8551     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8552     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8553       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8554     }
8555   }
8556 }
8557 
8558 //===----------------------------------------------------------------------===//
8559 // sdwa
8560 //===----------------------------------------------------------------------===//
8561 
8562 OperandMatchResultTy
8563 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8564                               AMDGPUOperand::ImmTy Type) {
8565   using namespace llvm::AMDGPU::SDWA;
8566 
8567   SMLoc S = getLoc();
8568   StringRef Value;
8569   OperandMatchResultTy res;
8570 
8571   SMLoc StringLoc;
8572   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8573   if (res != MatchOperand_Success) {
8574     return res;
8575   }
8576 
8577   int64_t Int;
8578   Int = StringSwitch<int64_t>(Value)
8579         .Case("BYTE_0", SdwaSel::BYTE_0)
8580         .Case("BYTE_1", SdwaSel::BYTE_1)
8581         .Case("BYTE_2", SdwaSel::BYTE_2)
8582         .Case("BYTE_3", SdwaSel::BYTE_3)
8583         .Case("WORD_0", SdwaSel::WORD_0)
8584         .Case("WORD_1", SdwaSel::WORD_1)
8585         .Case("DWORD", SdwaSel::DWORD)
8586         .Default(0xffffffff);
8587 
8588   if (Int == 0xffffffff) {
8589     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8590     return MatchOperand_ParseFail;
8591   }
8592 
8593   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8594   return MatchOperand_Success;
8595 }
8596 
8597 OperandMatchResultTy
8598 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8599   using namespace llvm::AMDGPU::SDWA;
8600 
8601   SMLoc S = getLoc();
8602   StringRef Value;
8603   OperandMatchResultTy res;
8604 
8605   SMLoc StringLoc;
8606   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8607   if (res != MatchOperand_Success) {
8608     return res;
8609   }
8610 
8611   int64_t Int;
8612   Int = StringSwitch<int64_t>(Value)
8613         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8614         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8615         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8616         .Default(0xffffffff);
8617 
8618   if (Int == 0xffffffff) {
8619     Error(StringLoc, "invalid dst_unused value");
8620     return MatchOperand_ParseFail;
8621   }
8622 
8623   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8624   return MatchOperand_Success;
8625 }
8626 
8627 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8628   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8629 }
8630 
8631 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8632   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8633 }
8634 
8635 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8636   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8637 }
8638 
8639 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8640   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8641 }
8642 
8643 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8644   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8645 }
8646 
8647 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8648                               uint64_t BasicInstType,
8649                               bool SkipDstVcc,
8650                               bool SkipSrcVcc) {
8651   using namespace llvm::AMDGPU::SDWA;
8652 
8653   OptionalImmIndexMap OptionalIdx;
8654   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8655   bool SkippedVcc = false;
8656 
8657   unsigned I = 1;
8658   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8659   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8660     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8661   }
8662 
8663   for (unsigned E = Operands.size(); I != E; ++I) {
8664     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8665     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8666         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8667       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8668       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8669       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8670       // Skip VCC only if we didn't skip it on previous iteration.
8671       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8672       if (BasicInstType == SIInstrFlags::VOP2 &&
8673           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8674            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8675         SkippedVcc = true;
8676         continue;
8677       } else if (BasicInstType == SIInstrFlags::VOPC &&
8678                  Inst.getNumOperands() == 0) {
8679         SkippedVcc = true;
8680         continue;
8681       }
8682     }
8683     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8684       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8685     } else if (Op.isImm()) {
8686       // Handle optional arguments
8687       OptionalIdx[Op.getImmTy()] = I;
8688     } else {
8689       llvm_unreachable("Invalid operand type");
8690     }
8691     SkippedVcc = false;
8692   }
8693 
8694   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8695       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8696       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8697     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
8698     switch (BasicInstType) {
8699     case SIInstrFlags::VOP1:
8700       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8701       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8702         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8703       }
8704       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8705       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8706       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8707       break;
8708 
8709     case SIInstrFlags::VOP2:
8710       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8711       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8712         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8713       }
8714       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8715       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8716       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8717       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8718       break;
8719 
8720     case SIInstrFlags::VOPC:
8721       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8722         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8723       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8724       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8725       break;
8726 
8727     default:
8728       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8729     }
8730   }
8731 
8732   // special case v_mac_{f16, f32}:
8733   // it has src2 register operand that is tied to dst operand
8734   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8735       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8736     auto it = Inst.begin();
8737     std::advance(
8738       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8739     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8740   }
8741 }
8742 
8743 //===----------------------------------------------------------------------===//
8744 // mAI
8745 //===----------------------------------------------------------------------===//
8746 
8747 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8748   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8749 }
8750 
8751 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8752   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8753 }
8754 
8755 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8756   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8757 }
8758 
8759 /// Force static initialization.
8760 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8761   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8762   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8763 }
8764 
8765 #define GET_REGISTER_MATCHER
8766 #define GET_MATCHER_IMPLEMENTATION
8767 #define GET_MNEMONIC_SPELL_CHECKER
8768 #define GET_MNEMONIC_CHECKER
8769 #include "AMDGPUGenAsmMatcher.inc"
8770 
8771 // This function should be defined after auto-generated include so that we have
8772 // MatchClassKind enum defined
8773 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8774                                                      unsigned Kind) {
8775   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8776   // But MatchInstructionImpl() expects to meet token and fails to validate
8777   // operand. This method checks if we are given immediate operand but expect to
8778   // get corresponding token.
8779   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8780   switch (Kind) {
8781   case MCK_addr64:
8782     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8783   case MCK_gds:
8784     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8785   case MCK_lds:
8786     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8787   case MCK_idxen:
8788     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8789   case MCK_offen:
8790     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8791   case MCK_SSrcB32:
8792     // When operands have expression values, they will return true for isToken,
8793     // because it is not possible to distinguish between a token and an
8794     // expression at parse time. MatchInstructionImpl() will always try to
8795     // match an operand as a token, when isToken returns true, and when the
8796     // name of the expression is not a valid token, the match will fail,
8797     // so we need to handle it here.
8798     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8799   case MCK_SSrcF32:
8800     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8801   case MCK_SoppBrTarget:
8802     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8803   case MCK_VReg32OrOff:
8804     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8805   case MCK_InterpSlot:
8806     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8807   case MCK_Attr:
8808     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8809   case MCK_AttrChan:
8810     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8811   case MCK_ImmSMEMOffset:
8812     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8813   case MCK_SReg_64:
8814   case MCK_SReg_64_XEXEC:
8815     // Null is defined as a 32-bit register but
8816     // it should also be enabled with 64-bit operands.
8817     // The following code enables it for SReg_64 operands
8818     // used as source and destination. Remaining source
8819     // operands are handled in isInlinableImm.
8820     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8821   default:
8822     return Match_InvalidOperand;
8823   }
8824 }
8825 
8826 //===----------------------------------------------------------------------===//
8827 // endpgm
8828 //===----------------------------------------------------------------------===//
8829 
8830 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8831   SMLoc S = getLoc();
8832   int64_t Imm = 0;
8833 
8834   if (!parseExpr(Imm)) {
8835     // The operand is optional, if not present default to 0
8836     Imm = 0;
8837   }
8838 
8839   if (!isUInt<16>(Imm)) {
8840     Error(S, "expected a 16-bit value");
8841     return MatchOperand_ParseFail;
8842   }
8843 
8844   Operands.push_back(
8845       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8846   return MatchOperand_Success;
8847 }
8848 
8849 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8850 
8851 //===----------------------------------------------------------------------===//
8852 // LDSDIR
8853 //===----------------------------------------------------------------------===//
8854 
8855 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
8856   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
8857 }
8858 
8859 bool AMDGPUOperand::isWaitVDST() const {
8860   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
8861 }
8862