1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/TargetRegistry.h"
32 #include "llvm/Support/AMDGPUMetadata.h"
33 #include "llvm/Support/AMDHSAKernelDescriptor.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/MachineValueType.h"
36 #include "llvm/Support/TargetParser.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
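// Register categories the parser distinguishes: vector, scalar and
// accumulator GPRs, trap temporaries (TTMP), and named special registers
// such as VCC or EXEC.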
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53   enum KindTy {
54     Token,
55     Immediate,
56     Register,
57     Expression
58   } Kind;
59 
60   SMLoc StartLoc, EndLoc;
61   const AMDGPUAsmParser *AsmParser;
62 
63 public:
64   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65       : Kind(Kind_), AsmParser(AsmParser_) {}
66 
67   using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
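  // Source-operand modifiers: Abs/Neg are floating-point modifiers and Sext is
  // an integer modifier; they map onto SISrcMods bits of the corresponding
  // source-modifier operand.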
69   struct Modifiers {
70     bool Abs = false;
71     bool Neg = false;
72     bool Sext = false;
73 
74     bool hasFPModifiers() const { return Abs || Neg; }
75     bool hasIntModifiers() const { return Sext; }
76     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78     int64_t getFPModifiersOperand() const {
79       int64_t Operand = 0;
80       Operand |= Abs ? SISrcMods::ABS : 0u;
81       Operand |= Neg ? SISrcMods::NEG : 0u;
82       return Operand;
83     }
84 
85     int64_t getIntModifiersOperand() const {
86       int64_t Operand = 0;
87       Operand |= Sext ? SISrcMods::SEXT : 0u;
88       return Operand;
89     }
90 
91     int64_t getModifiersOperand() const {
92       assert(!(hasFPModifiers() && hasIntModifiers())
93            && "fp and int modifiers should not be used simultaneously");
94       if (hasFPModifiers()) {
95         return getFPModifiersOperand();
96       } else if (hasIntModifiers()) {
97         return getIntModifiersOperand();
98       } else {
99         return 0;
100       }
101     }
102 
103     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104   };
105 
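  // Identifies which named instruction modifier or operand an immediate stands
  // for (offsets, cache policy, DPP/SDWA controls, and so on).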
106   enum ImmTy {
107     ImmTyNone,
108     ImmTyGDS,
109     ImmTyLDS,
110     ImmTyOffen,
111     ImmTyIdxen,
112     ImmTyAddr64,
113     ImmTyOffset,
114     ImmTyInstOffset,
115     ImmTyOffset0,
116     ImmTyOffset1,
117     ImmTyCPol,
118     ImmTySWZ,
119     ImmTyTFE,
120     ImmTyD16,
121     ImmTyClampSI,
122     ImmTyOModSI,
123     ImmTyDPP8,
124     ImmTyDppCtrl,
125     ImmTyDppRowMask,
126     ImmTyDppBankMask,
127     ImmTyDppBoundCtrl,
128     ImmTyDppFi,
129     ImmTySdwaDstSel,
130     ImmTySdwaSrc0Sel,
131     ImmTySdwaSrc1Sel,
132     ImmTySdwaDstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTySwizzle,
155     ImmTyGprIdxMode,
156     ImmTyHigh,
157     ImmTyBLGP,
158     ImmTyCBSZ,
159     ImmTyABID,
160     ImmTyEndpgm,
161   };
162 
163   enum ImmKindTy {
164     ImmKindTyNone,
165     ImmKindTyLiteral,
166     ImmKindTyConst,
167   };
168 
169 private:
170   struct TokOp {
171     const char *Data;
172     unsigned Length;
173   };
174 
175   struct ImmOp {
176     int64_t Val;
177     ImmTy Type;
178     bool IsFPImm;
179     mutable ImmKindTy Kind;
180     Modifiers Mods;
181   };
182 
183   struct RegOp {
184     unsigned RegNo;
185     Modifiers Mods;
186   };
187 
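  // Discriminated by Kind: exactly one of these members is valid at a time.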
188   union {
189     TokOp Tok;
190     ImmOp Imm;
191     RegOp Reg;
192     const MCExpr *Expr;
193   };
194 
195 public:
196   bool isToken() const override {
197     if (Kind == Token)
198       return true;
199 
200     // When parsing operands, we can't always tell if something was meant to be
201     // a token, like 'gds', or an expression that references a global variable.
202     // In this case, we assume the string is an expression, and if we need to
203     // interpret it as a token, then we treat the symbol name as the token.
204     return isSymbolRefExpr();
205   }
206 
207   bool isSymbolRefExpr() const {
208     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209   }
210 
211   bool isImm() const override {
212     return Kind == Immediate;
213   }
214 
215   void setImmKindNone() const {
216     assert(isImm());
217     Imm.Kind = ImmKindTyNone;
218   }
219 
220   void setImmKindLiteral() const {
221     assert(isImm());
222     Imm.Kind = ImmKindTyLiteral;
223   }
224 
225   void setImmKindConst() const {
226     assert(isImm());
227     Imm.Kind = ImmKindTyConst;
228   }
229 
230   bool IsImmKindLiteral() const {
231     return isImm() && Imm.Kind == ImmKindTyLiteral;
232   }
233 
234   bool isImmKindConst() const {
235     return isImm() && Imm.Kind == ImmKindTyConst;
236   }
237 
238   bool isInlinableImm(MVT type) const;
239   bool isLiteralImm(MVT type) const;
240 
241   bool isRegKind() const {
242     return Kind == Register;
243   }
244 
245   bool isReg() const override {
246     return isRegKind() && !hasModifiers();
247   }
248 
249   bool isRegOrInline(unsigned RCID, MVT type) const {
250     return isRegClass(RCID) || isInlinableImm(type);
251   }
252 
253   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
254     return isRegOrInline(RCID, type) || isLiteralImm(type);
255   }
256 
257   bool isRegOrImmWithInt16InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
259   }
260 
261   bool isRegOrImmWithInt32InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
263   }
264 
265   bool isRegOrImmWithInt64InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
267   }
268 
269   bool isRegOrImmWithFP16InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
271   }
272 
273   bool isRegOrImmWithFP32InputMods() const {
274     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
275   }
276 
277   bool isRegOrImmWithFP64InputMods() const {
278     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
279   }
280 
281   bool isVReg() const {
282     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
283            isRegClass(AMDGPU::VReg_64RegClassID) ||
284            isRegClass(AMDGPU::VReg_96RegClassID) ||
285            isRegClass(AMDGPU::VReg_128RegClassID) ||
286            isRegClass(AMDGPU::VReg_160RegClassID) ||
287            isRegClass(AMDGPU::VReg_192RegClassID) ||
288            isRegClass(AMDGPU::VReg_256RegClassID) ||
289            isRegClass(AMDGPU::VReg_512RegClassID) ||
290            isRegClass(AMDGPU::VReg_1024RegClassID);
291   }
292 
293   bool isVReg32() const {
294     return isRegClass(AMDGPU::VGPR_32RegClassID);
295   }
296 
297   bool isVReg32OrOff() const {
298     return isOff() || isVReg32();
299   }
300 
301   bool isNull() const {
302     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
303   }
304 
305   bool isVRegWithInputMods() const;
306 
307   bool isSDWAOperand(MVT type) const;
308   bool isSDWAFP16Operand() const;
309   bool isSDWAFP32Operand() const;
310   bool isSDWAInt16Operand() const;
311   bool isSDWAInt32Operand() const;
312 
313   bool isImmTy(ImmTy ImmT) const {
314     return isImm() && Imm.Type == ImmT;
315   }
316 
317   bool isImmModifier() const {
318     return isImm() && Imm.Type != ImmTyNone;
319   }
320 
321   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
322   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
323   bool isDMask() const { return isImmTy(ImmTyDMask); }
324   bool isDim() const { return isImmTy(ImmTyDim); }
325   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
326   bool isDA() const { return isImmTy(ImmTyDA); }
327   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
328   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
329   bool isLWE() const { return isImmTy(ImmTyLWE); }
330   bool isOff() const { return isImmTy(ImmTyOff); }
331   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
332   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
333   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
334   bool isOffen() const { return isImmTy(ImmTyOffen); }
335   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
336   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
337   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
338   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
339   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
340 
341   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
342   bool isGDS() const { return isImmTy(ImmTyGDS); }
343   bool isLDS() const { return isImmTy(ImmTyLDS); }
344   bool isCPol() const { return isImmTy(ImmTyCPol); }
345   bool isSWZ() const { return isImmTy(ImmTySWZ); }
346   bool isTFE() const { return isImmTy(ImmTyTFE); }
347   bool isD16() const { return isImmTy(ImmTyD16); }
348   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
349   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
350   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
351   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
352   bool isFI() const { return isImmTy(ImmTyDppFi); }
353   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
354   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
355   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
356   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
357   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
358   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
359   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
360   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
361   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
362   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
363   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
364   bool isHigh() const { return isImmTy(ImmTyHigh); }
365 
366   bool isMod() const {
367     return isClampSI() || isOModSI();
368   }
369 
370   bool isRegOrImm() const {
371     return isReg() || isImm();
372   }
373 
374   bool isRegClass(unsigned RCID) const;
375 
376   bool isInlineValue() const;
377 
378   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
379     return isRegOrInline(RCID, type) && !hasModifiers();
380   }
381 
382   bool isSCSrcB16() const {
383     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
384   }
385 
386   bool isSCSrcV2B16() const {
387     return isSCSrcB16();
388   }
389 
390   bool isSCSrcB32() const {
391     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
392   }
393 
394   bool isSCSrcB64() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
396   }
397 
398   bool isBoolReg() const;
399 
400   bool isSCSrcF16() const {
401     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
402   }
403 
404   bool isSCSrcV2F16() const {
405     return isSCSrcF16();
406   }
407 
408   bool isSCSrcF32() const {
409     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
410   }
411 
412   bool isSCSrcF64() const {
413     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
414   }
415 
416   bool isSSrcB32() const {
417     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
418   }
419 
420   bool isSSrcB16() const {
421     return isSCSrcB16() || isLiteralImm(MVT::i16);
422   }
423 
424   bool isSSrcV2B16() const {
425     llvm_unreachable("cannot happen");
426     return isSSrcB16();
427   }
428 
429   bool isSSrcB64() const {
430     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
431     // See isVSrc64().
432     return isSCSrcB64() || isLiteralImm(MVT::i64);
433   }
434 
435   bool isSSrcF32() const {
436     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
437   }
438 
439   bool isSSrcF64() const {
440     return isSCSrcB64() || isLiteralImm(MVT::f64);
441   }
442 
443   bool isSSrcF16() const {
444     return isSCSrcB16() || isLiteralImm(MVT::f16);
445   }
446 
447   bool isSSrcV2F16() const {
448     llvm_unreachable("cannot happen");
449     return isSSrcF16();
450   }
451 
452   bool isSSrcV2FP32() const {
453     llvm_unreachable("cannot happen");
454     return isSSrcF32();
455   }
456 
457   bool isSCSrcV2FP32() const {
458     llvm_unreachable("cannot happen");
459     return isSCSrcF32();
460   }
461 
462   bool isSSrcV2INT32() const {
463     llvm_unreachable("cannot happen");
464     return isSSrcB32();
465   }
466 
467   bool isSCSrcV2INT32() const {
468     llvm_unreachable("cannot happen");
469     return isSCSrcB32();
470   }
471 
472   bool isSSrcOrLdsB32() const {
473     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
474            isLiteralImm(MVT::i32) || isExpr();
475   }
476 
477   bool isVCSrcB32() const {
478     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
479   }
480 
481   bool isVCSrcB64() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
483   }
484 
485   bool isVCSrcB16() const {
486     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
487   }
488 
489   bool isVCSrcV2B16() const {
490     return isVCSrcB16();
491   }
492 
493   bool isVCSrcF32() const {
494     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
495   }
496 
497   bool isVCSrcF64() const {
498     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
499   }
500 
501   bool isVCSrcF16() const {
502     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
503   }
504 
505   bool isVCSrcV2F16() const {
506     return isVCSrcF16();
507   }
508 
509   bool isVSrcB32() const {
510     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
511   }
512 
513   bool isVSrcB64() const {
514     return isVCSrcF64() || isLiteralImm(MVT::i64);
515   }
516 
517   bool isVSrcB16() const {
518     return isVCSrcB16() || isLiteralImm(MVT::i16);
519   }
520 
521   bool isVSrcV2B16() const {
522     return isVSrcB16() || isLiteralImm(MVT::v2i16);
523   }
524 
525   bool isVCSrcV2FP32() const {
526     return isVCSrcF64();
527   }
528 
529   bool isVSrcV2FP32() const {
530     return isVSrcF64() || isLiteralImm(MVT::v2f32);
531   }
532 
533   bool isVCSrcV2INT32() const {
534     return isVCSrcB64();
535   }
536 
537   bool isVSrcV2INT32() const {
538     return isVSrcB64() || isLiteralImm(MVT::v2i32);
539   }
540 
541   bool isVSrcF32() const {
542     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
543   }
544 
545   bool isVSrcF64() const {
546     return isVCSrcF64() || isLiteralImm(MVT::f64);
547   }
548 
549   bool isVSrcF16() const {
550     return isVCSrcF16() || isLiteralImm(MVT::f16);
551   }
552 
553   bool isVSrcV2F16() const {
554     return isVSrcF16() || isLiteralImm(MVT::v2f16);
555   }
556 
557   bool isVISrcB32() const {
558     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
559   }
560 
561   bool isVISrcB16() const {
562     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
563   }
564 
565   bool isVISrcV2B16() const {
566     return isVISrcB16();
567   }
568 
569   bool isVISrcF32() const {
570     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
571   }
572 
573   bool isVISrcF16() const {
574     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
575   }
576 
577   bool isVISrcV2F16() const {
578     return isVISrcF16() || isVISrcB32();
579   }
580 
581   bool isVISrc_64B64() const {
582     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
583   }
584 
585   bool isVISrc_64F64() const {
586     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
587   }
588 
589   bool isVISrc_64V2FP32() const {
590     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
591   }
592 
593   bool isVISrc_64V2INT32() const {
594     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
595   }
596 
597   bool isVISrc_256B64() const {
598     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
599   }
600 
601   bool isVISrc_256F64() const {
602     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
603   }
604 
605   bool isVISrc_128B16() const {
606     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
607   }
608 
609   bool isVISrc_128V2B16() const {
610     return isVISrc_128B16();
611   }
612 
613   bool isVISrc_128B32() const {
614     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
615   }
616 
617   bool isVISrc_128F32() const {
618     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
619   }
620 
621   bool isVISrc_256V2FP32() const {
622     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
623   }
624 
625   bool isVISrc_256V2INT32() const {
626     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
627   }
628 
629   bool isVISrc_512B32() const {
630     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
631   }
632 
633   bool isVISrc_512B16() const {
634     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
635   }
636 
637   bool isVISrc_512V2B16() const {
638     return isVISrc_512B16();
639   }
640 
641   bool isVISrc_512F32() const {
642     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
643   }
644 
645   bool isVISrc_512F16() const {
646     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
647   }
648 
649   bool isVISrc_512V2F16() const {
650     return isVISrc_512F16() || isVISrc_512B32();
651   }
652 
653   bool isVISrc_1024B32() const {
654     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
655   }
656 
657   bool isVISrc_1024B16() const {
658     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
659   }
660 
661   bool isVISrc_1024V2B16() const {
662     return isVISrc_1024B16();
663   }
664 
665   bool isVISrc_1024F32() const {
666     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
667   }
668 
669   bool isVISrc_1024F16() const {
670     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
671   }
672 
673   bool isVISrc_1024V2F16() const {
674     return isVISrc_1024F16() || isVISrc_1024B32();
675   }
676 
677   bool isAISrcB32() const {
678     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
679   }
680 
681   bool isAISrcB16() const {
682     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
683   }
684 
685   bool isAISrcV2B16() const {
686     return isAISrcB16();
687   }
688 
689   bool isAISrcF32() const {
690     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
691   }
692 
693   bool isAISrcF16() const {
694     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
695   }
696 
697   bool isAISrcV2F16() const {
698     return isAISrcF16() || isAISrcB32();
699   }
700 
701   bool isAISrc_64B64() const {
702     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
703   }
704 
705   bool isAISrc_64F64() const {
706     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
707   }
708 
709   bool isAISrc_128B32() const {
710     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
711   }
712 
713   bool isAISrc_128B16() const {
714     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
715   }
716 
717   bool isAISrc_128V2B16() const {
718     return isAISrc_128B16();
719   }
720 
721   bool isAISrc_128F32() const {
722     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
723   }
724 
725   bool isAISrc_128F16() const {
726     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
727   }
728 
729   bool isAISrc_128V2F16() const {
730     return isAISrc_128F16() || isAISrc_128B32();
731   }
732 
733   bool isVISrc_128F16() const {
734     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
735   }
736 
737   bool isVISrc_128V2F16() const {
738     return isVISrc_128F16() || isVISrc_128B32();
739   }
740 
741   bool isAISrc_256B64() const {
742     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
743   }
744 
745   bool isAISrc_256F64() const {
746     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
747   }
748 
749   bool isAISrc_512B32() const {
750     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
751   }
752 
753   bool isAISrc_512B16() const {
754     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
755   }
756 
757   bool isAISrc_512V2B16() const {
758     return isAISrc_512B16();
759   }
760 
761   bool isAISrc_512F32() const {
762     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
763   }
764 
765   bool isAISrc_512F16() const {
766     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
767   }
768 
769   bool isAISrc_512V2F16() const {
770     return isAISrc_512F16() || isAISrc_512B32();
771   }
772 
773   bool isAISrc_1024B32() const {
774     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
775   }
776 
777   bool isAISrc_1024B16() const {
778     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
779   }
780 
781   bool isAISrc_1024V2B16() const {
782     return isAISrc_1024B16();
783   }
784 
785   bool isAISrc_1024F32() const {
786     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
787   }
788 
789   bool isAISrc_1024F16() const {
790     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
791   }
792 
793   bool isAISrc_1024V2F16() const {
794     return isAISrc_1024F16() || isAISrc_1024B32();
795   }
796 
797   bool isKImmFP32() const {
798     return isLiteralImm(MVT::f32);
799   }
800 
801   bool isKImmFP16() const {
802     return isLiteralImm(MVT::f16);
803   }
804 
805   bool isMem() const override {
806     return false;
807   }
808 
809   bool isExpr() const {
810     return Kind == Expression;
811   }
812 
813   bool isSoppBrTarget() const {
814     return isExpr() || isImm();
815   }
816 
817   bool isSWaitCnt() const;
818   bool isHwreg() const;
819   bool isSendMsg() const;
820   bool isSwizzle() const;
821   bool isSMRDOffset8() const;
822   bool isSMEMOffset() const;
823   bool isSMRDLiteralOffset() const;
824   bool isDPP8() const;
825   bool isDPPCtrl() const;
826   bool isBLGP() const;
827   bool isCBSZ() const;
828   bool isABID() const;
829   bool isGPRIdxMode() const;
830   bool isS16Imm() const;
831   bool isU16Imm() const;
832   bool isEndpgm() const;
833 
834   StringRef getExpressionAsToken() const {
835     assert(isExpr());
836     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
837     return S->getSymbol().getName();
838   }
839 
840   StringRef getToken() const {
841     assert(isToken());
842 
843     if (Kind == Expression)
844       return getExpressionAsToken();
845 
846     return StringRef(Tok.Data, Tok.Length);
847   }
848 
849   int64_t getImm() const {
850     assert(isImm());
851     return Imm.Val;
852   }
853 
854   void setImm(int64_t Val) {
855     assert(isImm());
856     Imm.Val = Val;
857   }
858 
859   ImmTy getImmTy() const {
860     assert(isImm());
861     return Imm.Type;
862   }
863 
864   unsigned getReg() const override {
865     assert(isRegKind());
866     return Reg.RegNo;
867   }
868 
869   SMLoc getStartLoc() const override {
870     return StartLoc;
871   }
872 
873   SMLoc getEndLoc() const override {
874     return EndLoc;
875   }
876 
877   SMRange getLocRange() const {
878     return SMRange(StartLoc, EndLoc);
879   }
880 
881   Modifiers getModifiers() const {
882     assert(isRegKind() || isImmTy(ImmTyNone));
883     return isRegKind() ? Reg.Mods : Imm.Mods;
884   }
885 
886   void setModifiers(Modifiers Mods) {
887     assert(isRegKind() || isImmTy(ImmTyNone));
888     if (isRegKind())
889       Reg.Mods = Mods;
890     else
891       Imm.Mods = Mods;
892   }
893 
894   bool hasModifiers() const {
895     return getModifiers().hasModifiers();
896   }
897 
898   bool hasFPModifiers() const {
899     return getModifiers().hasFPModifiers();
900   }
901 
902   bool hasIntModifiers() const {
903     return getModifiers().hasIntModifiers();
904   }
905 
906   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
907 
908   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
909 
910   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
911 
912   template <unsigned Bitwidth>
913   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
914 
915   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
916     addKImmFPOperands<16>(Inst, N);
917   }
918 
919   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
920     addKImmFPOperands<32>(Inst, N);
921   }
922 
923   void addRegOperands(MCInst &Inst, unsigned N) const;
924 
925   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
926     addRegOperands(Inst, N);
927   }
928 
929   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
930     if (isRegKind())
931       addRegOperands(Inst, N);
932     else if (isExpr())
933       Inst.addOperand(MCOperand::createExpr(Expr));
934     else
935       addImmOperands(Inst, N);
936   }
937 
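  // Emits the source-modifier bits first, then the register or the unmodified
  // immediate value.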
938   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
939     Modifiers Mods = getModifiers();
940     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
941     if (isRegKind()) {
942       addRegOperands(Inst, N);
943     } else {
944       addImmOperands(Inst, N, false);
945     }
946   }
947 
948   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
949     assert(!hasIntModifiers());
950     addRegOrImmWithInputModsOperands(Inst, N);
951   }
952 
953   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
954     assert(!hasFPModifiers());
955     addRegOrImmWithInputModsOperands(Inst, N);
956   }
957 
958   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
959     Modifiers Mods = getModifiers();
960     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
961     assert(isRegKind());
962     addRegOperands(Inst, N);
963   }
964 
965   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
966     assert(!hasIntModifiers());
967     addRegWithInputModsOperands(Inst, N);
968   }
969 
970   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
971     assert(!hasFPModifiers());
972     addRegWithInputModsOperands(Inst, N);
973   }
974 
975   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
976     if (isImm())
977       addImmOperands(Inst, N);
978     else {
979       assert(isExpr());
980       Inst.addOperand(MCOperand::createExpr(Expr));
981     }
982   }
983 
984   static void printImmTy(raw_ostream& OS, ImmTy Type) {
985     switch (Type) {
986     case ImmTyNone: OS << "None"; break;
987     case ImmTyGDS: OS << "GDS"; break;
988     case ImmTyLDS: OS << "LDS"; break;
989     case ImmTyOffen: OS << "Offen"; break;
990     case ImmTyIdxen: OS << "Idxen"; break;
991     case ImmTyAddr64: OS << "Addr64"; break;
992     case ImmTyOffset: OS << "Offset"; break;
993     case ImmTyInstOffset: OS << "InstOffset"; break;
994     case ImmTyOffset0: OS << "Offset0"; break;
995     case ImmTyOffset1: OS << "Offset1"; break;
996     case ImmTyCPol: OS << "CPol"; break;
997     case ImmTySWZ: OS << "SWZ"; break;
998     case ImmTyTFE: OS << "TFE"; break;
999     case ImmTyD16: OS << "D16"; break;
1000     case ImmTyFORMAT: OS << "FORMAT"; break;
1001     case ImmTyClampSI: OS << "ClampSI"; break;
1002     case ImmTyOModSI: OS << "OModSI"; break;
1003     case ImmTyDPP8: OS << "DPP8"; break;
1004     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1005     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1006     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1007     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1008     case ImmTyDppFi: OS << "FI"; break;
1009     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1010     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1011     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1012     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1013     case ImmTyDMask: OS << "DMask"; break;
1014     case ImmTyDim: OS << "Dim"; break;
1015     case ImmTyUNorm: OS << "UNorm"; break;
1016     case ImmTyDA: OS << "DA"; break;
1017     case ImmTyR128A16: OS << "R128A16"; break;
1018     case ImmTyA16: OS << "A16"; break;
1019     case ImmTyLWE: OS << "LWE"; break;
1020     case ImmTyOff: OS << "Off"; break;
1021     case ImmTyExpTgt: OS << "ExpTgt"; break;
1022     case ImmTyExpCompr: OS << "ExpCompr"; break;
1023     case ImmTyExpVM: OS << "ExpVM"; break;
1024     case ImmTyHwreg: OS << "Hwreg"; break;
1025     case ImmTySendMsg: OS << "SendMsg"; break;
1026     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1027     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1028     case ImmTyAttrChan: OS << "AttrChan"; break;
1029     case ImmTyOpSel: OS << "OpSel"; break;
1030     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1031     case ImmTyNegLo: OS << "NegLo"; break;
1032     case ImmTyNegHi: OS << "NegHi"; break;
1033     case ImmTySwizzle: OS << "Swizzle"; break;
1034     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1035     case ImmTyHigh: OS << "High"; break;
1036     case ImmTyBLGP: OS << "BLGP"; break;
1037     case ImmTyCBSZ: OS << "CBSZ"; break;
1038     case ImmTyABID: OS << "ABID"; break;
1039     case ImmTyEndpgm: OS << "Endpgm"; break;
1040     }
1041   }
1042 
1043   void print(raw_ostream &OS) const override {
1044     switch (Kind) {
1045     case Register:
1046       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1047       break;
1048     case Immediate:
1049       OS << '<' << getImm();
1050       if (getImmTy() != ImmTyNone) {
1051         OS << " type: "; printImmTy(OS, getImmTy());
1052       }
1053       OS << " mods: " << Imm.Mods << '>';
1054       break;
1055     case Token:
1056       OS << '\'' << getToken() << '\'';
1057       break;
1058     case Expression:
1059       OS << "<expr " << *Expr << '>';
1060       break;
1061     }
1062   }
1063 
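  // Factory methods used by the parser to construct each kind of operand.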
1064   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1065                                       int64_t Val, SMLoc Loc,
1066                                       ImmTy Type = ImmTyNone,
1067                                       bool IsFPImm = false) {
1068     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1069     Op->Imm.Val = Val;
1070     Op->Imm.IsFPImm = IsFPImm;
1071     Op->Imm.Kind = ImmKindTyNone;
1072     Op->Imm.Type = Type;
1073     Op->Imm.Mods = Modifiers();
1074     Op->StartLoc = Loc;
1075     Op->EndLoc = Loc;
1076     return Op;
1077   }
1078 
1079   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1080                                         StringRef Str, SMLoc Loc,
1081                                         bool HasExplicitEncodingSize = true) {
1082     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1083     Res->Tok.Data = Str.data();
1084     Res->Tok.Length = Str.size();
1085     Res->StartLoc = Loc;
1086     Res->EndLoc = Loc;
1087     return Res;
1088   }
1089 
1090   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1091                                       unsigned RegNo, SMLoc S,
1092                                       SMLoc E) {
1093     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1094     Op->Reg.RegNo = RegNo;
1095     Op->Reg.Mods = Modifiers();
1096     Op->StartLoc = S;
1097     Op->EndLoc = E;
1098     return Op;
1099   }
1100 
1101   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1102                                        const class MCExpr *Expr, SMLoc S) {
1103     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1104     Op->Expr = Expr;
1105     Op->StartLoc = S;
1106     Op->EndLoc = S;
1107     return Op;
1108   }
1109 };
1110 
1111 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1112   OS << "abs: " << Mods.Abs << " neg: " << Mods.Neg << " sext: " << Mods.Sext;
1113   return OS;
1114 }
1115 
1116 //===----------------------------------------------------------------------===//
1117 // AsmParser
1118 //===----------------------------------------------------------------------===//
1119 
1120 // Holds info related to the current kernel, e.g. the count of SGPRs used.
1121 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
1122 // next .amdgpu_hsa_kernel directive or at EOF.
1123 class KernelScopeInfo {
1124   int SgprIndexUnusedMin = -1;
1125   int VgprIndexUnusedMin = -1;
1126   MCContext *Ctx = nullptr;
1127 
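  // Track the highest SGPR/VGPR index used so far and mirror the running
  // counts in the .kernel.sgpr_count / .kernel.vgpr_count symbols.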
1128   void usesSgprAt(int i) {
1129     if (i >= SgprIndexUnusedMin) {
1130       SgprIndexUnusedMin = ++i;
1131       if (Ctx) {
1132         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1133         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1134       }
1135     }
1136   }
1137 
1138   void usesVgprAt(int i) {
1139     if (i >= VgprIndexUnusedMin) {
1140       VgprIndexUnusedMin = ++i;
1141       if (Ctx) {
1142         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1143         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1144       }
1145     }
1146   }
1147 
1148 public:
1149   KernelScopeInfo() = default;
1150 
1151   void initialize(MCContext &Context) {
1152     Ctx = &Context;
1153     usesSgprAt(SgprIndexUnusedMin = -1);
1154     usesVgprAt(VgprIndexUnusedMin = -1);
1155   }
1156 
1157   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1158     switch (RegKind) {
1159       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1160       case IS_AGPR: // fall through
1161       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1162       default: break;
1163     }
1164   }
1165 };
1166 
1167 class AMDGPUAsmParser : public MCTargetAsmParser {
1168   MCAsmParser &Parser;
1169 
1170   // Number of extra operands parsed after the first optional operand.
1171   // This may be necessary to skip hardcoded mandatory operands.
1172   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1173 
1174   unsigned ForcedEncodingSize = 0;
1175   bool ForcedDPP = false;
1176   bool ForcedSDWA = false;
1177   KernelScopeInfo KernelScope;
1178   unsigned CPolSeen;
1179 
1180   /// @name Auto-generated Match Functions
1181   /// {
1182 
1183 #define GET_ASSEMBLER_HEADER
1184 #include "AMDGPUGenAsmMatcher.inc"
1185 
1186   /// }
1187 
1188 private:
1189   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1190   bool OutOfRangeError(SMRange Range);
1191   /// Calculate VGPR/SGPR blocks required for the given target, reserved
1192   /// registers, and user-specified NextFreeXGPR values.
1193   ///
1194   /// \param Features [in] Target features, used for bug corrections.
1195   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1196   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1197   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1198   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1199   /// descriptor field, if valid.
1200   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1201   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1202   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1203   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1204   /// \param VGPRBlocks [out] Result VGPR block count.
1205   /// \param SGPRBlocks [out] Result SGPR block count.
1206   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1207                           bool FlatScrUsed, bool XNACKUsed,
1208                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1209                           SMRange VGPRRange, unsigned NextFreeSGPR,
1210                           SMRange SGPRRange, unsigned &VGPRBlocks,
1211                           unsigned &SGPRBlocks);
1212   bool ParseDirectiveAMDGCNTarget();
1213   bool ParseDirectiveAMDHSAKernel();
1214   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1215   bool ParseDirectiveHSACodeObjectVersion();
1216   bool ParseDirectiveHSACodeObjectISA();
1217   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1218   bool ParseDirectiveAMDKernelCodeT();
1219   // TODO: Possibly make subtargetHasRegister const.
1220   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1221   bool ParseDirectiveAMDGPUHsaKernel();
1222 
1223   bool ParseDirectiveISAVersion();
1224   bool ParseDirectiveHSAMetadata();
1225   bool ParseDirectivePALMetadataBegin();
1226   bool ParseDirectivePALMetadata();
1227   bool ParseDirectiveAMDGPULDS();
1228 
1229   /// Common code to parse out a block of text (typically YAML) between start and
1230   /// end directives.
1231   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1232                            const char *AssemblerDirectiveEnd,
1233                            std::string &CollectString);
1234 
1235   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1236                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1237   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1238                            unsigned &RegNum, unsigned &RegWidth,
1239                            bool RestoreOnFailure = false);
1240   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1241                            unsigned &RegNum, unsigned &RegWidth,
1242                            SmallVectorImpl<AsmToken> &Tokens);
1243   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1244                            unsigned &RegWidth,
1245                            SmallVectorImpl<AsmToken> &Tokens);
1246   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1247                            unsigned &RegWidth,
1248                            SmallVectorImpl<AsmToken> &Tokens);
1249   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1250                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1251   bool ParseRegRange(unsigned& Num, unsigned& Width);
1252   unsigned getRegularReg(RegisterKind RegKind,
1253                          unsigned RegNum,
1254                          unsigned RegWidth,
1255                          SMLoc Loc);
1256 
1257   bool isRegister();
1258   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1259   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1260   void initializeGprCountSymbol(RegisterKind RegKind);
1261   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1262                              unsigned RegWidth);
1263   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1264                     bool IsAtomic, bool IsLds = false);
1265   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1266                  bool IsGdsHardcoded);
1267 
1268 public:
1269   enum AMDGPUMatchResultTy {
1270     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1271   };
1272   enum OperandMode {
1273     OperandMode_Default,
1274     OperandMode_NSA,
1275   };
1276 
1277   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1278 
1279   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1280                const MCInstrInfo &MII,
1281                const MCTargetOptions &Options)
1282       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1283     MCAsmParserExtension::Initialize(Parser);
1284 
1285     if (getFeatureBits().none()) {
1286       // Set default features.
1287       copySTI().ToggleFeature("southern-islands");
1288     }
1289 
1290     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1291 
1292     {
1293       // TODO: make those pre-defined variables read-only.
1294       // Currently there is no suitable machinery in core llvm-mc for this.
1295       // MCSymbol::isRedefinable is intended for another purpose, and
1296       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1297       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1298       MCContext &Ctx = getContext();
1299       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1300         MCSymbol *Sym =
1301             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1302         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1303         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1304         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1305         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1306         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1307       } else {
1308         MCSymbol *Sym =
1309             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1311         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1312         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1313         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1314         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1315       }
1316       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1317         initializeGprCountSymbol(IS_VGPR);
1318         initializeGprCountSymbol(IS_SGPR);
1319       } else
1320         KernelScope.initialize(getContext());
1321     }
1322   }
1323 
1324   bool hasMIMG_R128() const {
1325     return AMDGPU::hasMIMG_R128(getSTI());
1326   }
1327 
1328   bool hasPackedD16() const {
1329     return AMDGPU::hasPackedD16(getSTI());
1330   }
1331 
1332   bool hasGFX10A16() const {
1333     return AMDGPU::hasGFX10A16(getSTI());
1334   }
1335 
1336   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1337 
1338   bool isSI() const {
1339     return AMDGPU::isSI(getSTI());
1340   }
1341 
1342   bool isCI() const {
1343     return AMDGPU::isCI(getSTI());
1344   }
1345 
1346   bool isVI() const {
1347     return AMDGPU::isVI(getSTI());
1348   }
1349 
1350   bool isGFX9() const {
1351     return AMDGPU::isGFX9(getSTI());
1352   }
1353 
1354   bool isGFX90A() const {
1355     return AMDGPU::isGFX90A(getSTI());
1356   }
1357 
1358   bool isGFX9Plus() const {
1359     return AMDGPU::isGFX9Plus(getSTI());
1360   }
1361 
1362   bool isGFX10() const {
1363     return AMDGPU::isGFX10(getSTI());
1364   }
1365 
1366   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1367 
1368   bool isGFX10_BEncoding() const {
1369     return AMDGPU::isGFX10_BEncoding(getSTI());
1370   }
1371 
1372   bool hasInv2PiInlineImm() const {
1373     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1374   }
1375 
1376   bool hasFlatOffsets() const {
1377     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1378   }
1379 
1380   bool hasArchitectedFlatScratch() const {
1381     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1382   }
1383 
1384   bool hasSGPR102_SGPR103() const {
1385     return !isVI() && !isGFX9();
1386   }
1387 
1388   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1389 
1390   bool hasIntClamp() const {
1391     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1392   }
1393 
1394   AMDGPUTargetStreamer &getTargetStreamer() {
1395     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1396     return static_cast<AMDGPUTargetStreamer &>(TS);
1397   }
1398 
1399   const MCRegisterInfo *getMRI() const {
1400     // We need this const_cast because for some reason getContext() is not const
1401     // in MCAsmParser.
1402     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1403   }
1404 
1405   const MCInstrInfo *getMII() const {
1406     return &MII;
1407   }
1408 
1409   const FeatureBitset &getFeatureBits() const {
1410     return getSTI().getFeatureBits();
1411   }
1412 
1413   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1414   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1415   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1416 
1417   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1418   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1419   bool isForcedDPP() const { return ForcedDPP; }
1420   bool isForcedSDWA() const { return ForcedSDWA; }
1421   ArrayRef<unsigned> getMatchedVariants() const;
1422   StringRef getMatchedVariantName() const;
1423 
1424   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1425   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1426                      bool RestoreOnFailure);
1427   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1428   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1429                                         SMLoc &EndLoc) override;
1430   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1431   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1432                                       unsigned Kind) override;
1433   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1434                                OperandVector &Operands, MCStreamer &Out,
1435                                uint64_t &ErrorInfo,
1436                                bool MatchingInlineAsm) override;
1437   bool ParseDirective(AsmToken DirectiveID) override;
1438   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1439                                     OperandMode Mode = OperandMode_Default);
1440   StringRef parseMnemonicSuffix(StringRef Name);
1441   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1442                         SMLoc NameLoc, OperandVector &Operands) override;
1443   //bool ProcessInstruction(MCInst &Inst);
1444 
1445   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1446 
1447   OperandMatchResultTy
1448   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1449                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1450                      bool (*ConvertResult)(int64_t &) = nullptr);
1451 
1452   OperandMatchResultTy
1453   parseOperandArrayWithPrefix(const char *Prefix,
1454                               OperandVector &Operands,
1455                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1456                               bool (*ConvertResult)(int64_t&) = nullptr);
1457 
1458   OperandMatchResultTy
1459   parseNamedBit(StringRef Name, OperandVector &Operands,
1460                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1461   OperandMatchResultTy parseCPol(OperandVector &Operands);
1462   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1463                                              StringRef &Value,
1464                                              SMLoc &StringLoc);
1465 
1466   bool isModifier();
1467   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1468   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1469   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1470   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1471   bool parseSP3NegModifier();
1472   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1473   OperandMatchResultTy parseReg(OperandVector &Operands);
1474   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1475   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1476   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1477   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1478   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1479   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1480   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1481   OperandMatchResultTy parseUfmt(int64_t &Format);
1482   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1483   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1484   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1485   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1486   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1487   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1488   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1489 
1490   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1491   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1492   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1493   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1494 
1495   bool parseCnt(int64_t &IntVal);
1496   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1497   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1498 
1499 private:
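  // A single field of a hwreg/sendmsg operand: where it appeared, its value,
  // and whether it was spelled symbolically.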
1500   struct OperandInfoTy {
1501     SMLoc Loc;
1502     int64_t Id;
1503     bool IsSymbolic = false;
1504     bool IsDefined = false;
1505 
1506     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1507   };
1508 
1509   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1510   bool validateSendMsg(const OperandInfoTy &Msg,
1511                        const OperandInfoTy &Op,
1512                        const OperandInfoTy &Stream);
1513 
1514   bool parseHwregBody(OperandInfoTy &HwReg,
1515                       OperandInfoTy &Offset,
1516                       OperandInfoTy &Width);
1517   bool validateHwreg(const OperandInfoTy &HwReg,
1518                      const OperandInfoTy &Offset,
1519                      const OperandInfoTy &Width);
1520 
1521   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1522   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1523 
1524   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1525                       const OperandVector &Operands) const;
1526   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1527   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1528   SMLoc getLitLoc(const OperandVector &Operands) const;
1529   SMLoc getConstLoc(const OperandVector &Operands) const;
1530 
1531   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1532   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1533   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1534   bool validateSOPLiteral(const MCInst &Inst) const;
1535   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1536   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1537   bool validateIntClampSupported(const MCInst &Inst);
1538   bool validateMIMGAtomicDMask(const MCInst &Inst);
1539   bool validateMIMGGatherDMask(const MCInst &Inst);
1540   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1541   bool validateMIMGDataSize(const MCInst &Inst);
1542   bool validateMIMGAddrSize(const MCInst &Inst);
1543   bool validateMIMGD16(const MCInst &Inst);
1544   bool validateMIMGDim(const MCInst &Inst);
1545   bool validateMIMGMSAA(const MCInst &Inst);
1546   bool validateOpSel(const MCInst &Inst);
1547   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1548   bool validateVccOperand(unsigned Reg) const;
1549   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1550   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1551   bool validateAGPRLdSt(const MCInst &Inst) const;
1552   bool validateVGPRAlign(const MCInst &Inst) const;
1553   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1554   bool validateDivScale(const MCInst &Inst);
1555   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1556                              const SMLoc &IDLoc);
1557   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1558   unsigned getConstantBusLimit(unsigned Opcode) const;
1559   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1560   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1561   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1562 
1563   bool isSupportedMnemo(StringRef Mnemo,
1564                         const FeatureBitset &FBS);
1565   bool isSupportedMnemo(StringRef Mnemo,
1566                         const FeatureBitset &FBS,
1567                         ArrayRef<unsigned> Variants);
1568   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1569 
1570   bool isId(const StringRef Id) const;
1571   bool isId(const AsmToken &Token, const StringRef Id) const;
1572   bool isToken(const AsmToken::TokenKind Kind) const;
1573   bool trySkipId(const StringRef Id);
1574   bool trySkipId(const StringRef Pref, const StringRef Id);
1575   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1576   bool trySkipToken(const AsmToken::TokenKind Kind);
1577   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1578   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1579   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1580 
1581   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1582   AsmToken::TokenKind getTokenKind() const;
1583   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1584   bool parseExpr(OperandVector &Operands);
1585   StringRef getTokenStr() const;
1586   AsmToken peekToken();
1587   AsmToken getToken() const;
1588   SMLoc getLoc() const;
1589   void lex();
1590 
1591 public:
1592   void onBeginOfFile() override;
1593 
1594   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1595   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1596 
1597   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1598   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1599   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1600   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1601   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1602   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1603 
1604   bool parseSwizzleOperand(int64_t &Op,
1605                            const unsigned MinVal,
1606                            const unsigned MaxVal,
1607                            const StringRef ErrMsg,
1608                            SMLoc &Loc);
1609   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1610                             const unsigned MinVal,
1611                             const unsigned MaxVal,
1612                             const StringRef ErrMsg);
1613   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1614   bool parseSwizzleOffset(int64_t &Imm);
1615   bool parseSwizzleMacro(int64_t &Imm);
1616   bool parseSwizzleQuadPerm(int64_t &Imm);
1617   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1618   bool parseSwizzleBroadcast(int64_t &Imm);
1619   bool parseSwizzleSwap(int64_t &Imm);
1620   bool parseSwizzleReverse(int64_t &Imm);
1621 
1622   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1623   int64_t parseGPRIdxMacro();
1624 
1625   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1626   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1627   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1628   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1629 
1630   AMDGPUOperand::Ptr defaultCPol() const;
1631 
1632   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1633   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1634   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1635   AMDGPUOperand::Ptr defaultFlatOffset() const;
1636 
1637   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1638 
1639   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1640                OptionalImmIndexMap &OptionalIdx);
1641   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1642   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1643   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1644   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1645                 OptionalImmIndexMap &OptionalIdx);
1646 
1647   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1648 
1649   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1650                bool IsAtomic = false);
1651   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1652   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1653 
1654   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1655 
1656   bool parseDimId(unsigned &Encoding);
1657   OperandMatchResultTy parseDim(OperandVector &Operands);
1658   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1659   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1660   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1661   int64_t parseDPPCtrlSel(StringRef Ctrl);
1662   int64_t parseDPPCtrlPerm();
1663   AMDGPUOperand::Ptr defaultRowMask() const;
1664   AMDGPUOperand::Ptr defaultBankMask() const;
1665   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1666   AMDGPUOperand::Ptr defaultFI() const;
1667   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1668   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1669 
1670   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1671                                     AMDGPUOperand::ImmTy Type);
1672   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1673   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1674   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1675   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1676   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1677   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1678   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1679                uint64_t BasicInstType,
1680                bool SkipDstVcc = false,
1681                bool SkipSrcVcc = false);
1682 
1683   AMDGPUOperand::Ptr defaultBLGP() const;
1684   AMDGPUOperand::Ptr defaultCBSZ() const;
1685   AMDGPUOperand::Ptr defaultABID() const;
1686 
1687   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1688   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1689 };
1690 
1691 struct OptionalOperand {
1692   const char *Name;
1693   AMDGPUOperand::ImmTy Type;
1694   bool IsBit;
1695   bool (*ConvertResult)(int64_t&);
1696 };
1697 
1698 } // end anonymous namespace
1699 
1700 // May be called with integer type with equivalent bitwidth.
1701 static const fltSemantics *getFltSemantics(unsigned Size) {
1702   switch (Size) {
1703   case 4:
1704     return &APFloat::IEEEsingle();
1705   case 8:
1706     return &APFloat::IEEEdouble();
1707   case 2:
1708     return &APFloat::IEEEhalf();
1709   default:
1710     llvm_unreachable("unsupported fp type");
1711   }
1712 }
1713 
1714 static const fltSemantics *getFltSemantics(MVT VT) {
1715   return getFltSemantics(VT.getSizeInBits() / 8);
1716 }
1717 
1718 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1719   switch (OperandType) {
1720   case AMDGPU::OPERAND_REG_IMM_INT32:
1721   case AMDGPU::OPERAND_REG_IMM_FP32:
1722   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1723   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1724   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1725   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1726   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1727   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1728   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1729   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1730   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1731   case AMDGPU::OPERAND_KIMM32:
1732     return &APFloat::IEEEsingle();
1733   case AMDGPU::OPERAND_REG_IMM_INT64:
1734   case AMDGPU::OPERAND_REG_IMM_FP64:
1735   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1736   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1737   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1738     return &APFloat::IEEEdouble();
1739   case AMDGPU::OPERAND_REG_IMM_INT16:
1740   case AMDGPU::OPERAND_REG_IMM_FP16:
1741   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1742   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1743   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1744   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1745   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1746   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1747   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1748   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1749   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1750   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1751   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1752   case AMDGPU::OPERAND_KIMM16:
1753     return &APFloat::IEEEhalf();
1754   default:
1755     llvm_unreachable("unsupported fp type");
1756   }
1757 }
1758 
1759 //===----------------------------------------------------------------------===//
1760 // Operand
1761 //===----------------------------------------------------------------------===//
1762 
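// Returns true if FPLiteral can be converted to the fp type given by VT with
// at most a loss of precision; overflow and underflow are rejected. For
// example, 0.1 converts to f16 inexactly but is accepted, while 1.0e10
// overflows f16 and is rejected.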
1763 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1764   bool Lost;
1765 
  // Convert the literal to the floating-point type given by VT.
1767   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1768                                                APFloat::rmNearestTiesToEven,
1769                                                &Lost);
  // We allow precision loss but not overflow or underflow
1771   if (Status != APFloat::opOK &&
1772       Lost &&
1773       ((Status & APFloat::opOverflow)  != 0 ||
1774        (Status & APFloat::opUnderflow) != 0)) {
1775     return false;
1776   }
1777 
1778   return true;
1779 }
1780 
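// Returns true if Val fits in Size bits as either an unsigned or a signed
// integer. For example, with Size == 16 both 0xFFFF (unsigned) and -1 (signed)
// are accepted, while 0x1FFFF is rejected.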
1781 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1782   return isUIntN(Size, Val) || isIntN(Size, Val);
1783 }
1784 
1785 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1786   if (VT.getScalarType() == MVT::i16) {
1787     // FP immediate values are broken.
1788     return isInlinableIntLiteral(Val);
1789   }
1790 
1791   // f16/v2f16 operands work correctly for all values.
1792   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1793 }
1794 
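// Checks whether this immediate can be encoded as an inline constant for an
// operand of the given type. For example, for 32-bit operands the integers
// -16..64 and a small set of fp constants (+-0.5, +-1.0, +-2.0, +-4.0, and
// 1/(2*pi) on targets that support it) are inlinable, while values such as
// 100 or 1.1 require a literal.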
1795 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1796 
  // This is a hack to enable named inline values like shared_base with both
  // 32-bit and 64-bit operands. Note that these values are defined as 32-bit
  // operands only.
1801   if (isInlineValue()) {
1802     return true;
1803   }
1804 
1805   if (!isImmTy(ImmTyNone)) {
1806     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1807     return false;
1808   }
1809   // TODO: We should avoid using host float here. It would be better to
1810   // check the float bit values which is what a few other places do.
1811   // We've had bot failures before due to weird NaN support on mips hosts.
1812 
1813   APInt Literal(64, Imm.Val);
1814 
1815   if (Imm.IsFPImm) { // We got fp literal token
1816     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1817       return AMDGPU::isInlinableLiteral64(Imm.Val,
1818                                           AsmParser->hasInv2PiInlineImm());
1819     }
1820 
1821     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1822     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1823       return false;
1824 
1825     if (type.getScalarSizeInBits() == 16) {
1826       return isInlineableLiteralOp16(
1827         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1828         type, AsmParser->hasInv2PiInlineImm());
1829     }
1830 
1831     // Check if single precision literal is inlinable
1832     return AMDGPU::isInlinableLiteral32(
1833       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1834       AsmParser->hasInv2PiInlineImm());
1835   }
1836 
1837   // We got int literal token.
1838   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1839     return AMDGPU::isInlinableLiteral64(Imm.Val,
1840                                         AsmParser->hasInv2PiInlineImm());
1841   }
1842 
1843   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1844     return false;
1845   }
1846 
1847   if (type.getScalarSizeInBits() == 16) {
1848     return isInlineableLiteralOp16(
1849       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1850       type, AsmParser->hasInv2PiInlineImm());
1851   }
1852 
1853   return AMDGPU::isInlinableLiteral32(
1854     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1855     AsmParser->hasInv2PiInlineImm());
1856 }
1857 
1858 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1859   // Check that this immediate can be added as literal
1860   if (!isImmTy(ImmTyNone)) {
1861     return false;
1862   }
1863 
1864   if (!Imm.IsFPImm) {
1865     // We got int literal token.
1866 
1867     if (type == MVT::f64 && hasFPModifiers()) {
1868       // Cannot apply fp modifiers to int literals preserving the same semantics
1869       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1870       // disable these cases.
1871       return false;
1872     }
1873 
1874     unsigned Size = type.getSizeInBits();
1875     if (Size == 64)
1876       Size = 32;
1877 
1878     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1879     // types.
1880     return isSafeTruncation(Imm.Val, Size);
1881   }
1882 
1883   // We got fp literal token
1884   if (type == MVT::f64) { // Expected 64-bit fp operand
    // Such a literal is encoded with its low 32 bits set to zeroes, but we
    // accept it here.
1886     return true;
1887   }
1888 
1889   if (type == MVT::i64) { // Expected 64-bit int operand
1890     // We don't allow fp literals in 64-bit integer instructions. It is
1891     // unclear how we should encode them.
1892     return false;
1893   }
1894 
1895   // We allow fp literals with f16x2 operands assuming that the specified
1896   // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
1898   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1899                      (type == MVT::v2i16)? MVT::i16 :
1900                      (type == MVT::v2f32)? MVT::f32 : type;
1901 
1902   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1903   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1904 }
1905 
1906 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1907   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1908 }
1909 
1910 bool AMDGPUOperand::isVRegWithInputMods() const {
1911   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1912          // GFX90A allows DPP on 64-bit operands.
1913          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1914           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1915 }
1916 
1917 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1918   if (AsmParser->isVI())
1919     return isVReg32();
1920   else if (AsmParser->isGFX9Plus())
1921     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1922   else
1923     return false;
1924 }
1925 
1926 bool AMDGPUOperand::isSDWAFP16Operand() const {
1927   return isSDWAOperand(MVT::f16);
1928 }
1929 
1930 bool AMDGPUOperand::isSDWAFP32Operand() const {
1931   return isSDWAOperand(MVT::f32);
1932 }
1933 
1934 bool AMDGPUOperand::isSDWAInt16Operand() const {
1935   return isSDWAOperand(MVT::i16);
1936 }
1937 
1938 bool AMDGPUOperand::isSDWAInt32Operand() const {
1939   return isSDWAOperand(MVT::i32);
1940 }
1941 
1942 bool AMDGPUOperand::isBoolReg() const {
1943   auto FB = AsmParser->getFeatureBits();
1944   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1945                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1946 }
1947 
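// Applies fp abs/neg input modifiers directly to the literal's bit pattern:
// abs clears the sign bit and neg toggles it. For a 32-bit value, abs turns
// 0xC0000000 (-2.0) into 0x40000000 (2.0), and neg turns 0x40000000 back into
// 0xC0000000.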
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const {
1950   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1951   assert(Size == 2 || Size == 4 || Size == 8);
1952 
1953   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1954 
1955   if (Imm.Mods.Abs) {
1956     Val &= ~FpSignMask;
1957   }
1958   if (Imm.Mods.Neg) {
1959     Val ^= FpSignMask;
1960   }
1961 
1962   return Val;
1963 }
1964 
1965 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1966   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1967                              Inst.getNumOperands())) {
1968     addLiteralImmOperand(Inst, Imm.Val,
1969                          ApplyModifiers &
1970                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1971   } else {
1972     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1973     Inst.addOperand(MCOperand::createImm(Imm.Val));
1974     setImmKindNone();
1975   }
1976 }
1977 
1978 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1979   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1980   auto OpNum = Inst.getNumOperands();
1981   // Check that this operand accepts literals
1982   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1983 
1984   if (ApplyModifiers) {
1985     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1986     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1987     Val = applyInputFPModifiers(Val, Size);
1988   }
1989 
1990   APInt Literal(64, Val);
1991   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1992 
1993   if (Imm.IsFPImm) { // We got fp literal token
1994     switch (OpTy) {
1995     case AMDGPU::OPERAND_REG_IMM_INT64:
1996     case AMDGPU::OPERAND_REG_IMM_FP64:
1997     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1998     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1999     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2000       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2001                                        AsmParser->hasInv2PiInlineImm())) {
2002         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2003         setImmKindConst();
2004         return;
2005       }
2006 
2007       // Non-inlineable
2008       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2009         // For fp operands we check if low 32 bits are zeros
2010         if (Literal.getLoBits(32) != 0) {
2011           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2012           "Can't encode literal as exact 64-bit floating-point operand. "
2013           "Low 32-bits will be set to zero");
2014         }
2015 
2016         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2017         setImmKindLiteral();
2018         return;
2019       }
2020 
2021       // We don't allow fp literals in 64-bit integer instructions. It is
2022       // unclear how we should encode them. This case should be checked earlier
2023       // in predicate methods (isLiteralImm())
2024       llvm_unreachable("fp literal in 64-bit integer instruction.");
2025 
2026     case AMDGPU::OPERAND_REG_IMM_INT32:
2027     case AMDGPU::OPERAND_REG_IMM_FP32:
2028     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2029     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2030     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2031     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2032     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2033     case AMDGPU::OPERAND_REG_IMM_INT16:
2034     case AMDGPU::OPERAND_REG_IMM_FP16:
2035     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2036     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2037     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2038     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2039     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2040     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2041     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2042     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2043     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2044     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2045     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2046     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2047     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2048     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2049     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2050     case AMDGPU::OPERAND_KIMM32:
2051     case AMDGPU::OPERAND_KIMM16: {
2052       bool lost;
2053       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's floating-point type.
2055       FPLiteral.convert(*getOpFltSemantics(OpTy),
2056                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
2058       // checked earlier in isLiteralImm()
2059 
2060       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2061       Inst.addOperand(MCOperand::createImm(ImmVal));
2062       setImmKindLiteral();
2063       return;
2064     }
2065     default:
2066       llvm_unreachable("invalid operand size");
2067     }
2068 
2069     return;
2070   }
2071 
2072   // We got int literal token.
2073   // Only sign extend inline immediates.
2074   switch (OpTy) {
2075   case AMDGPU::OPERAND_REG_IMM_INT32:
2076   case AMDGPU::OPERAND_REG_IMM_FP32:
2077   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2078   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2079   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2080   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2081   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2082   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2083   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2084   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2085   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2086   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2087   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2088     if (isSafeTruncation(Val, 32) &&
2089         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2090                                      AsmParser->hasInv2PiInlineImm())) {
2091       Inst.addOperand(MCOperand::createImm(Val));
2092       setImmKindConst();
2093       return;
2094     }
2095 
2096     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2097     setImmKindLiteral();
2098     return;
2099 
2100   case AMDGPU::OPERAND_REG_IMM_INT64:
2101   case AMDGPU::OPERAND_REG_IMM_FP64:
2102   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2103   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2104   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2105     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2106       Inst.addOperand(MCOperand::createImm(Val));
2107       setImmKindConst();
2108       return;
2109     }
2110 
2111     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2112     setImmKindLiteral();
2113     return;
2114 
2115   case AMDGPU::OPERAND_REG_IMM_INT16:
2116   case AMDGPU::OPERAND_REG_IMM_FP16:
2117   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2118   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2119   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2120   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2121   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2122     if (isSafeTruncation(Val, 16) &&
2123         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2124                                      AsmParser->hasInv2PiInlineImm())) {
2125       Inst.addOperand(MCOperand::createImm(Val));
2126       setImmKindConst();
2127       return;
2128     }
2129 
2130     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2131     setImmKindLiteral();
2132     return;
2133 
2134   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2135   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2136   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2137   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2138     assert(isSafeTruncation(Val, 16));
2139     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2140                                         AsmParser->hasInv2PiInlineImm()));
2141 
2142     Inst.addOperand(MCOperand::createImm(Val));
2143     return;
2144   }
2145   case AMDGPU::OPERAND_KIMM32:
2146     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2147     setImmKindNone();
2148     return;
2149   case AMDGPU::OPERAND_KIMM16:
2150     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2151     setImmKindNone();
2152     return;
2153   default:
2154     llvm_unreachable("invalid operand size");
2155   }
2156 }
2157 
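// Adds a KImm operand of Bitwidth bits. Integer tokens are truncated to the
// low Bitwidth bits; fp tokens are converted from double to the Bitwidth-sized
// fp format. For example, with Bitwidth == 16 the literal 1.0 is emitted as
// 0x3C00.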
2158 template <unsigned Bitwidth>
2159 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2160   APInt Literal(64, Imm.Val);
2161   setImmKindNone();
2162 
2163   if (!Imm.IsFPImm) {
2164     // We got int literal token.
2165     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2166     return;
2167   }
2168 
2169   bool Lost;
2170   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2171   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2172                     APFloat::rmNearestTiesToEven, &Lost);
2173   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2174 }
2175 
2176 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2177   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2178 }
2179 
2180 static bool isInlineValue(unsigned Reg) {
2181   switch (Reg) {
2182   case AMDGPU::SRC_SHARED_BASE:
2183   case AMDGPU::SRC_SHARED_LIMIT:
2184   case AMDGPU::SRC_PRIVATE_BASE:
2185   case AMDGPU::SRC_PRIVATE_LIMIT:
2186   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2187     return true;
2188   case AMDGPU::SRC_VCCZ:
2189   case AMDGPU::SRC_EXECZ:
2190   case AMDGPU::SRC_SCC:
2191     return true;
2192   case AMDGPU::SGPR_NULL:
2193     return true;
2194   default:
2195     return false;
2196   }
2197 }
2198 
2199 bool AMDGPUOperand::isInlineValue() const {
2200   return isRegKind() && ::isInlineValue(getReg());
2201 }
2202 
2203 //===----------------------------------------------------------------------===//
2204 // AsmParser
2205 //===----------------------------------------------------------------------===//
2206 
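// Maps a register kind and a width in 32-bit units to a register class ID.
// For example, (IS_VGPR, 2) yields VReg_64 and (IS_SGPR, 4) yields SGPR_128;
// unsupported widths return -1.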
2207 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2208   if (Is == IS_VGPR) {
2209     switch (RegWidth) {
2210       default: return -1;
2211       case 1: return AMDGPU::VGPR_32RegClassID;
2212       case 2: return AMDGPU::VReg_64RegClassID;
2213       case 3: return AMDGPU::VReg_96RegClassID;
2214       case 4: return AMDGPU::VReg_128RegClassID;
2215       case 5: return AMDGPU::VReg_160RegClassID;
2216       case 6: return AMDGPU::VReg_192RegClassID;
2217       case 7: return AMDGPU::VReg_224RegClassID;
2218       case 8: return AMDGPU::VReg_256RegClassID;
2219       case 16: return AMDGPU::VReg_512RegClassID;
2220       case 32: return AMDGPU::VReg_1024RegClassID;
2221     }
2222   } else if (Is == IS_TTMP) {
2223     switch (RegWidth) {
2224       default: return -1;
2225       case 1: return AMDGPU::TTMP_32RegClassID;
2226       case 2: return AMDGPU::TTMP_64RegClassID;
2227       case 4: return AMDGPU::TTMP_128RegClassID;
2228       case 8: return AMDGPU::TTMP_256RegClassID;
2229       case 16: return AMDGPU::TTMP_512RegClassID;
2230     }
2231   } else if (Is == IS_SGPR) {
2232     switch (RegWidth) {
2233       default: return -1;
2234       case 1: return AMDGPU::SGPR_32RegClassID;
2235       case 2: return AMDGPU::SGPR_64RegClassID;
2236       case 3: return AMDGPU::SGPR_96RegClassID;
2237       case 4: return AMDGPU::SGPR_128RegClassID;
2238       case 5: return AMDGPU::SGPR_160RegClassID;
2239       case 6: return AMDGPU::SGPR_192RegClassID;
2240       case 7: return AMDGPU::SGPR_224RegClassID;
2241       case 8: return AMDGPU::SGPR_256RegClassID;
2242       case 16: return AMDGPU::SGPR_512RegClassID;
2243     }
2244   } else if (Is == IS_AGPR) {
2245     switch (RegWidth) {
2246       default: return -1;
2247       case 1: return AMDGPU::AGPR_32RegClassID;
2248       case 2: return AMDGPU::AReg_64RegClassID;
2249       case 3: return AMDGPU::AReg_96RegClassID;
2250       case 4: return AMDGPU::AReg_128RegClassID;
2251       case 5: return AMDGPU::AReg_160RegClassID;
2252       case 6: return AMDGPU::AReg_192RegClassID;
2253       case 7: return AMDGPU::AReg_224RegClassID;
2254       case 8: return AMDGPU::AReg_256RegClassID;
2255       case 16: return AMDGPU::AReg_512RegClassID;
2256       case 32: return AMDGPU::AReg_1024RegClassID;
2257     }
2258   }
2259   return -1;
2260 }
2261 
2262 static unsigned getSpecialRegForName(StringRef RegName) {
2263   return StringSwitch<unsigned>(RegName)
2264     .Case("exec", AMDGPU::EXEC)
2265     .Case("vcc", AMDGPU::VCC)
2266     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2267     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2268     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2269     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2270     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2271     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2272     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2273     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2274     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2275     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2276     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2277     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2278     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2279     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2280     .Case("m0", AMDGPU::M0)
2281     .Case("vccz", AMDGPU::SRC_VCCZ)
2282     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2283     .Case("execz", AMDGPU::SRC_EXECZ)
2284     .Case("src_execz", AMDGPU::SRC_EXECZ)
2285     .Case("scc", AMDGPU::SRC_SCC)
2286     .Case("src_scc", AMDGPU::SRC_SCC)
2287     .Case("tba", AMDGPU::TBA)
2288     .Case("tma", AMDGPU::TMA)
2289     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2290     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2291     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2292     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2293     .Case("vcc_lo", AMDGPU::VCC_LO)
2294     .Case("vcc_hi", AMDGPU::VCC_HI)
2295     .Case("exec_lo", AMDGPU::EXEC_LO)
2296     .Case("exec_hi", AMDGPU::EXEC_HI)
2297     .Case("tma_lo", AMDGPU::TMA_LO)
2298     .Case("tma_hi", AMDGPU::TMA_HI)
2299     .Case("tba_lo", AMDGPU::TBA_LO)
2300     .Case("tba_hi", AMDGPU::TBA_HI)
2301     .Case("pc", AMDGPU::PC_REG)
2302     .Case("null", AMDGPU::SGPR_NULL)
2303     .Default(AMDGPU::NoRegister);
2304 }
2305 
2306 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2307                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2308   auto R = parseRegister();
2309   if (!R) return true;
2310   assert(R->isReg());
2311   RegNo = R->getReg();
2312   StartLoc = R->getStartLoc();
2313   EndLoc = R->getEndLoc();
2314   return false;
2315 }
2316 
2317 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2318                                     SMLoc &EndLoc) {
2319   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2320 }
2321 
2322 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2323                                                        SMLoc &StartLoc,
2324                                                        SMLoc &EndLoc) {
2325   bool Result =
2326       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2327   bool PendingErrors = getParser().hasPendingError();
2328   getParser().clearPendingErrors();
2329   if (PendingErrors)
2330     return MatchOperand_ParseFail;
2331   if (Result)
2332     return MatchOperand_NoMatch;
2333   return MatchOperand_Success;
2334 }
2335 
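// Appends the next register of a bracketed list (e.g. [s0,s1,s2,s3]) to the
// register being accumulated. Special registers may only be combined as the
// predefined lo/hi pairs, e.g. [exec_lo,exec_hi] combines into exec; regular
// registers must have consecutive indices.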
2336 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2337                                             RegisterKind RegKind, unsigned Reg1,
2338                                             SMLoc Loc) {
2339   switch (RegKind) {
2340   case IS_SPECIAL:
2341     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2342       Reg = AMDGPU::EXEC;
2343       RegWidth = 2;
2344       return true;
2345     }
2346     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2347       Reg = AMDGPU::FLAT_SCR;
2348       RegWidth = 2;
2349       return true;
2350     }
2351     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2352       Reg = AMDGPU::XNACK_MASK;
2353       RegWidth = 2;
2354       return true;
2355     }
2356     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2357       Reg = AMDGPU::VCC;
2358       RegWidth = 2;
2359       return true;
2360     }
2361     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2362       Reg = AMDGPU::TBA;
2363       RegWidth = 2;
2364       return true;
2365     }
2366     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2367       Reg = AMDGPU::TMA;
2368       RegWidth = 2;
2369       return true;
2370     }
2371     Error(Loc, "register does not fit in the list");
2372     return false;
2373   case IS_VGPR:
2374   case IS_SGPR:
2375   case IS_AGPR:
2376   case IS_TTMP:
2377     if (Reg1 != Reg + RegWidth) {
2378       Error(Loc, "registers in a list must have consecutive indices");
2379       return false;
2380     }
2381     RegWidth++;
2382     return true;
2383   default:
2384     llvm_unreachable("unexpected register kind");
2385   }
2386 }
2387 
2388 struct RegInfo {
2389   StringLiteral Name;
2390   RegisterKind Kind;
2391 };
2392 
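// Prefixes of regular (non-special) register names, matched by prefix.
// For example, "v0", "s[0:3]", "ttmp4", and "a[2:3]" are all regular
// registers; "acc" is an alternative AGPR prefix, so "acc0" is equivalent
// to "a0".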
2393 static constexpr RegInfo RegularRegisters[] = {
2394   {{"v"},    IS_VGPR},
2395   {{"s"},    IS_SGPR},
2396   {{"ttmp"}, IS_TTMP},
2397   {{"acc"},  IS_AGPR},
2398   {{"a"},    IS_AGPR},
2399 };
2400 
2401 static bool isRegularReg(RegisterKind Kind) {
2402   return Kind == IS_VGPR ||
2403          Kind == IS_SGPR ||
2404          Kind == IS_TTMP ||
2405          Kind == IS_AGPR;
2406 }
2407 
2408 static const RegInfo* getRegularRegInfo(StringRef Str) {
2409   for (const RegInfo &Reg : RegularRegisters)
2410     if (Str.startswith(Reg.Name))
2411       return &Reg;
2412   return nullptr;
2413 }
2414 
2415 static bool getRegNum(StringRef Str, unsigned& Num) {
2416   return !Str.getAsInteger(10, Num);
2417 }
2418 
2419 bool
2420 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2421                             const AsmToken &NextToken) const {
2422 
2423   // A list of consecutive registers: [s0,s1,s2,s3]
2424   if (Token.is(AsmToken::LBrac))
2425     return true;
2426 
2427   if (!Token.is(AsmToken::Identifier))
2428     return false;
2429 
2430   // A single register like s0 or a range of registers like s[0:1]
2431 
2432   StringRef Str = Token.getString();
2433   const RegInfo *Reg = getRegularRegInfo(Str);
2434   if (Reg) {
2435     StringRef RegName = Reg->Name;
2436     StringRef RegSuffix = Str.substr(RegName.size());
2437     if (!RegSuffix.empty()) {
2438       unsigned Num;
2439       // A single register with an index: rXX
2440       if (getRegNum(RegSuffix, Num))
2441         return true;
2442     } else {
2443       // A range of registers: r[XX:YY].
2444       if (NextToken.is(AsmToken::LBrac))
2445         return true;
2446     }
2447   }
2448 
2449   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2450 }
2451 
2452 bool
2453 AMDGPUAsmParser::isRegister()
2454 {
2455   return isRegister(getToken(), peekToken());
2456 }
2457 
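// Maps a regular register (kind, first index, width in 32-bit units) to the
// corresponding register number. SGPR and TTMP ranges must be suitably
// aligned: for example, s[0:1] and s[2:3] are valid 64-bit pairs, while
// s[1:2] is rejected with an alignment error.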
2458 unsigned
2459 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2460                                unsigned RegNum,
2461                                unsigned RegWidth,
2462                                SMLoc Loc) {
2463 
2464   assert(isRegularReg(RegKind));
2465 
2466   unsigned AlignSize = 1;
2467   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2468     // SGPR and TTMP registers must be aligned.
2469     // Max required alignment is 4 dwords.
2470     AlignSize = std::min(RegWidth, 4u);
2471   }
2472 
2473   if (RegNum % AlignSize != 0) {
2474     Error(Loc, "invalid register alignment");
2475     return AMDGPU::NoRegister;
2476   }
2477 
2478   unsigned RegIdx = RegNum / AlignSize;
2479   int RCID = getRegClass(RegKind, RegWidth);
2480   if (RCID == -1) {
2481     Error(Loc, "invalid or unsupported register size");
2482     return AMDGPU::NoRegister;
2483   }
2484 
2485   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2486   const MCRegisterClass RC = TRI->getRegClass(RCID);
2487   if (RegIdx >= RC.getNumRegs()) {
2488     Error(Loc, "register index is out of range");
2489     return AMDGPU::NoRegister;
2490   }
2491 
2492   return RC.getRegister(RegIdx);
2493 }
2494 
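// Parses a bracketed register index or index range such as [0] or [0:3].
// For [0:3], Num is set to 0 and Width to 4.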
2495 bool
2496 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2497   int64_t RegLo, RegHi;
2498   if (!skipToken(AsmToken::LBrac, "missing register index"))
2499     return false;
2500 
2501   SMLoc FirstIdxLoc = getLoc();
2502   SMLoc SecondIdxLoc;
2503 
2504   if (!parseExpr(RegLo))
2505     return false;
2506 
2507   if (trySkipToken(AsmToken::Colon)) {
2508     SecondIdxLoc = getLoc();
2509     if (!parseExpr(RegHi))
2510       return false;
2511   } else {
2512     RegHi = RegLo;
2513   }
2514 
2515   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2516     return false;
2517 
2518   if (!isUInt<32>(RegLo)) {
2519     Error(FirstIdxLoc, "invalid register index");
2520     return false;
2521   }
2522 
2523   if (!isUInt<32>(RegHi)) {
2524     Error(SecondIdxLoc, "invalid register index");
2525     return false;
2526   }
2527 
2528   if (RegLo > RegHi) {
2529     Error(FirstIdxLoc, "first register index should not exceed second index");
2530     return false;
2531   }
2532 
2533   Num = static_cast<unsigned>(RegLo);
2534   Width = (RegHi - RegLo) + 1;
2535   return true;
2536 }
2537 
2538 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2539                                           unsigned &RegNum, unsigned &RegWidth,
2540                                           SmallVectorImpl<AsmToken> &Tokens) {
2541   assert(isToken(AsmToken::Identifier));
2542   unsigned Reg = getSpecialRegForName(getTokenStr());
2543   if (Reg) {
2544     RegNum = 0;
2545     RegWidth = 1;
2546     RegKind = IS_SPECIAL;
2547     Tokens.push_back(getToken());
2548     lex(); // skip register name
2549   }
2550   return Reg;
2551 }
2552 
2553 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2554                                           unsigned &RegNum, unsigned &RegWidth,
2555                                           SmallVectorImpl<AsmToken> &Tokens) {
2556   assert(isToken(AsmToken::Identifier));
2557   StringRef RegName = getTokenStr();
2558   auto Loc = getLoc();
2559 
2560   const RegInfo *RI = getRegularRegInfo(RegName);
2561   if (!RI) {
2562     Error(Loc, "invalid register name");
2563     return AMDGPU::NoRegister;
2564   }
2565 
2566   Tokens.push_back(getToken());
2567   lex(); // skip register name
2568 
2569   RegKind = RI->Kind;
2570   StringRef RegSuffix = RegName.substr(RI->Name.size());
2571   if (!RegSuffix.empty()) {
2572     // Single 32-bit register: vXX.
2573     if (!getRegNum(RegSuffix, RegNum)) {
2574       Error(Loc, "invalid register index");
2575       return AMDGPU::NoRegister;
2576     }
2577     RegWidth = 1;
2578   } else {
2579     // Range of registers: v[XX:YY]. ":YY" is optional.
2580     if (!ParseRegRange(RegNum, RegWidth))
2581       return AMDGPU::NoRegister;
2582   }
2583 
2584   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2585 }
2586 
2587 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2588                                        unsigned &RegWidth,
2589                                        SmallVectorImpl<AsmToken> &Tokens) {
2590   unsigned Reg = AMDGPU::NoRegister;
2591   auto ListLoc = getLoc();
2592 
2593   if (!skipToken(AsmToken::LBrac,
2594                  "expected a register or a list of registers")) {
2595     return AMDGPU::NoRegister;
2596   }
2597 
2598   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2599 
2600   auto Loc = getLoc();
2601   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2602     return AMDGPU::NoRegister;
2603   if (RegWidth != 1) {
2604     Error(Loc, "expected a single 32-bit register");
2605     return AMDGPU::NoRegister;
2606   }
2607 
2608   for (; trySkipToken(AsmToken::Comma); ) {
2609     RegisterKind NextRegKind;
2610     unsigned NextReg, NextRegNum, NextRegWidth;
2611     Loc = getLoc();
2612 
2613     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2614                              NextRegNum, NextRegWidth,
2615                              Tokens)) {
2616       return AMDGPU::NoRegister;
2617     }
2618     if (NextRegWidth != 1) {
2619       Error(Loc, "expected a single 32-bit register");
2620       return AMDGPU::NoRegister;
2621     }
2622     if (NextRegKind != RegKind) {
2623       Error(Loc, "registers in a list must be of the same kind");
2624       return AMDGPU::NoRegister;
2625     }
2626     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2627       return AMDGPU::NoRegister;
2628   }
2629 
2630   if (!skipToken(AsmToken::RBrac,
2631                  "expected a comma or a closing square bracket")) {
2632     return AMDGPU::NoRegister;
2633   }
2634 
2635   if (isRegularReg(RegKind))
2636     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2637 
2638   return Reg;
2639 }
2640 
2641 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2642                                           unsigned &RegNum, unsigned &RegWidth,
2643                                           SmallVectorImpl<AsmToken> &Tokens) {
2644   auto Loc = getLoc();
2645   Reg = AMDGPU::NoRegister;
2646 
2647   if (isToken(AsmToken::Identifier)) {
2648     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2649     if (Reg == AMDGPU::NoRegister)
2650       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2651   } else {
2652     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2653   }
2654 
2655   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2656   if (Reg == AMDGPU::NoRegister) {
2657     assert(Parser.hasPendingError());
2658     return false;
2659   }
2660 
2661   if (!subtargetHasRegister(*TRI, Reg)) {
2662     if (Reg == AMDGPU::SGPR_NULL) {
2663       Error(Loc, "'null' operand is not supported on this GPU");
2664     } else {
2665       Error(Loc, "register not available on this GPU");
2666     }
2667     return false;
2668   }
2669 
2670   return true;
2671 }
2672 
2673 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2674                                           unsigned &RegNum, unsigned &RegWidth,
2675                                           bool RestoreOnFailure /*=false*/) {
2676   Reg = AMDGPU::NoRegister;
2677 
2678   SmallVector<AsmToken, 1> Tokens;
2679   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2680     if (RestoreOnFailure) {
2681       while (!Tokens.empty()) {
2682         getLexer().UnLex(Tokens.pop_back_val());
2683       }
2684     }
2685     return true;
2686   }
2687   return false;
2688 }
2689 
2690 Optional<StringRef>
2691 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2692   switch (RegKind) {
2693   case IS_VGPR:
2694     return StringRef(".amdgcn.next_free_vgpr");
2695   case IS_SGPR:
2696     return StringRef(".amdgcn.next_free_sgpr");
2697   default:
2698     return None;
2699   }
2700 }
2701 
2702 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2703   auto SymbolName = getGprCountSymbolName(RegKind);
2704   assert(SymbolName && "initializing invalid register kind");
2705   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2706   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2707 }
2708 
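// Raises the .amdgcn.next_free_vgpr / .amdgcn.next_free_sgpr symbol to cover
// the register range just parsed. For example, after v[8:11] the highest
// index used is 11, so the vgpr symbol is raised to at least 12.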
2709 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2710                                             unsigned DwordRegIndex,
2711                                             unsigned RegWidth) {
2712   // Symbols are only defined for GCN targets
2713   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2714     return true;
2715 
2716   auto SymbolName = getGprCountSymbolName(RegKind);
2717   if (!SymbolName)
2718     return true;
2719   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2720 
2721   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2722   int64_t OldCount;
2723 
2724   if (!Sym->isVariable())
2725     return !Error(getLoc(),
2726                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2727   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2728     return !Error(
2729         getLoc(),
2730         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2731 
2732   if (OldCount <= NewMax)
2733     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2734 
2735   return true;
2736 }
2737 
2738 std::unique_ptr<AMDGPUOperand>
2739 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2740   const auto &Tok = getToken();
2741   SMLoc StartLoc = Tok.getLoc();
2742   SMLoc EndLoc = Tok.getEndLoc();
2743   RegisterKind RegKind;
2744   unsigned Reg, RegNum, RegWidth;
2745 
2746   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2747     return nullptr;
2748   }
2749   if (isHsaAbiVersion3Or4(&getSTI())) {
2750     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2751       return nullptr;
2752   } else
2753     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2754   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2755 }
2756 
2757 OperandMatchResultTy
2758 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2759   // TODO: add syntactic sugar for 1/(2*PI)
2760 
2761   assert(!isRegister());
2762   assert(!isModifier());
2763 
2764   const auto& Tok = getToken();
2765   const auto& NextTok = peekToken();
2766   bool IsReal = Tok.is(AsmToken::Real);
2767   SMLoc S = getLoc();
2768   bool Negate = false;
2769 
2770   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2771     lex();
2772     IsReal = true;
2773     Negate = true;
2774   }
2775 
2776   if (IsReal) {
    // Floating-point expressions are not supported.
    // Only floating-point literals with an optional
    // sign are allowed.
2780 
2781     StringRef Num = getTokenStr();
2782     lex();
2783 
2784     APFloat RealVal(APFloat::IEEEdouble());
2785     auto roundMode = APFloat::rmNearestTiesToEven;
2786     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2787       return MatchOperand_ParseFail;
2788     }
2789     if (Negate)
2790       RealVal.changeSign();
2791 
2792     Operands.push_back(
2793       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2794                                AMDGPUOperand::ImmTyNone, true));
2795 
2796     return MatchOperand_Success;
2797 
2798   } else {
2799     int64_t IntVal;
2800     const MCExpr *Expr;
2801     SMLoc S = getLoc();
2802 
2803     if (HasSP3AbsModifier) {
2804       // This is a workaround for handling expressions
2805       // as arguments of SP3 'abs' modifier, for example:
2806       //     |1.0|
2807       //     |-1|
2808       //     |1+x|
2809       // This syntax is not compatible with syntax of standard
2810       // MC expressions (due to the trailing '|').
2811       SMLoc EndLoc;
2812       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2813         return MatchOperand_ParseFail;
2814     } else {
2815       if (Parser.parseExpression(Expr))
2816         return MatchOperand_ParseFail;
2817     }
2818 
2819     if (Expr->evaluateAsAbsolute(IntVal)) {
2820       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2821     } else {
2822       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2823     }
2824 
2825     return MatchOperand_Success;
2826   }
2827 
2828   return MatchOperand_NoMatch;
2829 }
2830 
2831 OperandMatchResultTy
2832 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2833   if (!isRegister())
2834     return MatchOperand_NoMatch;
2835 
2836   if (auto R = parseRegister()) {
2837     assert(R->isReg());
2838     Operands.push_back(std::move(R));
2839     return MatchOperand_Success;
2840   }
2841   return MatchOperand_ParseFail;
2842 }
2843 
2844 OperandMatchResultTy
2845 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2846   auto res = parseReg(Operands);
2847   if (res != MatchOperand_NoMatch) {
2848     return res;
2849   } else if (isModifier()) {
2850     return MatchOperand_NoMatch;
2851   } else {
2852     return parseImm(Operands, HasSP3AbsMod);
2853   }
2854 }
2855 
2856 bool
2857 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2858   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2859     const auto &str = Token.getString();
2860     return str == "abs" || str == "neg" || str == "sext";
2861   }
2862   return false;
2863 }
2864 
2865 bool
2866 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2867   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2868 }
2869 
2870 bool
2871 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2872   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2873 }
2874 
2875 bool
2876 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2877   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2878 }
2879 
2880 // Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
2882 // avoid parsing these modifiers as expressions. Currently
2883 // recognized sequences are:
2884 //   |...|
2885 //   abs(...)
2886 //   neg(...)
2887 //   sext(...)
2888 //   -reg
2889 //   -|...|
2890 //   -abs(...)
2891 //   name:...
2892 // Note that simple opcode modifiers like 'gds' may be parsed as
2893 // expressions; this is a special case. See getExpressionAsToken.
2894 //
2895 bool
2896 AMDGPUAsmParser::isModifier() {
2897 
2898   AsmToken Tok = getToken();
2899   AsmToken NextToken[2];
2900   peekTokens(NextToken);
2901 
2902   return isOperandModifier(Tok, NextToken[0]) ||
2903          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2904          isOpcodeModifierWithVal(Tok, NextToken[0]);
2905 }
2906 
2907 // Check if the current token is an SP3 'neg' modifier.
2908 // Currently this modifier is allowed in the following context:
2909 //
2910 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2911 // 2. Before an 'abs' modifier: -abs(...)
2912 // 3. Before an SP3 'abs' modifier: -|...|
2913 //
2914 // In all other cases "-" is handled as a part
2915 // of an expression that follows the sign.
2916 //
2917 // Note: When "-" is followed by an integer literal,
2918 // this is interpreted as integer negation rather
2919 // than a floating-point NEG modifier applied to N.
2920 // Beside being contr-intuitive, such use of floating-point
2921 // NEG modifier would have resulted in different meaning
2922 // of integer literals used with VOP1/2/C and VOP3,
2923 // for example:
2924 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2925 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2926 // Negative fp literals with preceding "-" are
2927 // handled likewise for unifomtity
2928 //
2929 bool
2930 AMDGPUAsmParser::parseSP3NegModifier() {
2931 
2932   AsmToken NextToken[2];
2933   peekTokens(NextToken);
2934 
2935   if (isToken(AsmToken::Minus) &&
2936       (isRegister(NextToken[0], NextToken[1]) ||
2937        NextToken[0].is(AsmToken::Pipe) ||
2938        isId(NextToken[0], "abs"))) {
2939     lex();
2940     return true;
2941   }
2942 
2943   return false;
2944 }
2945 
2946 OperandMatchResultTy
2947 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2948                                               bool AllowImm) {
2949   bool Neg, SP3Neg;
2950   bool Abs, SP3Abs;
2951   SMLoc Loc;
2952 
2953   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2954   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2955     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2956     return MatchOperand_ParseFail;
2957   }
2958 
2959   SP3Neg = parseSP3NegModifier();
2960 
2961   Loc = getLoc();
2962   Neg = trySkipId("neg");
2963   if (Neg && SP3Neg) {
2964     Error(Loc, "expected register or immediate");
2965     return MatchOperand_ParseFail;
2966   }
2967   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2968     return MatchOperand_ParseFail;
2969 
2970   Abs = trySkipId("abs");
2971   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2972     return MatchOperand_ParseFail;
2973 
2974   Loc = getLoc();
2975   SP3Abs = trySkipToken(AsmToken::Pipe);
2976   if (Abs && SP3Abs) {
2977     Error(Loc, "expected register or immediate");
2978     return MatchOperand_ParseFail;
2979   }
2980 
2981   OperandMatchResultTy Res;
2982   if (AllowImm) {
2983     Res = parseRegOrImm(Operands, SP3Abs);
2984   } else {
2985     Res = parseReg(Operands);
2986   }
2987   if (Res != MatchOperand_Success) {
2988     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2989   }
2990 
2991   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2992     return MatchOperand_ParseFail;
2993   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2994     return MatchOperand_ParseFail;
2995   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2996     return MatchOperand_ParseFail;
2997 
2998   AMDGPUOperand::Modifiers Mods;
2999   Mods.Abs = Abs || SP3Abs;
3000   Mods.Neg = Neg || SP3Neg;
3001 
3002   if (Mods.hasFPModifiers()) {
3003     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3004     if (Op.isExpr()) {
3005       Error(Op.getStartLoc(), "expected an absolute expression");
3006       return MatchOperand_ParseFail;
3007     }
3008     Op.setModifiers(Mods);
3009   }
3010   return MatchOperand_Success;
3011 }
3012 
3013 OperandMatchResultTy
3014 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3015                                                bool AllowImm) {
3016   bool Sext = trySkipId("sext");
3017   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3018     return MatchOperand_ParseFail;
3019 
3020   OperandMatchResultTy Res;
3021   if (AllowImm) {
3022     Res = parseRegOrImm(Operands);
3023   } else {
3024     Res = parseReg(Operands);
3025   }
3026   if (Res != MatchOperand_Success) {
3027     return Sext? MatchOperand_ParseFail : Res;
3028   }
3029 
3030   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3031     return MatchOperand_ParseFail;
3032 
3033   AMDGPUOperand::Modifiers Mods;
3034   Mods.Sext = Sext;
3035 
3036   if (Mods.hasIntModifiers()) {
3037     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3038     if (Op.isExpr()) {
3039       Error(Op.getStartLoc(), "expected an absolute expression");
3040       return MatchOperand_ParseFail;
3041     }
3042     Op.setModifiers(Mods);
3043   }
3044 
3045   return MatchOperand_Success;
3046 }
3047 
3048 OperandMatchResultTy
3049 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3050   return parseRegOrImmWithFPInputMods(Operands, false);
3051 }
3052 
3053 OperandMatchResultTy
3054 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3055   return parseRegOrImmWithIntInputMods(Operands, false);
3056 }
3057 
3058 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3059   auto Loc = getLoc();
3060   if (trySkipId("off")) {
3061     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3062                                                 AMDGPUOperand::ImmTyOff, false));
3063     return MatchOperand_Success;
3064   }
3065 
3066   if (!isRegister())
3067     return MatchOperand_NoMatch;
3068 
3069   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3070   if (Reg) {
3071     Operands.push_back(std::move(Reg));
3072     return MatchOperand_Success;
3073   }
3074 
3075   return MatchOperand_ParseFail;
3076 
3077 }
3078 
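// Rejects matches that conflict with a forced encoding or encoding size.
// For example, when a 64-bit encoding has been forced (typically via an
// "_e64" mnemonic suffix), only VOP3 forms are accepted.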
3079 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3080   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3081 
3082   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3083       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3084       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3085       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3086     return Match_InvalidOperand;
3087 
3088   if ((TSFlags & SIInstrFlags::VOP3) &&
3089       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3090       getForcedEncodingSize() != 64)
3091     return Match_PreferE32;
3092 
3093   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3094       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3095     // v_mac_f32/16 allow only dst_sel == DWORD;
3096     auto OpNum =
3097         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3098     const auto &Op = Inst.getOperand(OpNum);
3099     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3100       return Match_InvalidOperand;
3101     }
3102   }
3103 
3104   return Match_Success;
3105 }
3106 
3107 static ArrayRef<unsigned> getAllVariants() {
3108   static const unsigned Variants[] = {
3109     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3110     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3111   };
3112 
3113   return makeArrayRef(Variants);
3114 }
3115 
3116 // What asm variants we should check
3117 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3118   if (getForcedEncodingSize() == 32) {
3119     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3120     return makeArrayRef(Variants);
3121   }
3122 
3123   if (isForcedVOP3()) {
3124     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3125     return makeArrayRef(Variants);
3126   }
3127 
3128   if (isForcedSDWA()) {
3129     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3130                                         AMDGPUAsmVariants::SDWA9};
3131     return makeArrayRef(Variants);
3132   }
3133 
3134   if (isForcedDPP()) {
3135     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3136     return makeArrayRef(Variants);
3137   }
3138 
3139   return getAllVariants();
3140 }
3141 
3142 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3143   if (getForcedEncodingSize() == 32)
3144     return "e32";
3145 
3146   if (isForcedVOP3())
3147     return "e64";
3148 
3149   if (isForcedSDWA())
3150     return "sdwa";
3151 
3152   if (isForcedDPP())
3153     return "dpp";
3154 
3155   return "";
3156 }
3157 
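// Return the first implicitly read scalar register (FLAT_SCR, VCC, VCC_LO,
// VCC_HI or M0) of a VOP instruction, or NoRegister if there is none.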
3158 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3159   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3160   const unsigned Num = Desc.getNumImplicitUses();
3161   for (unsigned i = 0; i < Num; ++i) {
3162     unsigned Reg = Desc.ImplicitUses[i];
3163     switch (Reg) {
3164     case AMDGPU::FLAT_SCR:
3165     case AMDGPU::VCC:
3166     case AMDGPU::VCC_LO:
3167     case AMDGPU::VCC_HI:
3168     case AMDGPU::M0:
3169       return Reg;
3170     default:
3171       break;
3172     }
3173   }
3174   return AMDGPU::NoRegister;
3175 }
3176 
3177 // NB: This code is correct only when used to check constant
3178 // bus limitations because GFX7 has no f16 inline constants.
3179 // Note that there are no cases when a GFX7 opcode violates
3180 // constant bus limitations due to the use of an f16 constant.
3181 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3182                                        unsigned OpIdx) const {
3183   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3184 
3185   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3186     return false;
3187   }
3188 
3189   const MCOperand &MO = Inst.getOperand(OpIdx);
3190 
3191   int64_t Val = MO.getImm();
3192   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3193 
3194   switch (OpSize) { // expected operand size
3195   case 8:
3196     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3197   case 4:
3198     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3199   case 2: {
3200     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3201     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3202         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3203         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3204       return AMDGPU::isInlinableIntLiteral(Val);
3205 
3206     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3207         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3208         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3209       return AMDGPU::isInlinableIntLiteralV216(Val);
3210 
3211     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3212         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3213         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3214       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3215 
3216     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3217   }
3218   default:
3219     llvm_unreachable("invalid operand size");
3220   }
3221 }
3222 
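// Number of scalar (constant bus) inputs an instruction may use:
// one on pre-GFX10 targets, two on GFX10+ except for 64-bit shifts,
// which remain limited to a single scalar input.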
3223 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3224   if (!isGFX10Plus())
3225     return 1;
3226 
3227   switch (Opcode) {
3228   // 64-bit shift instructions can use only one scalar value input
3229   case AMDGPU::V_LSHLREV_B64_e64:
3230   case AMDGPU::V_LSHLREV_B64_gfx10:
3231   case AMDGPU::V_LSHRREV_B64_e64:
3232   case AMDGPU::V_LSHRREV_B64_gfx10:
3233   case AMDGPU::V_ASHRREV_I64_e64:
3234   case AMDGPU::V_ASHRREV_I64_gfx10:
3235   case AMDGPU::V_LSHL_B64_e64:
3236   case AMDGPU::V_LSHR_B64_e64:
3237   case AMDGPU::V_ASHR_I64_e64:
3238     return 1;
3239   default:
3240     return 2;
3241   }
3242 }
3243 
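// An operand occupies the constant bus if it is an SGPR other than null,
// a literal that is not an inline constant, or an expression.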
3244 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3245   const MCOperand &MO = Inst.getOperand(OpIdx);
3246   if (MO.isImm()) {
3247     return !isInlineConstant(Inst, OpIdx);
3248   } else if (MO.isReg()) {
3249     auto Reg = MO.getReg();
3250     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3251     auto PReg = mc2PseudoReg(Reg);
3252     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3253   } else {
3254     return true;
3255   }
3256 }
3257 
3258 bool
3259 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3260                                                 const OperandVector &Operands) {
3261   const unsigned Opcode = Inst.getOpcode();
3262   const MCInstrDesc &Desc = MII.get(Opcode);
3263   unsigned LastSGPR = AMDGPU::NoRegister;
3264   unsigned ConstantBusUseCount = 0;
3265   unsigned NumLiterals = 0;
3266   unsigned LiteralSize;
3267 
3268   if (Desc.TSFlags &
3269       (SIInstrFlags::VOPC |
3270        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3271        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3272        SIInstrFlags::SDWA)) {
3273     // Check special imm operands (used by madmk, etc)
3274     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3275       ++NumLiterals;
3276       LiteralSize = 4;
3277     }
3278 
3279     SmallDenseSet<unsigned> SGPRsUsed;
3280     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3281     if (SGPRUsed != AMDGPU::NoRegister) {
3282       SGPRsUsed.insert(SGPRUsed);
3283       ++ConstantBusUseCount;
3284     }
3285 
3286     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3287     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3288     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3289 
3290     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3291 
3292     for (int OpIdx : OpIndices) {
3293       if (OpIdx == -1) break;
3294 
3295       const MCOperand &MO = Inst.getOperand(OpIdx);
3296       if (usesConstantBus(Inst, OpIdx)) {
3297         if (MO.isReg()) {
3298           LastSGPR = mc2PseudoReg(MO.getReg());
3299           // Pairs of registers that partially intersect, such as
3300           //   s0, s[0:1]
3301           //   flat_scratch_lo, flat_scratch
3302           //   flat_scratch_lo, flat_scratch_hi
3303           // are theoretically valid but are not allowed anyway.
3304           // Note that this code mimics SIInstrInfo::verifyInstruction.
3305           if (!SGPRsUsed.count(LastSGPR)) {
3306             SGPRsUsed.insert(LastSGPR);
3307             ++ConstantBusUseCount;
3308           }
3309         } else { // Expression or a literal
3310 
3311           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3312             continue; // special operand like VINTERP attr_chan
3313 
3314           // An instruction may use only one literal.
3315           // This has been validated in a previous step.
3316           // See validateVOPLiteral.
3317           // This literal may be used as more than one operand.
3318           // If all these operands are of the same size,
3319           // this literal counts as one scalar value.
3320           // Otherwise it counts as 2 scalar values.
3321           // See "GFX10 Shader Programming", section 3.6.2.3.
3322 
3323           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3324           if (Size < 4) Size = 4;
3325 
3326           if (NumLiterals == 0) {
3327             NumLiterals = 1;
3328             LiteralSize = Size;
3329           } else if (LiteralSize != Size) {
3330             NumLiterals = 2;
3331           }
3332         }
3333       }
3334     }
3335   }
3336   ConstantBusUseCount += NumLiterals;
3337 
3338   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3339     return true;
3340 
3341   SMLoc LitLoc = getLitLoc(Operands);
3342   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3343   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3344   Error(Loc, "invalid operand (violates constant bus restrictions)");
3345   return false;
3346 }
3347 
3348 bool
3349 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3350                                                  const OperandVector &Operands) {
3351   const unsigned Opcode = Inst.getOpcode();
3352   const MCInstrDesc &Desc = MII.get(Opcode);
3353 
3354   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3355   if (DstIdx == -1 ||
3356       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3357     return true;
3358   }
3359 
3360   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3361 
3362   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3363   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3364   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3365 
3366   assert(DstIdx != -1);
3367   const MCOperand &Dst = Inst.getOperand(DstIdx);
3368   assert(Dst.isReg());
3369   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3370 
3371   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3372 
3373   for (int SrcIdx : SrcIndices) {
3374     if (SrcIdx == -1) break;
3375     const MCOperand &Src = Inst.getOperand(SrcIdx);
3376     if (Src.isReg()) {
3377       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3378       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3379         Error(getRegLoc(SrcReg, Operands),
3380           "destination must be different than all sources");
3381         return false;
3382       }
3383     }
3384   }
3385 
3386   return true;
3387 }
3388 
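// On targets without integer clamping support, the clamp bit of IntClamp
// instructions must be zero.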
3389 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3390 
3391   const unsigned Opc = Inst.getOpcode();
3392   const MCInstrDesc &Desc = MII.get(Opc);
3393 
3394   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3395     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3396     assert(ClampIdx != -1);
3397     return Inst.getOperand(ClampIdx).getImm() == 0;
3398   }
3399 
3400   return true;
3401 }
3402 
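// Check that the vdata register width of a MIMG instruction matches the
// number of channels enabled by dmask (4 for gather4), plus one register
// for tfe; packed d16 halves the channel count.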
3403 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3404 
3405   const unsigned Opc = Inst.getOpcode();
3406   const MCInstrDesc &Desc = MII.get(Opc);
3407 
3408   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3409     return true;
3410 
3411   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3412   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3413   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3414 
3415   assert(VDataIdx != -1);
3416 
3417   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3418     return true;
3419 
3420   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3421   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3422   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3423   if (DMask == 0)
3424     DMask = 1;
3425 
3426   unsigned DataSize =
3427     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3428   if (hasPackedD16()) {
3429     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3430     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3431       DataSize = (DataSize + 1) / 2;
3432   }
3433 
3434   return (VDataSize / 4) == DataSize + TFESize;
3435 }
3436 
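// Check that the number of address registers of a GFX10+ MIMG instruction
// matches what its dim and a16 settings require, for both NSA and legacy
// vaddr encodings.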
3437 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3438   const unsigned Opc = Inst.getOpcode();
3439   const MCInstrDesc &Desc = MII.get(Opc);
3440 
3441   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3442     return true;
3443 
3444   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3445 
3446   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3447       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3448   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3449   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3450   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3451   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3452 
3453   assert(VAddr0Idx != -1);
3454   assert(SrsrcIdx != -1);
3455   assert(SrsrcIdx > VAddr0Idx);
3456 
3457   if (DimIdx == -1)
3458     return true; // intersect_ray
3459 
3460   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3461   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3462   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3463   unsigned ActualAddrSize =
3464       IsNSA ? SrsrcIdx - VAddr0Idx
3465             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3466   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3467 
3468   unsigned ExpectedAddrSize =
3469       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3470 
3471   if (!IsNSA) {
3472     if (ExpectedAddrSize > 8)
3473       ExpectedAddrSize = 16;
3474 
3475     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3476     // This provides backward compatibility for assembly created
3477     // before 160b/192b/224b types were directly supported.
3478     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3479       return true;
3480   }
3481 
3482   return ActualAddrSize == ExpectedAddrSize;
3483 }
3484 
3485 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3486 
3487   const unsigned Opc = Inst.getOpcode();
3488   const MCInstrDesc &Desc = MII.get(Opc);
3489 
3490   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3491     return true;
3492   if (!Desc.mayLoad() || !Desc.mayStore())
3493     return true; // Not atomic
3494 
3495   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3496   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3497 
3498   // This is an incomplete check because image_atomic_cmpswap
3499   // may only use 0x3 and 0xf while other atomic operations
3500   // may use 0x1 and 0x3. However these limitations are
3501   // verified when we check that dmask matches dst size.
3502   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3503 }
3504 
3505 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3506 
3507   const unsigned Opc = Inst.getOpcode();
3508   const MCInstrDesc &Desc = MII.get(Opc);
3509 
3510   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3511     return true;
3512 
3513   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3514   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3515 
3516   // GATHER4 instructions use dmask in a different fashion compared to
3517   // other MIMG instructions. The only useful DMASK values are
3518   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3519   // (red,red,red,red) etc.) The ISA document doesn't mention
3520   // this.
3521   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3522 }
3523 
3524 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3525   const unsigned Opc = Inst.getOpcode();
3526   const MCInstrDesc &Desc = MII.get(Opc);
3527 
3528   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3529     return true;
3530 
3531   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3532   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3533       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3534 
3535   if (!BaseOpcode->MSAA)
3536     return true;
3537 
3538   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3539   assert(DimIdx != -1);
3540 
3541   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3542   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3543 
3544   return DimInfo->MSAA;
3545 }
3546 
3547 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3548 {
3549   switch (Opcode) {
3550   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3551   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3552   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3553     return true;
3554   default:
3555     return false;
3556   }
3557 }
3558 
3559 // movrels* opcodes should only allow VGPRs as src0.
3560 // This is specified in .td description for vop1/vop3,
3561 // but sdwa is handled differently. See isSDWAOperand.
3562 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3563                                       const OperandVector &Operands) {
3564 
3565   const unsigned Opc = Inst.getOpcode();
3566   const MCInstrDesc &Desc = MII.get(Opc);
3567 
3568   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3569     return true;
3570 
3571   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3572   assert(Src0Idx != -1);
3573 
3574   SMLoc ErrLoc;
3575   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3576   if (Src0.isReg()) {
3577     auto Reg = mc2PseudoReg(Src0.getReg());
3578     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3579     if (!isSGPR(Reg, TRI))
3580       return true;
3581     ErrLoc = getRegLoc(Reg, Operands);
3582   } else {
3583     ErrLoc = getConstLoc(Operands);
3584   }
3585 
3586   Error(ErrLoc, "source operand must be a VGPR");
3587   return false;
3588 }
3589 
3590 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3591                                           const OperandVector &Operands) {
3592 
3593   const unsigned Opc = Inst.getOpcode();
3594 
3595   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3596     return true;
3597 
3598   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3599   assert(Src0Idx != -1);
3600 
3601   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3602   if (!Src0.isReg())
3603     return true;
3604 
3605   auto Reg = mc2PseudoReg(Src0.getReg());
3606   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3607   if (isSGPR(Reg, TRI)) {
3608     Error(getRegLoc(Reg, Operands),
3609           "source operand must be either a VGPR or an inline constant");
3610     return false;
3611   }
3612 
3613   return true;
3614 }
3615 
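// v_div_scale_* opcodes do not accept the |abs| modifier on any source.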
3616 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3617   switch (Inst.getOpcode()) {
3618   default:
3619     return true;
3620   case V_DIV_SCALE_F32_gfx6_gfx7:
3621   case V_DIV_SCALE_F32_vi:
3622   case V_DIV_SCALE_F32_gfx10:
3623   case V_DIV_SCALE_F64_gfx6_gfx7:
3624   case V_DIV_SCALE_F64_vi:
3625   case V_DIV_SCALE_F64_gfx10:
3626     break;
3627   }
3628 
3629   // TODO: Check that src0 = src1 or src2.
3630 
3631   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3632                     AMDGPU::OpName::src1_modifiers,
3633                     AMDGPU::OpName::src2_modifiers}) {
3634     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3635             .getImm() &
3636         SISrcMods::ABS) {
3637       return false;
3638     }
3639   }
3640 
3641   return true;
3642 }
3643 
3644 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3645 
3646   const unsigned Opc = Inst.getOpcode();
3647   const MCInstrDesc &Desc = MII.get(Opc);
3648 
3649   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3650     return true;
3651 
3652   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3653   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3654     if (isCI() || isSI())
3655       return false;
3656   }
3657 
3658   return true;
3659 }
3660 
3661 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3662   const unsigned Opc = Inst.getOpcode();
3663   const MCInstrDesc &Desc = MII.get(Opc);
3664 
3665   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3666     return true;
3667 
3668   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3669   if (DimIdx < 0)
3670     return true;
3671 
3672   long Imm = Inst.getOperand(DimIdx).getImm();
3673   if (Imm < 0 || Imm >= 8)
3674     return false;
3675 
3676   return true;
3677 }
3678 
3679 static bool IsRevOpcode(const unsigned Opcode)
3680 {
3681   switch (Opcode) {
3682   case AMDGPU::V_SUBREV_F32_e32:
3683   case AMDGPU::V_SUBREV_F32_e64:
3684   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3685   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3686   case AMDGPU::V_SUBREV_F32_e32_vi:
3687   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3688   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3689   case AMDGPU::V_SUBREV_F32_e64_vi:
3690 
3691   case AMDGPU::V_SUBREV_CO_U32_e32:
3692   case AMDGPU::V_SUBREV_CO_U32_e64:
3693   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3694   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3695 
3696   case AMDGPU::V_SUBBREV_U32_e32:
3697   case AMDGPU::V_SUBBREV_U32_e64:
3698   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3699   case AMDGPU::V_SUBBREV_U32_e32_vi:
3700   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3701   case AMDGPU::V_SUBBREV_U32_e64_vi:
3702 
3703   case AMDGPU::V_SUBREV_U32_e32:
3704   case AMDGPU::V_SUBREV_U32_e64:
3705   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3706   case AMDGPU::V_SUBREV_U32_e32_vi:
3707   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3708   case AMDGPU::V_SUBREV_U32_e64_vi:
3709 
3710   case AMDGPU::V_SUBREV_F16_e32:
3711   case AMDGPU::V_SUBREV_F16_e64:
3712   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3713   case AMDGPU::V_SUBREV_F16_e32_vi:
3714   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3715   case AMDGPU::V_SUBREV_F16_e64_vi:
3716 
3717   case AMDGPU::V_SUBREV_U16_e32:
3718   case AMDGPU::V_SUBREV_U16_e64:
3719   case AMDGPU::V_SUBREV_U16_e32_vi:
3720   case AMDGPU::V_SUBREV_U16_e64_vi:
3721 
3722   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3723   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3724   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3725 
3726   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3727   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3728 
3729   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3730   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3731 
3732   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3733   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3734 
3735   case AMDGPU::V_LSHRREV_B32_e32:
3736   case AMDGPU::V_LSHRREV_B32_e64:
3737   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3738   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3739   case AMDGPU::V_LSHRREV_B32_e32_vi:
3740   case AMDGPU::V_LSHRREV_B32_e64_vi:
3741   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3742   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3743 
3744   case AMDGPU::V_ASHRREV_I32_e32:
3745   case AMDGPU::V_ASHRREV_I32_e64:
3746   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3747   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3748   case AMDGPU::V_ASHRREV_I32_e32_vi:
3749   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3750   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3751   case AMDGPU::V_ASHRREV_I32_e64_vi:
3752 
3753   case AMDGPU::V_LSHLREV_B32_e32:
3754   case AMDGPU::V_LSHLREV_B32_e64:
3755   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3756   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3757   case AMDGPU::V_LSHLREV_B32_e32_vi:
3758   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3759   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3760   case AMDGPU::V_LSHLREV_B32_e64_vi:
3761 
3762   case AMDGPU::V_LSHLREV_B16_e32:
3763   case AMDGPU::V_LSHLREV_B16_e64:
3764   case AMDGPU::V_LSHLREV_B16_e32_vi:
3765   case AMDGPU::V_LSHLREV_B16_e64_vi:
3766   case AMDGPU::V_LSHLREV_B16_gfx10:
3767 
3768   case AMDGPU::V_LSHRREV_B16_e32:
3769   case AMDGPU::V_LSHRREV_B16_e64:
3770   case AMDGPU::V_LSHRREV_B16_e32_vi:
3771   case AMDGPU::V_LSHRREV_B16_e64_vi:
3772   case AMDGPU::V_LSHRREV_B16_gfx10:
3773 
3774   case AMDGPU::V_ASHRREV_I16_e32:
3775   case AMDGPU::V_ASHRREV_I16_e64:
3776   case AMDGPU::V_ASHRREV_I16_e32_vi:
3777   case AMDGPU::V_ASHRREV_I16_e64_vi:
3778   case AMDGPU::V_ASHRREV_I16_gfx10:
3779 
3780   case AMDGPU::V_LSHLREV_B64_e64:
3781   case AMDGPU::V_LSHLREV_B64_gfx10:
3782   case AMDGPU::V_LSHLREV_B64_vi:
3783 
3784   case AMDGPU::V_LSHRREV_B64_e64:
3785   case AMDGPU::V_LSHRREV_B64_gfx10:
3786   case AMDGPU::V_LSHRREV_B64_vi:
3787 
3788   case AMDGPU::V_ASHRREV_I64_e64:
3789   case AMDGPU::V_ASHRREV_I64_gfx10:
3790   case AMDGPU::V_ASHRREV_I64_vi:
3791 
3792   case AMDGPU::V_PK_LSHLREV_B16:
3793   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3794   case AMDGPU::V_PK_LSHLREV_B16_vi:
3795 
3796   case AMDGPU::V_PK_LSHRREV_B16:
3797   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3798   case AMDGPU::V_PK_LSHRREV_B16_vi:
3799   case AMDGPU::V_PK_ASHRREV_I16:
3800   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3801   case AMDGPU::V_PK_ASHRREV_I16_vi:
3802     return true;
3803   default:
3804     return false;
3805   }
3806 }
3807 
3808 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3809 
3810   using namespace SIInstrFlags;
3811   const unsigned Opcode = Inst.getOpcode();
3812   const MCInstrDesc &Desc = MII.get(Opcode);
3813 
3814   // lds_direct register is defined so that it can be used
3815   // with 9-bit operands only. Ignore encodings which do not accept these.
3816   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3817   if ((Desc.TSFlags & Enc) == 0)
3818     return None;
3819 
3820   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3821     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3822     if (SrcIdx == -1)
3823       break;
3824     const auto &Src = Inst.getOperand(SrcIdx);
3825     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3826 
3827       if (isGFX90A())
3828         return StringRef("lds_direct is not supported on this GPU");
3829 
3830       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3831         return StringRef("lds_direct cannot be used with this instruction");
3832 
3833       if (SrcName != OpName::src0)
3834         return StringRef("lds_direct may be used as src0 only");
3835     }
3836   }
3837 
3838   return None;
3839 }
3840 
3841 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3842   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3843     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3844     if (Op.isFlatOffset())
3845       return Op.getStartLoc();
3846   }
3847   return getLoc();
3848 }
3849 
3850 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3851                                          const OperandVector &Operands) {
3852   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3853   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3854     return true;
3855 
3856   auto Opcode = Inst.getOpcode();
3857   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3858   assert(OpNum != -1);
3859 
3860   const auto &Op = Inst.getOperand(OpNum);
3861   if (!hasFlatOffsets() && Op.getImm() != 0) {
3862     Error(getFlatOffsetLoc(Operands),
3863           "flat offset modifier is not supported on this GPU");
3864     return false;
3865   }
3866 
3867   // GLOBAL and SCRATCH offsets are signed; for the plain FLAT segment the
3868   // offset must be positive (its MSB is ignored and forced to zero).
3869   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3870     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3871     if (!isIntN(OffsetSize, Op.getImm())) {
3872       Error(getFlatOffsetLoc(Operands),
3873             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3874       return false;
3875     }
3876   } else {
3877     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3878     if (!isUIntN(OffsetSize, Op.getImm())) {
3879       Error(getFlatOffsetLoc(Operands),
3880             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3881       return false;
3882     }
3883   }
3884 
3885   return true;
3886 }
3887 
3888 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3889   // Start with second operand because SMEM Offset cannot be dst or src0.
3890   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3891     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3892     if (Op.isSMEMOffset())
3893       return Op.getStartLoc();
3894   }
3895   return getLoc();
3896 }
3897 
3898 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3899                                          const OperandVector &Operands) {
3900   if (isCI() || isSI())
3901     return true;
3902 
3903   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3904   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3905     return true;
3906 
3907   auto Opcode = Inst.getOpcode();
3908   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3909   if (OpNum == -1)
3910     return true;
3911 
3912   const auto &Op = Inst.getOperand(OpNum);
3913   if (!Op.isImm())
3914     return true;
3915 
3916   uint64_t Offset = Op.getImm();
3917   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3918   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3919       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3920     return true;
3921 
3922   Error(getSMEMOffsetLoc(Operands),
3923         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3924                                "expected a 21-bit signed offset");
3925 
3926   return false;
3927 }
3928 
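// SOP2/SOPC instructions may use at most one unique literal or expression
// across their source operands.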
3929 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3930   unsigned Opcode = Inst.getOpcode();
3931   const MCInstrDesc &Desc = MII.get(Opcode);
3932   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3933     return true;
3934 
3935   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3936   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3937 
3938   const int OpIndices[] = { Src0Idx, Src1Idx };
3939 
3940   unsigned NumExprs = 0;
3941   unsigned NumLiterals = 0;
3942   uint32_t LiteralValue;
3943 
3944   for (int OpIdx : OpIndices) {
3945     if (OpIdx == -1) break;
3946 
3947     const MCOperand &MO = Inst.getOperand(OpIdx);
3948     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3949     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3950       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3951         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3952         if (NumLiterals == 0 || LiteralValue != Value) {
3953           LiteralValue = Value;
3954           ++NumLiterals;
3955         }
3956       } else if (MO.isExpr()) {
3957         ++NumExprs;
3958       }
3959     }
3960   }
3961 
3962   return NumLiterals + NumExprs <= 1;
3963 }
3964 
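// v_permlane16/v_permlanex16 use only the low two op_sel bits; setting any
// other bit is invalid.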
3965 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3966   const unsigned Opc = Inst.getOpcode();
3967   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3968       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3969     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3970     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3971 
3972     if (OpSel & ~3)
3973       return false;
3974   }
3975   return true;
3976 }
3977 
3978 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3979                                   const OperandVector &Operands) {
3980   const unsigned Opc = Inst.getOpcode();
3981   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3982   if (DppCtrlIdx < 0)
3983     return true;
3984   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3985 
3986   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
3987     // DPP64 is supported for row_newbcast only.
3988     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3989     if (Src0Idx >= 0 &&
3990         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3991       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3992       Error(S, "64 bit dpp only supports row_newbcast");
3993       return false;
3994     }
3995   }
3996 
3997   return true;
3998 }
3999 
4000 // Check if VCC register matches wavefront size
4001 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4002   auto FB = getFeatureBits();
4003   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4004     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4005 }
4006 
4007 // Only one unique literal can be used. VOP3 literals require GFX10+.
4008 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4009                                          const OperandVector &Operands) {
4010   unsigned Opcode = Inst.getOpcode();
4011   const MCInstrDesc &Desc = MII.get(Opcode);
4012   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4013   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4014       ImmIdx == -1)
4015     return true;
4016 
4017   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4018   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4019   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4020 
4021   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4022 
4023   unsigned NumExprs = 0;
4024   unsigned NumLiterals = 0;
4025   uint32_t LiteralValue;
4026 
4027   for (int OpIdx : OpIndices) {
4028     if (OpIdx == -1)
4029       continue;
4030 
4031     const MCOperand &MO = Inst.getOperand(OpIdx);
4032     if (!MO.isImm() && !MO.isExpr())
4033       continue;
4034     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4035       continue;
4036 
4037     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4038         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4039       Error(getConstLoc(Operands),
4040             "inline constants are not allowed for this operand");
4041       return false;
4042     }
4043 
4044     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4045       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4046       if (NumLiterals == 0 || LiteralValue != Value) {
4047         LiteralValue = Value;
4048         ++NumLiterals;
4049       }
4050     } else if (MO.isExpr()) {
4051       ++NumExprs;
4052     }
4053   }
4054   NumLiterals += NumExprs;
4055 
4056   if (!NumLiterals)
4057     return true;
4058 
4059   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4060     Error(getLitLoc(Operands), "literal operands are not supported");
4061     return false;
4062   }
4063 
4064   if (NumLiterals > 1) {
4065     Error(getLitLoc(Operands), "only one literal operand is allowed");
4066     return false;
4067   }
4068 
4069   return true;
4070 }
4071 
4072 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4073 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4074                          const MCRegisterInfo *MRI) {
4075   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4076   if (OpIdx < 0)
4077     return -1;
4078 
4079   const MCOperand &Op = Inst.getOperand(OpIdx);
4080   if (!Op.isReg())
4081     return -1;
4082 
4083   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4084   auto Reg = Sub ? Sub : Op.getReg();
4085   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4086   return AGPR32.contains(Reg) ? 1 : 0;
4087 }
4088 
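// For FLAT, MUBUF, MTBUF, MIMG and DS memory instructions: on gfx90a the
// data and dst operands must be uniformly VGPRs or uniformly AGPRs; on
// other targets AGPR data/dst operands are not allowed.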
4089 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4090   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4091   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4092                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4093                   SIInstrFlags::DS)) == 0)
4094     return true;
4095 
4096   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4097                                                       : AMDGPU::OpName::vdata;
4098 
4099   const MCRegisterInfo *MRI = getMRI();
4100   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4101   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4102 
4103   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4104     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4105     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4106       return false;
4107   }
4108 
4109   auto FB = getFeatureBits();
4110   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4111     if (DataAreg < 0 || DstAreg < 0)
4112       return true;
4113     return DstAreg == DataAreg;
4114   }
4115 
4116   return DstAreg < 1 && DataAreg < 1;
4117 }
4118 
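// On gfx90a, VGPR and AGPR tuples must start at an even register index.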
4119 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4120   auto FB = getFeatureBits();
4121   if (!FB[AMDGPU::FeatureGFX90AInsts])
4122     return true;
4123 
4124   const MCRegisterInfo *MRI = getMRI();
4125   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4126   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4127   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4128     const MCOperand &Op = Inst.getOperand(I);
4129     if (!Op.isReg())
4130       continue;
4131 
4132     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4133     if (!Sub)
4134       continue;
4135 
4136     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4137       return false;
4138     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4139       return false;
4140   }
4141 
4142   return true;
4143 }
4144 
4145 // gfx90a has an undocumented limitation:
4146 // DS_GWS opcodes must use even-aligned registers.
4147 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4148                                   const OperandVector &Operands) {
4149   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4150     return true;
4151 
4152   int Opc = Inst.getOpcode();
4153   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4154       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4155     return true;
4156 
4157   const MCRegisterInfo *MRI = getMRI();
4158   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4159   int Data0Pos =
4160       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4161   assert(Data0Pos != -1);
4162   auto Reg = Inst.getOperand(Data0Pos).getReg();
4163   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4164   if (RegIdx & 1) {
4165     SMLoc RegLoc = getRegLoc(Reg, Operands);
4166     Error(RegLoc, "vgpr must be even aligned");
4167     return false;
4168   }
4169 
4170   return true;
4171 }
4172 
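// Validate cache policy (cpol) bits: SMRD accepts only glc/dlc, scc is not
// supported on gfx90a, returning atomics (other than MIMG) must use glc,
// and non-returning atomics must not.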
4173 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4174                                             const OperandVector &Operands,
4175                                             const SMLoc &IDLoc) {
4176   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4177                                            AMDGPU::OpName::cpol);
4178   if (CPolPos == -1)
4179     return true;
4180 
4181   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4182 
4183   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4184   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4185       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4186     Error(IDLoc, "invalid cache policy for SMRD instruction");
4187     return false;
4188   }
4189 
4190   if (isGFX90A() && (CPol & CPol::SCC)) {
4191     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4192     StringRef CStr(S.getPointer());
4193     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4194     Error(S, "scc is not supported on this GPU");
4195     return false;
4196   }
4197 
4198   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4199     return true;
4200 
4201   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4202     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4203       Error(IDLoc, "instruction must use glc");
4204       return false;
4205     }
4206   } else {
4207     if (CPol & CPol::GLC) {
4208       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4209       StringRef CStr(S.getPointer());
4210       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4211       Error(S, "instruction must not use glc");
4212       return false;
4213     }
4214   }
4215 
4216   return true;
4217 }
4218 
4219 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4220                                           const SMLoc &IDLoc,
4221                                           const OperandVector &Operands) {
4222   if (auto ErrMsg = validateLdsDirect(Inst)) {
4223     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4224     return false;
4225   }
4226   if (!validateSOPLiteral(Inst)) {
4227     Error(getLitLoc(Operands),
4228       "only one literal operand is allowed");
4229     return false;
4230   }
4231   if (!validateVOPLiteral(Inst, Operands)) {
4232     return false;
4233   }
4234   if (!validateConstantBusLimitations(Inst, Operands)) {
4235     return false;
4236   }
4237   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4238     return false;
4239   }
4240   if (!validateIntClampSupported(Inst)) {
4241     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4242       "integer clamping is not supported on this GPU");
4243     return false;
4244   }
4245   if (!validateOpSel(Inst)) {
4246     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4247       "invalid op_sel operand");
4248     return false;
4249   }
4250   if (!validateDPP(Inst, Operands)) {
4251     return false;
4252   }
4253   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4254   if (!validateMIMGD16(Inst)) {
4255     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4256       "d16 modifier is not supported on this GPU");
4257     return false;
4258   }
4259   if (!validateMIMGDim(Inst)) {
4260     Error(IDLoc, "dim modifier is required on this GPU");
4261     return false;
4262   }
4263   if (!validateMIMGMSAA(Inst)) {
4264     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4265           "invalid dim; must be MSAA type");
4266     return false;
4267   }
4268   if (!validateMIMGDataSize(Inst)) {
4269     Error(IDLoc,
4270       "image data size does not match dmask and tfe");
4271     return false;
4272   }
4273   if (!validateMIMGAddrSize(Inst)) {
4274     Error(IDLoc,
4275       "image address size does not match dim and a16");
4276     return false;
4277   }
4278   if (!validateMIMGAtomicDMask(Inst)) {
4279     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4280       "invalid atomic image dmask");
4281     return false;
4282   }
4283   if (!validateMIMGGatherDMask(Inst)) {
4284     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4285       "invalid image_gather dmask: only one bit must be set");
4286     return false;
4287   }
4288   if (!validateMovrels(Inst, Operands)) {
4289     return false;
4290   }
4291   if (!validateFlatOffset(Inst, Operands)) {
4292     return false;
4293   }
4294   if (!validateSMEMOffset(Inst, Operands)) {
4295     return false;
4296   }
4297   if (!validateMAIAccWrite(Inst, Operands)) {
4298     return false;
4299   }
4300   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4301     return false;
4302   }
4303 
4304   if (!validateAGPRLdSt(Inst)) {
4305     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4306     ? "invalid register class: data and dst should be all VGPR or AGPR"
4307     : "invalid register class: agpr loads and stores not supported on this GPU"
4308     );
4309     return false;
4310   }
4311   if (!validateVGPRAlign(Inst)) {
4312     Error(IDLoc,
4313       "invalid register class: vgpr tuples must be 64 bit aligned");
4314     return false;
4315   }
4316   if (!validateGWS(Inst, Operands)) {
4317     return false;
4318   }
4319 
4320   if (!validateDivScale(Inst)) {
4321     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4322     return false;
4323   }
4327 
4328   return true;
4329 }
4330 
4331 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4332                                             const FeatureBitset &FBS,
4333                                             unsigned VariantID = 0);
4334 
4335 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4336                                 const FeatureBitset &AvailableFeatures,
4337                                 unsigned VariantID);
4338 
4339 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4340                                        const FeatureBitset &FBS) {
4341   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4342 }
4343 
4344 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4345                                        const FeatureBitset &FBS,
4346                                        ArrayRef<unsigned> Variants) {
4347   for (auto Variant : Variants) {
4348     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4349       return true;
4350   }
4351 
4352   return false;
4353 }
4354 
4355 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4356                                                   const SMLoc &IDLoc) {
4357   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4358 
4359   // Check if requested instruction variant is supported.
4360   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4361     return false;
4362 
4363   // This instruction is not supported.
4364   // Clear any other pending errors because they are no longer relevant.
4365   getParser().clearPendingErrors();
4366 
4367   // Requested instruction variant is not supported.
4368   // Check if any other variants are supported.
4369   StringRef VariantName = getMatchedVariantName();
4370   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4371     return Error(IDLoc,
4372                  Twine(VariantName,
4373                        " variant of this instruction is not supported"));
4374   }
4375 
4376   // Finally check if this instruction is supported on any other GPU.
4377   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4378     return Error(IDLoc, "instruction not supported on this GPU");
4379   }
4380 
4381   // Instruction not supported on any GPU. Probably a typo.
4382   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4383   return Error(IDLoc, "invalid instruction" + Suggestion);
4384 }
4385 
4386 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4387                                               OperandVector &Operands,
4388                                               MCStreamer &Out,
4389                                               uint64_t &ErrorInfo,
4390                                               bool MatchingInlineAsm) {
4391   MCInst Inst;
4392   unsigned Result = Match_Success;
4393   for (auto Variant : getMatchedVariants()) {
4394     uint64_t EI;
4395     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4396                                   Variant);
4397     // We order match statuses from least to most specific and keep the
4398     // most specific status as the result:
4399     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4400     if ((R == Match_Success) ||
4401         (R == Match_PreferE32) ||
4402         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4403         (R == Match_InvalidOperand && Result != Match_MissingFeature
4404                                    && Result != Match_PreferE32) ||
4405         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4406                                    && Result != Match_MissingFeature
4407                                    && Result != Match_PreferE32)) {
4408       Result = R;
4409       ErrorInfo = EI;
4410     }
4411     if (R == Match_Success)
4412       break;
4413   }
4414 
4415   if (Result == Match_Success) {
4416     if (!validateInstruction(Inst, IDLoc, Operands)) {
4417       return true;
4418     }
4419     Inst.setLoc(IDLoc);
4420     Out.emitInstruction(Inst, getSTI());
4421     return false;
4422   }
4423 
4424   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4425   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4426     return true;
4427   }
4428 
4429   switch (Result) {
4430   default: break;
4431   case Match_MissingFeature:
4432     // It has been verified that the specified instruction
4433     // mnemonic is valid. A match was found but it requires
4434     // features which are not supported on this GPU.
4435     return Error(IDLoc, "operands are not valid for this GPU or mode");
4436 
4437   case Match_InvalidOperand: {
4438     SMLoc ErrorLoc = IDLoc;
4439     if (ErrorInfo != ~0ULL) {
4440       if (ErrorInfo >= Operands.size()) {
4441         return Error(IDLoc, "too few operands for instruction");
4442       }
4443       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4444       if (ErrorLoc == SMLoc())
4445         ErrorLoc = IDLoc;
4446     }
4447     return Error(ErrorLoc, "invalid operand for instruction");
4448   }
4449 
4450   case Match_PreferE32:
4451     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4452                         "should be encoded as e32");
4453   case Match_MnemonicFail:
4454     llvm_unreachable("Invalid instructions should have been handled already");
4455   }
4456   llvm_unreachable("Implement any new match types added!");
4457 }
4458 
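// Parse an absolute expression and truncate the result to 32 bits.
// Returns true on failure.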
4459 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4460   int64_t Tmp = -1;
4461   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4462     return true;
4463   }
4464   if (getParser().parseAbsoluteExpression(Tmp)) {
4465     return true;
4466   }
4467   Ret = static_cast<uint32_t>(Tmp);
4468   return false;
4469 }
4470 
4471 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4472                                                uint32_t &Minor) {
4473   if (ParseAsAbsoluteExpression(Major))
4474     return TokError("invalid major version");
4475 
4476   if (!trySkipToken(AsmToken::Comma))
4477     return TokError("minor version number required, comma expected");
4478 
4479   if (ParseAsAbsoluteExpression(Minor))
4480     return TokError("invalid minor version");
4481 
4482   return false;
4483 }
4484 
4485 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4486   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4487     return TokError("directive only supported for amdgcn architecture");
4488 
4489   std::string TargetIDDirective;
4490   SMLoc TargetStart = getTok().getLoc();
4491   if (getParser().parseEscapedString(TargetIDDirective))
4492     return true;
4493 
4494   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4495   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4496     return getParser().Error(TargetRange.Start,
4497         (Twine(".amdgcn_target directive's target id ") +
4498          Twine(TargetIDDirective) +
4499          Twine(" does not match the specified target id ") +
4500          Twine(getTargetStreamer().getTargetID()->toString())).str());
4501 
4502   return false;
4503 }
4504 
4505 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4506   return Error(Range.Start, "value out of range", Range);
4507 }
4508 
4509 bool AMDGPUAsmParser::calculateGPRBlocks(
4510     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4511     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4512     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4513     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4514   // TODO(scott.linder): These calculations are duplicated from
4515   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4516   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4517 
4518   unsigned NumVGPRs = NextFreeVGPR;
4519   unsigned NumSGPRs = NextFreeSGPR;
4520 
4521   if (Version.Major >= 10)
4522     NumSGPRs = 0;
4523   else {
4524     unsigned MaxAddressableNumSGPRs =
4525         IsaInfo::getAddressableNumSGPRs(&getSTI());
4526 
4527     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4528         NumSGPRs > MaxAddressableNumSGPRs)
4529       return OutOfRangeError(SGPRRange);
4530 
4531     NumSGPRs +=
4532         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4533 
4534     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4535         NumSGPRs > MaxAddressableNumSGPRs)
4536       return OutOfRangeError(SGPRRange);
4537 
4538     if (Features.test(FeatureSGPRInitBug))
4539       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4540   }
4541 
4542   VGPRBlocks =
4543       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4544   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4545 
4546   return false;
4547 }
4548 
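// Parse a .amdhsa_kernel directive: consume .amdhsa_* fields up to
// .end_amdhsa_kernel and populate the kernel descriptor.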
4549 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4550   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4551     return TokError("directive only supported for amdgcn architecture");
4552 
4553   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4554     return TokError("directive only supported for amdhsa OS");
4555 
4556   StringRef KernelName;
4557   if (getParser().parseIdentifier(KernelName))
4558     return true;
4559 
4560   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4561 
4562   StringSet<> Seen;
4563 
4564   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4565 
4566   SMRange VGPRRange;
4567   uint64_t NextFreeVGPR = 0;
4568   uint64_t AccumOffset = 0;
4569   SMRange SGPRRange;
4570   uint64_t NextFreeSGPR = 0;
4571   unsigned UserSGPRCount = 0;
4572   bool ReserveVCC = true;
4573   bool ReserveFlatScr = true;
4574   Optional<bool> EnableWavefrontSize32;
4575 
4576   while (true) {
4577     while (trySkipToken(AsmToken::EndOfStatement));
4578 
4579     StringRef ID;
4580     SMRange IDRange = getTok().getLocRange();
4581     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4582       return true;
4583 
4584     if (ID == ".end_amdhsa_kernel")
4585       break;
4586 
4587     if (Seen.find(ID) != Seen.end())
4588       return TokError(".amdhsa_ directives cannot be repeated");
4589     Seen.insert(ID);
4590 
4591     SMLoc ValStart = getLoc();
4592     int64_t IVal;
4593     if (getParser().parseAbsoluteExpression(IVal))
4594       return true;
4595     SMLoc ValEnd = getLoc();
4596     SMRange ValRange = SMRange(ValStart, ValEnd);
4597 
4598     if (IVal < 0)
4599       return OutOfRangeError(ValRange);
4600 
4601     uint64_t Val = IVal;
4602 
4603 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4604   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4605     return OutOfRangeError(RANGE);                                             \
4606   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4607 
4608     if (ID == ".amdhsa_group_segment_fixed_size") {
4609       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4610         return OutOfRangeError(ValRange);
4611       KD.group_segment_fixed_size = Val;
4612     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4613       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4614         return OutOfRangeError(ValRange);
4615       KD.private_segment_fixed_size = Val;
4616     } else if (ID == ".amdhsa_kernarg_size") {
4617       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4618         return OutOfRangeError(ValRange);
4619       KD.kernarg_size = Val;
4620     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4621       if (hasArchitectedFlatScratch())
4622         return Error(IDRange.Start,
4623                      "directive is not supported with architected flat scratch",
4624                      IDRange);
4625       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4626                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4627                        Val, ValRange);
4628       if (Val)
4629         UserSGPRCount += 4;
4630     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4631       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4632                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4633                        ValRange);
4634       if (Val)
4635         UserSGPRCount += 2;
4636     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4637       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4638                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4639                        ValRange);
4640       if (Val)
4641         UserSGPRCount += 2;
4642     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4643       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4644                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4645                        Val, ValRange);
4646       if (Val)
4647         UserSGPRCount += 2;
4648     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4649       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4650                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4651                        ValRange);
4652       if (Val)
4653         UserSGPRCount += 2;
4654     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4655       if (hasArchitectedFlatScratch())
4656         return Error(IDRange.Start,
4657                      "directive is not supported with architected flat scratch",
4658                      IDRange);
4659       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4660                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4661                        ValRange);
4662       if (Val)
4663         UserSGPRCount += 2;
4664     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4665       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4666                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4667                        Val, ValRange);
4668       if (Val)
4669         UserSGPRCount += 1;
4670     } else if (ID == ".amdhsa_wavefront_size32") {
4671       if (IVersion.Major < 10)
4672         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4673       EnableWavefrontSize32 = Val;
4674       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4675                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4676                        Val, ValRange);
4677     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4678       if (hasArchitectedFlatScratch())
4679         return Error(IDRange.Start,
4680                      "directive is not supported with architected flat scratch",
4681                      IDRange);
4682       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4683                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4684     } else if (ID == ".amdhsa_enable_private_segment") {
4685       if (!hasArchitectedFlatScratch())
4686         return Error(
4687             IDRange.Start,
4688             "directive is not supported without architected flat scratch",
4689             IDRange);
4690       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4691                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4692     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4693       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4694                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4695                        ValRange);
4696     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4697       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4698                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4699                        ValRange);
4700     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4701       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4702                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4703                        ValRange);
4704     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4705       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4706                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4707                        ValRange);
4708     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4709       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4710                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4711                        ValRange);
4712     } else if (ID == ".amdhsa_next_free_vgpr") {
4713       VGPRRange = ValRange;
4714       NextFreeVGPR = Val;
4715     } else if (ID == ".amdhsa_next_free_sgpr") {
4716       SGPRRange = ValRange;
4717       NextFreeSGPR = Val;
4718     } else if (ID == ".amdhsa_accum_offset") {
4719       if (!isGFX90A())
4720         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4721       AccumOffset = Val;
4722     } else if (ID == ".amdhsa_reserve_vcc") {
4723       if (!isUInt<1>(Val))
4724         return OutOfRangeError(ValRange);
4725       ReserveVCC = Val;
4726     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4727       if (IVersion.Major < 7)
4728         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4729       if (hasArchitectedFlatScratch())
4730         return Error(IDRange.Start,
4731                      "directive is not supported with architected flat scratch",
4732                      IDRange);
4733       if (!isUInt<1>(Val))
4734         return OutOfRangeError(ValRange);
4735       ReserveFlatScr = Val;
4736     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4737       if (IVersion.Major < 8)
4738         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4739       if (!isUInt<1>(Val))
4740         return OutOfRangeError(ValRange);
4741       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4742         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4743                                  IDRange);
4744     } else if (ID == ".amdhsa_float_round_mode_32") {
4745       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4746                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4747     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4748       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4749                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4750     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4751       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4752                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4753     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4754       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4755                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4756                        ValRange);
4757     } else if (ID == ".amdhsa_dx10_clamp") {
4758       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4759                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4760     } else if (ID == ".amdhsa_ieee_mode") {
4761       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4762                        Val, ValRange);
4763     } else if (ID == ".amdhsa_fp16_overflow") {
4764       if (IVersion.Major < 9)
4765         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4766       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4767                        ValRange);
4768     } else if (ID == ".amdhsa_tg_split") {
4769       if (!isGFX90A())
4770         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4771       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4772                        ValRange);
4773     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4774       if (IVersion.Major < 10)
4775         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4776       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4777                        ValRange);
4778     } else if (ID == ".amdhsa_memory_ordered") {
4779       if (IVersion.Major < 10)
4780         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4781       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4782                        ValRange);
4783     } else if (ID == ".amdhsa_forward_progress") {
4784       if (IVersion.Major < 10)
4785         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4786       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4787                        ValRange);
4788     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4789       PARSE_BITS_ENTRY(
4790           KD.compute_pgm_rsrc2,
4791           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4792           ValRange);
4793     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4794       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4795                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4796                        Val, ValRange);
4797     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4798       PARSE_BITS_ENTRY(
4799           KD.compute_pgm_rsrc2,
4800           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4801           ValRange);
4802     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4803       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4804                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4805                        Val, ValRange);
4806     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4807       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4808                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4809                        Val, ValRange);
4810     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4811       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4812                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4813                        Val, ValRange);
4814     } else if (ID == ".amdhsa_exception_int_div_zero") {
4815       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4816                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4817                        Val, ValRange);
4818     } else {
4819       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4820     }
4821 
4822 #undef PARSE_BITS_ENTRY
4823   }
4824 
4825   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4826     return TokError(".amdhsa_next_free_vgpr directive is required");
4827 
4828   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4829     return TokError(".amdhsa_next_free_sgpr directive is required");
4830 
4831   unsigned VGPRBlocks;
4832   unsigned SGPRBlocks;
4833   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4834                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4835                          EnableWavefrontSize32, NextFreeVGPR,
4836                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4837                          SGPRBlocks))
4838     return true;
4839 
4840   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4841           VGPRBlocks))
4842     return OutOfRangeError(VGPRRange);
4843   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4844                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4845 
4846   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4847           SGPRBlocks))
4848     return OutOfRangeError(SGPRRange);
4849   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4850                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4851                   SGPRBlocks);
4852 
4853   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4854     return TokError("too many user SGPRs enabled");
4855   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4856                   UserSGPRCount);
4857 
4858   if (isGFX90A()) {
4859     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4860       return TokError(".amdhsa_accum_offset directive is required");
4861     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4862       return TokError("accum_offset should be in range [4..256] in "
4863                       "increments of 4");
4864     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4865       return TokError("accum_offset exceeds total VGPR allocation");
4866     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4867                     (AccumOffset / 4 - 1));
4868   }
4869 
4870   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4871       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4872       ReserveFlatScr);
4873   return false;
4874 }
4875 
4876 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4877   uint32_t Major;
4878   uint32_t Minor;
4879 
4880   if (ParseDirectiveMajorMinor(Major, Minor))
4881     return true;
4882 
4883   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4884   return false;
4885 }
4886 
4887 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4888   uint32_t Major;
4889   uint32_t Minor;
4890   uint32_t Stepping;
4891   StringRef VendorName;
4892   StringRef ArchName;
4893 
4894   // If this directive has no arguments, then use the ISA version for the
4895   // targeted GPU.
4896   if (isToken(AsmToken::EndOfStatement)) {
4897     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4898     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4899                                                         ISA.Stepping,
4900                                                         "AMD", "AMDGPU");
4901     return false;
4902   }
4903 
4904   if (ParseDirectiveMajorMinor(Major, Minor))
4905     return true;
4906 
4907   if (!trySkipToken(AsmToken::Comma))
4908     return TokError("stepping version number required, comma expected");
4909 
4910   if (ParseAsAbsoluteExpression(Stepping))
4911     return TokError("invalid stepping version");
4912 
4913   if (!trySkipToken(AsmToken::Comma))
4914     return TokError("vendor name required, comma expected");
4915 
4916   if (!parseString(VendorName, "invalid vendor name"))
4917     return true;
4918 
4919   if (!trySkipToken(AsmToken::Comma))
4920     return TokError("arch name required, comma expected");
4921 
4922   if (!parseString(ArchName, "invalid arch name"))
4923     return true;
4924 
4925   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4926                                                       VendorName, ArchName);
4927   return false;
4928 }
4929 
4930 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4931                                                amd_kernel_code_t &Header) {
4932   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4933   // assembly for backwards compatibility.
4934   if (ID == "max_scratch_backing_memory_byte_size") {
4935     Parser.eatToEndOfStatement();
4936     return false;
4937   }
4938 
4939   SmallString<40> ErrStr;
4940   raw_svector_ostream Err(ErrStr);
4941   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4942     return TokError(Err.str());
4943   }
4944   Lex();
4945 
4946   if (ID == "enable_wavefront_size32") {
4947     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4948       if (!isGFX10Plus())
4949         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4950       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4951         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4952     } else {
4953       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4954         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4955     }
4956   }
4957 
4958   if (ID == "wavefront_size") {
4959     if (Header.wavefront_size == 5) {
4960       if (!isGFX10Plus())
4961         return TokError("wavefront_size=5 is only allowed on GFX10+");
4962       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4963         return TokError("wavefront_size=5 requires +WavefrontSize32");
4964     } else if (Header.wavefront_size == 6) {
4965       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4966         return TokError("wavefront_size=6 requires +WavefrontSize64");
4967     }
4968   }
4969 
4970   if (ID == "enable_wgp_mode") {
4971     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4972         !isGFX10Plus())
4973       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4974   }
4975 
4976   if (ID == "enable_mem_ordered") {
4977     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4978         !isGFX10Plus())
4979       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4980   }
4981 
4982   if (ID == "enable_fwd_progress") {
4983     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4984         !isGFX10Plus())
4985       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4986   }
4987 
4988   return false;
4989 }
4990 
4991 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4992   amd_kernel_code_t Header;
4993   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4994 
4995   while (true) {
4996     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4997     // will set the current token to EndOfStatement.
4998     while(trySkipToken(AsmToken::EndOfStatement));
4999 
5000     StringRef ID;
5001     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5002       return true;
5003 
5004     if (ID == ".end_amd_kernel_code_t")
5005       break;
5006 
5007     if (ParseAMDKernelCodeTValue(ID, Header))
5008       return true;
5009   }
5010 
5011   getTargetStreamer().EmitAMDKernelCodeT(Header);
5012 
5013   return false;
5014 }
5015 
5016 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5017   StringRef KernelName;
5018   if (!parseId(KernelName, "expected symbol name"))
5019     return true;
5020 
5021   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5022                                            ELF::STT_AMDGPU_HSA_KERNEL);
5023 
5024   KernelScope.initialize(getContext());
5025   return false;
5026 }
5027 
5028 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5029   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5030     return Error(getLoc(),
5031                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5032                  "architectures");
5033   }
5034 
5035   auto TargetIDDirective = getLexer().getTok().getStringContents();
5036   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5037     return Error(getParser().getTok().getLoc(), "target id must match options");
5038 
5039   getTargetStreamer().EmitISAVersion();
5040   Lex();
5041 
5042   return false;
5043 }
5044 
5045 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5046   const char *AssemblerDirectiveBegin;
5047   const char *AssemblerDirectiveEnd;
5048   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5049       isHsaAbiVersion3Or4(&getSTI())
5050           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5051                             HSAMD::V3::AssemblerDirectiveEnd)
5052           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5053                             HSAMD::AssemblerDirectiveEnd);
5054 
5055   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5056     return Error(getLoc(),
5057                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5058                  "not available on non-amdhsa OSes")).str());
5059   }
5060 
5061   std::string HSAMetadataString;
5062   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5063                           HSAMetadataString))
5064     return true;
5065 
5066   if (isHsaAbiVersion3Or4(&getSTI())) {
5067     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5068       return Error(getLoc(), "invalid HSA metadata");
5069   } else {
5070     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5071       return Error(getLoc(), "invalid HSA metadata");
5072   }
5073 
5074   return false;
5075 }
5076 
5077 /// Common code to parse out a block of text (typically YAML) between start and
5078 /// end directives.
5079 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5080                                           const char *AssemblerDirectiveEnd,
5081                                           std::string &CollectString) {
5082 
5083   raw_string_ostream CollectStream(CollectString);
5084 
5085   getLexer().setSkipSpace(false);
5086 
5087   bool FoundEnd = false;
5088   while (!isToken(AsmToken::Eof)) {
5089     while (isToken(AsmToken::Space)) {
5090       CollectStream << getTokenStr();
5091       Lex();
5092     }
5093 
5094     if (trySkipId(AssemblerDirectiveEnd)) {
5095       FoundEnd = true;
5096       break;
5097     }
5098 
5099     CollectStream << Parser.parseStringToEndOfStatement()
5100                   << getContext().getAsmInfo()->getSeparatorString();
5101 
5102     Parser.eatToEndOfStatement();
5103   }
5104 
5105   getLexer().setSkipSpace(true);
5106 
5107   if (isToken(AsmToken::Eof) && !FoundEnd) {
5108     return TokError(Twine("expected directive ") +
5109                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5110   }
5111 
5112   CollectStream.flush();
5113   return false;
5114 }
5115 
5116 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5117 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5118   std::string String;
5119   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5120                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5121     return true;
5122 
5123   auto PALMetadata = getTargetStreamer().getPALMetadata();
5124   if (!PALMetadata->setFromString(String))
5125     return Error(getLoc(), "invalid PAL metadata");
5126   return false;
5127 }
5128 
5129 /// Parse the assembler directive for old linear-format PAL metadata.
5130 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5131   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5132     return Error(getLoc(),
5133                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5134                  "not available on non-amdpal OSes")).str());
5135   }
5136 
5137   auto PALMetadata = getTargetStreamer().getPALMetadata();
5138   PALMetadata->setLegacy();
5139   for (;;) {
5140     uint32_t Key, Value;
5141     if (ParseAsAbsoluteExpression(Key)) {
5142       return TokError(Twine("invalid value in ") +
5143                       Twine(PALMD::AssemblerDirective));
5144     }
5145     if (!trySkipToken(AsmToken::Comma)) {
5146       return TokError(Twine("expected an even number of values in ") +
5147                       Twine(PALMD::AssemblerDirective));
5148     }
5149     if (ParseAsAbsoluteExpression(Value)) {
5150       return TokError(Twine("invalid value in ") +
5151                       Twine(PALMD::AssemblerDirective));
5152     }
5153     PALMetadata->setRegister(Key, Value);
5154     if (!trySkipToken(AsmToken::Comma))
5155       break;
5156   }
5157   return false;
5158 }
5159 
5160 /// ParseDirectiveAMDGPULDS
5161 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
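///  e.g. .amdgpu_lds lds_buffer, 4096, 16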
5162 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5163   if (getParser().checkForValidSection())
5164     return true;
5165 
5166   StringRef Name;
5167   SMLoc NameLoc = getLoc();
5168   if (getParser().parseIdentifier(Name))
5169     return TokError("expected identifier in directive");
5170 
5171   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5172   if (parseToken(AsmToken::Comma, "expected ','"))
5173     return true;
5174 
5175   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5176 
5177   int64_t Size;
5178   SMLoc SizeLoc = getLoc();
5179   if (getParser().parseAbsoluteExpression(Size))
5180     return true;
5181   if (Size < 0)
5182     return Error(SizeLoc, "size must be non-negative");
5183   if (Size > LocalMemorySize)
5184     return Error(SizeLoc, "size is too large");
5185 
5186   int64_t Alignment = 4;
5187   if (trySkipToken(AsmToken::Comma)) {
5188     SMLoc AlignLoc = getLoc();
5189     if (getParser().parseAbsoluteExpression(Alignment))
5190       return true;
5191     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5192       return Error(AlignLoc, "alignment must be a power of two");
5193 
5194     // Alignment larger than the size of LDS is possible in theory, as long
5195     // as the linker manages to place the symbol at address 0, but we do want
5196     // to make sure the alignment fits nicely into a 32-bit integer.
5197     if (Alignment >= 1u << 31)
5198       return Error(AlignLoc, "alignment is too large");
5199   }
5200 
5201   if (parseToken(AsmToken::EndOfStatement,
5202                  "unexpected token in '.amdgpu_lds' directive"))
5203     return true;
5204 
5205   Symbol->redefineIfPossible();
5206   if (!Symbol->isUndefined())
5207     return Error(NameLoc, "invalid symbol redefinition");
5208 
5209   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5210   return false;
5211 }
5212 
5213 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5214   StringRef IDVal = DirectiveID.getString();
5215 
5216   if (isHsaAbiVersion3Or4(&getSTI())) {
5217     if (IDVal == ".amdhsa_kernel")
5218      return ParseDirectiveAMDHSAKernel();
5219 
5220     // TODO: Restructure/combine with PAL metadata directive.
5221     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5222       return ParseDirectiveHSAMetadata();
5223   } else {
5224     if (IDVal == ".hsa_code_object_version")
5225       return ParseDirectiveHSACodeObjectVersion();
5226 
5227     if (IDVal == ".hsa_code_object_isa")
5228       return ParseDirectiveHSACodeObjectISA();
5229 
5230     if (IDVal == ".amd_kernel_code_t")
5231       return ParseDirectiveAMDKernelCodeT();
5232 
5233     if (IDVal == ".amdgpu_hsa_kernel")
5234       return ParseDirectiveAMDGPUHsaKernel();
5235 
5236     if (IDVal == ".amd_amdgpu_isa")
5237       return ParseDirectiveISAVersion();
5238 
5239     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5240       return ParseDirectiveHSAMetadata();
5241   }
5242 
5243   if (IDVal == ".amdgcn_target")
5244     return ParseDirectiveAMDGCNTarget();
5245 
5246   if (IDVal == ".amdgpu_lds")
5247     return ParseDirectiveAMDGPULDS();
5248 
5249   if (IDVal == PALMD::AssemblerDirectiveBegin)
5250     return ParseDirectivePALMetadataBegin();
5251 
5252   if (IDVal == PALMD::AssemblerDirective)
5253     return ParseDirectivePALMetadata();
5254 
5255   return true;
5256 }
5257 
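// Check whether the given physical register exists on the current subtarget.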
5258 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5259                                            unsigned RegNo) {
5260 
5261   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5262        R.isValid(); ++R) {
5263     if (*R == RegNo)
5264       return isGFX9Plus();
5265   }
5266 
5267   // GFX10 has 2 more SGPRs, 104 and 105.
5268   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5269        R.isValid(); ++R) {
5270     if (*R == RegNo)
5271       return hasSGPR104_SGPR105();
5272   }
5273 
5274   switch (RegNo) {
5275   case AMDGPU::SRC_SHARED_BASE:
5276   case AMDGPU::SRC_SHARED_LIMIT:
5277   case AMDGPU::SRC_PRIVATE_BASE:
5278   case AMDGPU::SRC_PRIVATE_LIMIT:
5279   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5280     return isGFX9Plus();
5281   case AMDGPU::TBA:
5282   case AMDGPU::TBA_LO:
5283   case AMDGPU::TBA_HI:
5284   case AMDGPU::TMA:
5285   case AMDGPU::TMA_LO:
5286   case AMDGPU::TMA_HI:
5287     return !isGFX9Plus();
5288   case AMDGPU::XNACK_MASK:
5289   case AMDGPU::XNACK_MASK_LO:
5290   case AMDGPU::XNACK_MASK_HI:
5291     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5292   case AMDGPU::SGPR_NULL:
5293     return isGFX10Plus();
5294   default:
5295     break;
5296   }
5297 
5298   if (isCI())
5299     return true;
5300 
5301   if (isSI() || isGFX10Plus()) {
5302     // No flat_scr on SI.
5303     // On GFX10 flat scratch is not a valid register operand and can only be
5304     // accessed with s_setreg/s_getreg.
5305     switch (RegNo) {
5306     case AMDGPU::FLAT_SCR:
5307     case AMDGPU::FLAT_SCR_LO:
5308     case AMDGPU::FLAT_SCR_HI:
5309       return false;
5310     default:
5311       return true;
5312     }
5313   }
5314 
5315   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5316   // SI/CI have.
5317   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5318        R.isValid(); ++R) {
5319     if (*R == RegNo)
5320       return hasSGPR102_SGPR103();
5321   }
5322 
5323   return true;
5324 }
5325 
5326 OperandMatchResultTy
5327 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5328                               OperandMode Mode) {
5329   // Try to parse with a custom parser
5330   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5331 
5332   // If we successfully parsed the operand or if there was an error parsing,
5333   // we are done.
5334   //
5335   // If we are parsing after we reach EndOfStatement then this means we
5336   // are appending default values to the Operands list.  This is only done
5337   // by the custom parser, so we shouldn't continue on to the generic parsing.
5338   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5339       isToken(AsmToken::EndOfStatement))
5340     return ResTy;
5341 
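  // MIMG NSA form (gfx10+): the address may be written as a bracketed list of
  // individual registers, e.g. [v4, v6, v7].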
5342   SMLoc RBraceLoc;
5343   SMLoc LBraceLoc = getLoc();
5344   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5345     unsigned Prefix = Operands.size();
5346 
5347     for (;;) {
5348       auto Loc = getLoc();
5349       ResTy = parseReg(Operands);
5350       if (ResTy == MatchOperand_NoMatch)
5351         Error(Loc, "expected a register");
5352       if (ResTy != MatchOperand_Success)
5353         return MatchOperand_ParseFail;
5354 
5355       RBraceLoc = getLoc();
5356       if (trySkipToken(AsmToken::RBrac))
5357         break;
5358 
5359       if (!skipToken(AsmToken::Comma,
5360                      "expected a comma or a closing square bracket")) {
5361         return MatchOperand_ParseFail;
5362       }
5363     }
5364 
5365     if (Operands.size() - Prefix > 1) {
5366       Operands.insert(Operands.begin() + Prefix,
5367                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5368       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5369     }
5370 
5371     return MatchOperand_Success;
5372   }
5373 
5374   return parseRegOrImm(Operands);
5375 }
5376 
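// Strip a forced-encoding suffix (_e64, _e32, _dpp or _sdwa) from the
// mnemonic, record the requested encoding, and return the base mnemonic.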
5377 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5378   // Clear any forced encodings from the previous instruction.
5379   setForcedEncodingSize(0);
5380   setForcedDPP(false);
5381   setForcedSDWA(false);
5382 
5383   if (Name.endswith("_e64")) {
5384     setForcedEncodingSize(64);
5385     return Name.substr(0, Name.size() - 4);
5386   } else if (Name.endswith("_e32")) {
5387     setForcedEncodingSize(32);
5388     return Name.substr(0, Name.size() - 4);
5389   } else if (Name.endswith("_dpp")) {
5390     setForcedDPP(true);
5391     return Name.substr(0, Name.size() - 4);
5392   } else if (Name.endswith("_sdwa")) {
5393     setForcedSDWA(true);
5394     return Name.substr(0, Name.size() - 5);
5395   }
5396   return Name;
5397 }
5398 
5399 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5400                                        StringRef Name,
5401                                        SMLoc NameLoc, OperandVector &Operands) {
5402   // Add the instruction mnemonic
5403   Name = parseMnemonicSuffix(Name);
5404   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5405 
5406   bool IsMIMG = Name.startswith("image_");
5407 
5408   while (!trySkipToken(AsmToken::EndOfStatement)) {
5409     OperandMode Mode = OperandMode_Default;
5410     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5411       Mode = OperandMode_NSA;
5412     CPolSeen = 0;
5413     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5414 
5415     if (Res != MatchOperand_Success) {
5416       checkUnsupportedInstruction(Name, NameLoc);
5417       if (!Parser.hasPendingError()) {
5418         // FIXME: use real operand location rather than the current location.
5419         StringRef Msg =
5420           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5421                                             "not a valid operand.";
5422         Error(getLoc(), Msg);
5423       }
5424       while (!trySkipToken(AsmToken::EndOfStatement)) {
5425         lex();
5426       }
5427       return true;
5428     }
5429 
5430     // Eat the comma or space if there is one.
5431     trySkipToken(AsmToken::Comma);
5432   }
5433 
5434   return false;
5435 }
5436 
5437 //===----------------------------------------------------------------------===//
5438 // Utility functions
5439 //===----------------------------------------------------------------------===//
5440 
5441 OperandMatchResultTy
5442 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5443 
5444   if (!trySkipId(Prefix, AsmToken::Colon))
5445     return MatchOperand_NoMatch;
5446 
5447   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5448 }
5449 
5450 OperandMatchResultTy
5451 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5452                                     AMDGPUOperand::ImmTy ImmTy,
5453                                     bool (*ConvertResult)(int64_t&)) {
5454   SMLoc S = getLoc();
5455   int64_t Value = 0;
5456 
5457   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5458   if (Res != MatchOperand_Success)
5459     return Res;
5460 
5461   if (ConvertResult && !ConvertResult(Value)) {
5462     Error(S, "invalid " + StringRef(Prefix) + " value.");
5463   }
5464 
5465   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5466   return MatchOperand_Success;
5467 }
5468 
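// Parse an operand of the form Prefix:[a,b,...] where each element must be
// 0 or 1; up to 4 elements are packed, LSB first, into a single immediate.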
5469 OperandMatchResultTy
5470 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5471                                              OperandVector &Operands,
5472                                              AMDGPUOperand::ImmTy ImmTy,
5473                                              bool (*ConvertResult)(int64_t&)) {
5474   SMLoc S = getLoc();
5475   if (!trySkipId(Prefix, AsmToken::Colon))
5476     return MatchOperand_NoMatch;
5477 
5478   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5479     return MatchOperand_ParseFail;
5480 
5481   unsigned Val = 0;
5482   const unsigned MaxSize = 4;
5483 
5484   // FIXME: How to verify the number of elements matches the number of src
5485   // operands?
5486   for (int I = 0; ; ++I) {
5487     int64_t Op;
5488     SMLoc Loc = getLoc();
5489     if (!parseExpr(Op))
5490       return MatchOperand_ParseFail;
5491 
5492     if (Op != 0 && Op != 1) {
5493       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5494       return MatchOperand_ParseFail;
5495     }
5496 
5497     Val |= (Op << I);
5498 
5499     if (trySkipToken(AsmToken::RBrac))
5500       break;
5501 
5502     if (I + 1 == MaxSize) {
5503       Error(getLoc(), "expected a closing square bracket");
5504       return MatchOperand_ParseFail;
5505     }
5506 
5507     if (!skipToken(AsmToken::Comma, "expected a comma"))
5508       return MatchOperand_ParseFail;
5509   }
5510 
5511   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5512   return MatchOperand_Success;
5513 }
5514 
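// Parse a named bit: the bare name (e.g. "gds") sets the bit, while the
// "no"-prefixed form (e.g. "nogds") clears it.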
5515 OperandMatchResultTy
5516 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5517                                AMDGPUOperand::ImmTy ImmTy) {
5518   int64_t Bit;
5519   SMLoc S = getLoc();
5520 
5521   if (trySkipId(Name)) {
5522     Bit = 1;
5523   } else if (trySkipId("no", Name)) {
5524     Bit = 0;
5525   } else {
5526     return MatchOperand_NoMatch;
5527   }
5528 
5529   if (Name == "r128" && !hasMIMG_R128()) {
5530     Error(S, "r128 modifier is not supported on this GPU");
5531     return MatchOperand_ParseFail;
5532   }
5533   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5534     Error(S, "a16 modifier is not supported on this GPU");
5535     return MatchOperand_ParseFail;
5536   }
5537 
5538   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5539     ImmTy = AMDGPUOperand::ImmTyR128A16;
5540 
5541   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5542   return MatchOperand_Success;
5543 }
5544 
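// Parse cache policy modifiers (glc, slc, dlc, scc and their "no" forms).
// Several modifiers may appear on one instruction; they are merged into a
// single CPol immediate, and repeated modifiers are diagnosed via CPolSeen.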
5545 OperandMatchResultTy
5546 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5547   unsigned CPolOn = 0;
5548   unsigned CPolOff = 0;
5549   SMLoc S = getLoc();
5550 
5551   if (trySkipId("glc"))
5552     CPolOn = AMDGPU::CPol::GLC;
5553   else if (trySkipId("noglc"))
5554     CPolOff = AMDGPU::CPol::GLC;
5555   else if (trySkipId("slc"))
5556     CPolOn = AMDGPU::CPol::SLC;
5557   else if (trySkipId("noslc"))
5558     CPolOff = AMDGPU::CPol::SLC;
5559   else if (trySkipId("dlc"))
5560     CPolOn = AMDGPU::CPol::DLC;
5561   else if (trySkipId("nodlc"))
5562     CPolOff = AMDGPU::CPol::DLC;
5563   else if (trySkipId("scc"))
5564     CPolOn = AMDGPU::CPol::SCC;
5565   else if (trySkipId("noscc"))
5566     CPolOff = AMDGPU::CPol::SCC;
5567   else
5568     return MatchOperand_NoMatch;
5569 
5570   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5571     Error(S, "dlc modifier is not supported on this GPU");
5572     return MatchOperand_ParseFail;
5573   }
5574 
5575   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5576     Error(S, "scc modifier is not supported on this GPU");
5577     return MatchOperand_ParseFail;
5578   }
5579 
5580   if (CPolSeen & (CPolOn | CPolOff)) {
5581     Error(S, "duplicate cache policy modifier");
5582     return MatchOperand_ParseFail;
5583   }
5584 
5585   CPolSeen |= (CPolOn | CPolOff);
5586 
5587   for (unsigned I = 1; I != Operands.size(); ++I) {
5588     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5589     if (Op.isCPol()) {
5590       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5591       return MatchOperand_Success;
5592     }
5593   }
5594 
5595   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5596                                               AMDGPUOperand::ImmTyCPol));
5597 
5598   return MatchOperand_Success;
5599 }
5600 
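// If the optional operand identified by ImmT was parsed, add its immediate to
// the instruction; otherwise add the supplied default value.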
5601 static void addOptionalImmOperand(
5602   MCInst& Inst, const OperandVector& Operands,
5603   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5604   AMDGPUOperand::ImmTy ImmT,
5605   int64_t Default = 0) {
5606   auto i = OptionalIdx.find(ImmT);
5607   if (i != OptionalIdx.end()) {
5608     unsigned Idx = i->second;
5609     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5610   } else {
5611     Inst.addOperand(MCOperand::createImm(Default));
5612   }
5613 }
5614 
5615 OperandMatchResultTy
5616 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5617                                        StringRef &Value,
5618                                        SMLoc &StringLoc) {
5619   if (!trySkipId(Prefix, AsmToken::Colon))
5620     return MatchOperand_NoMatch;
5621 
5622   StringLoc = getLoc();
5623   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5624                                                   : MatchOperand_ParseFail;
5625 }
5626 
5627 //===----------------------------------------------------------------------===//
5628 // MTBUF format
5629 //===----------------------------------------------------------------------===//
5630 
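// Try to parse "Pref:<value>". Returns false only on a hard parse error or an
// out-of-range value; Fmt is updated only if a value was actually parsed.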
5631 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5632                                   int64_t MaxVal,
5633                                   int64_t &Fmt) {
5634   int64_t Val;
5635   SMLoc Loc = getLoc();
5636 
5637   auto Res = parseIntWithPrefix(Pref, Val);
5638   if (Res == MatchOperand_ParseFail)
5639     return false;
5640   if (Res == MatchOperand_NoMatch)
5641     return true;
5642 
5643   if (Val < 0 || Val > MaxVal) {
5644     Error(Loc, Twine("out of range ", StringRef(Pref)));
5645     return false;
5646   }
5647 
5648   Fmt = Val;
5649   return true;
5650 }
5651 
5652 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5653 // values to live in a joint format operand in the MCInst encoding.
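// Either field may be omitted and they may appear in either order,
// e.g. "dfmt:1, nfmt:2" or "nfmt:2".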
5654 OperandMatchResultTy
5655 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5656   using namespace llvm::AMDGPU::MTBUFFormat;
5657 
5658   int64_t Dfmt = DFMT_UNDEF;
5659   int64_t Nfmt = NFMT_UNDEF;
5660 
5661   // dfmt and nfmt can appear in either order, and each is optional.
5662   for (int I = 0; I < 2; ++I) {
5663     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5664       return MatchOperand_ParseFail;
5665 
5666     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5667       return MatchOperand_ParseFail;
5668     }
5669     // Skip optional comma between dfmt/nfmt
5670     // but guard against 2 commas following each other.
5671     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5672         !peekToken().is(AsmToken::Comma)) {
5673       trySkipToken(AsmToken::Comma);
5674     }
5675   }
5676 
5677   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5678     return MatchOperand_NoMatch;
5679 
5680   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5681   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5682 
5683   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5684   return MatchOperand_Success;
5685 }
5686 
5687 OperandMatchResultTy
5688 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5689   using namespace llvm::AMDGPU::MTBUFFormat;
5690 
5691   int64_t Fmt = UFMT_UNDEF;
5692 
5693   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5694     return MatchOperand_ParseFail;
5695 
5696   if (Fmt == UFMT_UNDEF)
5697     return MatchOperand_NoMatch;
5698 
5699   Format = Fmt;
5700   return MatchOperand_Success;
5701 }
5702 
5703 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5704                                     int64_t &Nfmt,
5705                                     StringRef FormatStr,
5706                                     SMLoc Loc) {
5707   using namespace llvm::AMDGPU::MTBUFFormat;
5708   int64_t Format;
5709 
5710   Format = getDfmt(FormatStr);
5711   if (Format != DFMT_UNDEF) {
5712     Dfmt = Format;
5713     return true;
5714   }
5715 
5716   Format = getNfmt(FormatStr, getSTI());
5717   if (Format != NFMT_UNDEF) {
5718     Nfmt = Format;
5719     return true;
5720   }
5721 
5722   Error(Loc, "unsupported format");
5723   return false;
5724 }
5725 
5726 OperandMatchResultTy
5727 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5728                                           SMLoc FormatLoc,
5729                                           int64_t &Format) {
5730   using namespace llvm::AMDGPU::MTBUFFormat;
5731 
5732   int64_t Dfmt = DFMT_UNDEF;
5733   int64_t Nfmt = NFMT_UNDEF;
5734   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5735     return MatchOperand_ParseFail;
5736 
5737   if (trySkipToken(AsmToken::Comma)) {
5738     StringRef Str;
5739     SMLoc Loc = getLoc();
5740     if (!parseId(Str, "expected a format string") ||
5741         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5742       return MatchOperand_ParseFail;
5743     }
5744     if (Dfmt == DFMT_UNDEF) {
5745       Error(Loc, "duplicate numeric format");
5746       return MatchOperand_ParseFail;
5747     } else if (Nfmt == NFMT_UNDEF) {
5748       Error(Loc, "duplicate data format");
5749       return MatchOperand_ParseFail;
5750     }
5751   }
5752 
5753   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5754   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5755 
5756   if (isGFX10Plus()) {
5757     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5758     if (Ufmt == UFMT_UNDEF) {
5759       Error(FormatLoc, "unsupported format");
5760       return MatchOperand_ParseFail;
5761     }
5762     Format = Ufmt;
5763   } else {
5764     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5765   }
5766 
5767   return MatchOperand_Success;
5768 }
5769 
5770 OperandMatchResultTy
5771 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5772                                             SMLoc Loc,
5773                                             int64_t &Format) {
5774   using namespace llvm::AMDGPU::MTBUFFormat;
5775 
5776   auto Id = getUnifiedFormat(FormatStr);
5777   if (Id == UFMT_UNDEF)
5778     return MatchOperand_NoMatch;
5779 
5780   if (!isGFX10Plus()) {
5781     Error(Loc, "unified format is not supported on this GPU");
5782     return MatchOperand_ParseFail;
5783   }
5784 
5785   Format = Id;
5786   return MatchOperand_Success;
5787 }
5788 
5789 OperandMatchResultTy
5790 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5791   using namespace llvm::AMDGPU::MTBUFFormat;
5792   SMLoc Loc = getLoc();
5793 
5794   if (!parseExpr(Format))
5795     return MatchOperand_ParseFail;
5796   if (!isValidFormatEncoding(Format, getSTI())) {
5797     Error(Loc, "out of range format");
5798     return MatchOperand_ParseFail;
5799   }
5800 
5801   return MatchOperand_Success;
5802 }
5803 
5804 OperandMatchResultTy
5805 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5806   using namespace llvm::AMDGPU::MTBUFFormat;
5807 
5808   if (!trySkipId("format", AsmToken::Colon))
5809     return MatchOperand_NoMatch;
5810 
5811   if (trySkipToken(AsmToken::LBrac)) {
5812     StringRef FormatStr;
5813     SMLoc Loc = getLoc();
5814     if (!parseId(FormatStr, "expected a format string"))
5815       return MatchOperand_ParseFail;
5816 
5817     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5818     if (Res == MatchOperand_NoMatch)
5819       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5820     if (Res != MatchOperand_Success)
5821       return Res;
5822 
5823     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5824       return MatchOperand_ParseFail;
5825 
5826     return MatchOperand_Success;
5827   }
5828 
5829   return parseNumericFormat(Format);
5830 }
5831 
5832 OperandMatchResultTy
5833 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5834   using namespace llvm::AMDGPU::MTBUFFormat;
5835 
5836   int64_t Format = getDefaultFormatEncoding(getSTI());
5837   OperandMatchResultTy Res;
5838   SMLoc Loc = getLoc();
5839 
5840   // Parse legacy format syntax.
5841   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5842   if (Res == MatchOperand_ParseFail)
5843     return Res;
5844 
5845   bool FormatFound = (Res == MatchOperand_Success);
5846 
5847   Operands.push_back(
5848     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5849 
5850   if (FormatFound)
5851     trySkipToken(AsmToken::Comma);
5852 
5853   if (isToken(AsmToken::EndOfStatement)) {
5854     // We are expecting a soffset operand,
5855     // but let the matcher handle the error.
5856     return MatchOperand_Success;
5857   }
5858 
5859   // Parse soffset.
5860   Res = parseRegOrImm(Operands);
5861   if (Res != MatchOperand_Success)
5862     return Res;
5863 
5864   trySkipToken(AsmToken::Comma);
5865 
5866   if (!FormatFound) {
5867     Res = parseSymbolicOrNumericFormat(Format);
5868     if (Res == MatchOperand_ParseFail)
5869       return Res;
5870     if (Res == MatchOperand_Success) {
5871       auto Size = Operands.size();
5872       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5873       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5874       Op.setImm(Format);
5875     }
5876     return MatchOperand_Success;
5877   }
5878 
5879   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5880     Error(getLoc(), "duplicate format");
5881     return MatchOperand_ParseFail;
5882   }
5883   return MatchOperand_Success;
5884 }
5885 
5886 //===----------------------------------------------------------------------===//
5887 // ds
5888 //===----------------------------------------------------------------------===//
5889 
5890 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5891                                     const OperandVector &Operands) {
5892   OptionalImmIndexMap OptionalIdx;
5893 
5894   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5895     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5896 
5897     // Add the register arguments
5898     if (Op.isReg()) {
5899       Op.addRegOperands(Inst, 1);
5900       continue;
5901     }
5902 
5903     // Handle optional arguments
5904     OptionalIdx[Op.getImmTy()] = i;
5905   }
5906 
5907   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5908   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5909   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5910 
5911   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5912 }
5913 
5914 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5915                                 bool IsGdsHardcoded) {
5916   OptionalImmIndexMap OptionalIdx;
5917 
5918   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5919     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5920 
5921     // Add the register arguments
5922     if (Op.isReg()) {
5923       Op.addRegOperands(Inst, 1);
5924       continue;
5925     }
5926 
5927     if (Op.isToken() && Op.getToken() == "gds") {
5928       IsGdsHardcoded = true;
5929       continue;
5930     }
5931 
5932     // Handle optional arguments
5933     OptionalIdx[Op.getImmTy()] = i;
5934   }
5935 
5936   AMDGPUOperand::ImmTy OffsetType =
5937     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5938      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5939      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5940                                                       AMDGPUOperand::ImmTyOffset;
5941 
5942   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5943 
5944   if (!IsGdsHardcoded) {
5945     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5946   }
5947   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5948 }
5949 
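// Convert parsed export (exp) operands into an MCInst: build the 'en' mask
// from the sources that are present (two bits per source in compressed mode),
// then append the optional vm and compr immediates followed by the mask.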
5950 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5951   OptionalImmIndexMap OptionalIdx;
5952 
5953   unsigned OperandIdx[4];
5954   unsigned EnMask = 0;
5955   int SrcIdx = 0;
5956 
5957   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5958     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5959 
5960     // Add the register arguments
5961     if (Op.isReg()) {
5962       assert(SrcIdx < 4);
5963       OperandIdx[SrcIdx] = Inst.size();
5964       Op.addRegOperands(Inst, 1);
5965       ++SrcIdx;
5966       continue;
5967     }
5968 
5969     if (Op.isOff()) {
5970       assert(SrcIdx < 4);
5971       OperandIdx[SrcIdx] = Inst.size();
5972       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5973       ++SrcIdx;
5974       continue;
5975     }
5976 
5977     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5978       Op.addImmOperands(Inst, 1);
5979       continue;
5980     }
5981 
5982     if (Op.isToken() && Op.getToken() == "done")
5983       continue;
5984 
5985     // Handle optional arguments
5986     OptionalIdx[Op.getImmTy()] = i;
5987   }
5988 
5989   assert(SrcIdx == 4);
5990 
5991   bool Compr = false;
5992   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5993     Compr = true;
5994     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5995     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5996     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5997   }
5998 
5999   for (auto i = 0; i < SrcIdx; ++i) {
6000     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6001       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6002     }
6003   }
6004 
6005   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6006   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6007 
6008   Inst.addOperand(MCOperand::createImm(EnMask));
6009 }
6010 
6011 //===----------------------------------------------------------------------===//
6012 // s_waitcnt
6013 //===----------------------------------------------------------------------===//
6014 
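// Encode one counter field into the combined s_waitcnt immediate. Returns
// true on failure, i.e. when CntVal does not fit in the field and saturation
// was not requested.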
6015 static bool
6016 encodeCnt(
6017   const AMDGPU::IsaVersion ISA,
6018   int64_t &IntVal,
6019   int64_t CntVal,
6020   bool Saturate,
6021   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6022   unsigned (*decode)(const IsaVersion &Version, unsigned))
6023 {
6024   bool Failed = false;
6025 
6026   IntVal = encode(ISA, IntVal, CntVal);
6027   if (CntVal != decode(ISA, IntVal)) {
6028     if (Saturate) {
6029       IntVal = encode(ISA, IntVal, -1);
6030     } else {
6031       Failed = true;
6032     }
6033   }
6034   return Failed;
6035 }
6036 
6037 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6038 
6039   SMLoc CntLoc = getLoc();
6040   StringRef CntName = getTokenStr();
6041 
6042   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6043       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6044     return false;
6045 
6046   int64_t CntVal;
6047   SMLoc ValLoc = getLoc();
6048   if (!parseExpr(CntVal))
6049     return false;
6050 
6051   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6052 
6053   bool Failed = true;
6054   bool Sat = CntName.endswith("_sat");
6055 
6056   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6057     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6058   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6059     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6060   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6061     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6062   } else {
6063     Error(CntLoc, "invalid counter name " + CntName);
6064     return false;
6065   }
6066 
6067   if (Failed) {
6068     Error(ValLoc, "too large value for " + CntName);
6069     return false;
6070   }
6071 
6072   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6073     return false;
6074 
6075   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6076     if (isToken(AsmToken::EndOfStatement)) {
6077       Error(getLoc(), "expected a counter name");
6078       return false;
6079     }
6080   }
6081 
6082   return true;
6083 }
6084 
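// Parse an s_waitcnt operand: either a list of named counters,
// e.g. vmcnt(0) & lgkmcnt(0), or an absolute expression.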
6085 OperandMatchResultTy
6086 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6087   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6088   int64_t Waitcnt = getWaitcntBitMask(ISA);
6089   SMLoc S = getLoc();
6090 
6091   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6092     while (!isToken(AsmToken::EndOfStatement)) {
6093       if (!parseCnt(Waitcnt))
6094         return MatchOperand_ParseFail;
6095     }
6096   } else {
6097     if (!parseExpr(Waitcnt))
6098       return MatchOperand_ParseFail;
6099   }
6100 
6101   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6102   return MatchOperand_Success;
6103 }
6104 
6105 bool
6106 AMDGPUOperand::isSWaitCnt() const {
6107   return isImm();
6108 }
6109 
6110 //===----------------------------------------------------------------------===//
6111 // hwreg
6112 //===----------------------------------------------------------------------===//
6113 
6114 bool
6115 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6116                                 OperandInfoTy &Offset,
6117                                 OperandInfoTy &Width) {
6118   using namespace llvm::AMDGPU::Hwreg;
6119 
6120   // The register may be specified by name or using a numeric code
6121   HwReg.Loc = getLoc();
6122   if (isToken(AsmToken::Identifier) &&
6123       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6124     HwReg.IsSymbolic = true;
6125     lex(); // skip register name
6126   } else if (!parseExpr(HwReg.Id, "a register name")) {
6127     return false;
6128   }
6129 
6130   if (trySkipToken(AsmToken::RParen))
6131     return true;
6132 
6133   // parse optional params
6134   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6135     return false;
6136 
6137   Offset.Loc = getLoc();
6138   if (!parseExpr(Offset.Id))
6139     return false;
6140 
6141   if (!skipToken(AsmToken::Comma, "expected a comma"))
6142     return false;
6143 
6144   Width.Loc = getLoc();
6145   return parseExpr(Width.Id) &&
6146          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6147 }
6148 
6149 bool
6150 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6151                                const OperandInfoTy &Offset,
6152                                const OperandInfoTy &Width) {
6153 
6154   using namespace llvm::AMDGPU::Hwreg;
6155 
6156   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6157     Error(HwReg.Loc,
6158           "specified hardware register is not supported on this GPU");
6159     return false;
6160   }
6161   if (!isValidHwreg(HwReg.Id)) {
6162     Error(HwReg.Loc,
6163           "invalid code of hardware register: only 6-bit values are legal");
6164     return false;
6165   }
6166   if (!isValidHwregOffset(Offset.Id)) {
6167     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6168     return false;
6169   }
6170   if (!isValidHwregWidth(Width.Id)) {
6171     Error(Width.Loc,
6172           "invalid bitfield width: only values from 1 to 32 are legal");
6173     return false;
6174   }
6175   return true;
6176 }
6177 
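// Parse a hwreg operand in either symbolic or numeric form, e.g.
// (illustrative):
//   s_getreg_b32 s2, hwreg(HW_REG_TRAPSTS, 0, 32)
//   s_getreg_b32 s2, hwreg(6, 0, 32)
// A plain 16-bit absolute expression is accepted as well.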
6178 OperandMatchResultTy
6179 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6180   using namespace llvm::AMDGPU::Hwreg;
6181 
6182   int64_t ImmVal = 0;
6183   SMLoc Loc = getLoc();
6184 
6185   if (trySkipId("hwreg", AsmToken::LParen)) {
6186     OperandInfoTy HwReg(ID_UNKNOWN_);
6187     OperandInfoTy Offset(OFFSET_DEFAULT_);
6188     OperandInfoTy Width(WIDTH_DEFAULT_);
6189     if (parseHwregBody(HwReg, Offset, Width) &&
6190         validateHwreg(HwReg, Offset, Width)) {
6191       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6192     } else {
6193       return MatchOperand_ParseFail;
6194     }
6195   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6196     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6197       Error(Loc, "invalid immediate: only 16-bit values are legal");
6198       return MatchOperand_ParseFail;
6199     }
6200   } else {
6201     return MatchOperand_ParseFail;
6202   }
6203 
6204   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6205   return MatchOperand_Success;
6206 }
6207 
6208 bool AMDGPUOperand::isHwreg() const {
6209   return isImmTy(ImmTyHwreg);
6210 }
6211 
6212 //===----------------------------------------------------------------------===//
6213 // sendmsg
6214 //===----------------------------------------------------------------------===//
6215 
6216 bool
6217 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6218                                   OperandInfoTy &Op,
6219                                   OperandInfoTy &Stream) {
6220   using namespace llvm::AMDGPU::SendMsg;
6221 
6222   Msg.Loc = getLoc();
6223   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6224     Msg.IsSymbolic = true;
6225     lex(); // skip message name
6226   } else if (!parseExpr(Msg.Id, "a message name")) {
6227     return false;
6228   }
6229 
6230   if (trySkipToken(AsmToken::Comma)) {
6231     Op.IsDefined = true;
6232     Op.Loc = getLoc();
6233     if (isToken(AsmToken::Identifier) &&
6234         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6235       lex(); // skip operation name
6236     } else if (!parseExpr(Op.Id, "an operation name")) {
6237       return false;
6238     }
6239 
6240     if (trySkipToken(AsmToken::Comma)) {
6241       Stream.IsDefined = true;
6242       Stream.Loc = getLoc();
6243       if (!parseExpr(Stream.Id))
6244         return false;
6245     }
6246   }
6247 
6248   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6249 }
6250 
6251 bool
6252 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6253                                  const OperandInfoTy &Op,
6254                                  const OperandInfoTy &Stream) {
6255   using namespace llvm::AMDGPU::SendMsg;
6256 
  // Validation strictness depends on whether the message is specified
  // in symbolic or in numeric form. In the latter case
  // only the possibility of encoding is checked.
6260   bool Strict = Msg.IsSymbolic;
6261 
6262   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6263     Error(Msg.Loc, "invalid message id");
6264     return false;
6265   }
6266   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6267     if (Op.IsDefined) {
6268       Error(Op.Loc, "message does not support operations");
6269     } else {
6270       Error(Msg.Loc, "missing message operation");
6271     }
6272     return false;
6273   }
6274   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6275     Error(Op.Loc, "invalid operation id");
6276     return false;
6277   }
6278   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6279     Error(Stream.Loc, "message operation does not support streams");
6280     return false;
6281   }
6282   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6283     Error(Stream.Loc, "invalid message stream id");
6284     return false;
6285   }
6286   return true;
6287 }
6288 
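// Parse a sendmsg operand, e.g. (illustrative):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// The message, operation and stream may also be given as absolute
// expressions, and a raw 16-bit immediate is accepted as well.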
6289 OperandMatchResultTy
6290 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6291   using namespace llvm::AMDGPU::SendMsg;
6292 
6293   int64_t ImmVal = 0;
6294   SMLoc Loc = getLoc();
6295 
6296   if (trySkipId("sendmsg", AsmToken::LParen)) {
6297     OperandInfoTy Msg(ID_UNKNOWN_);
6298     OperandInfoTy Op(OP_NONE_);
6299     OperandInfoTy Stream(STREAM_ID_NONE_);
6300     if (parseSendMsgBody(Msg, Op, Stream) &&
6301         validateSendMsg(Msg, Op, Stream)) {
6302       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6303     } else {
6304       return MatchOperand_ParseFail;
6305     }
6306   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6307     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6308       Error(Loc, "invalid immediate: only 16-bit values are legal");
6309       return MatchOperand_ParseFail;
6310     }
6311   } else {
6312     return MatchOperand_ParseFail;
6313   }
6314 
6315   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6316   return MatchOperand_Success;
6317 }
6318 
6319 bool AMDGPUOperand::isSendMsg() const {
6320   return isImmTy(ImmTySendMsg);
6321 }
6322 
6323 //===----------------------------------------------------------------------===//
6324 // v_interp
6325 //===----------------------------------------------------------------------===//
6326 
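// Parse interpolation operands, e.g. (illustrative):
//   v_interp_mov_f32 v0, p10, attr0.x
// Valid slots are p10, p20 and p0; attributes are attr0..attr63 with a
// channel suffix of .x, .y, .z or .w.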
6327 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6328   StringRef Str;
6329   SMLoc S = getLoc();
6330 
6331   if (!parseId(Str))
6332     return MatchOperand_NoMatch;
6333 
6334   int Slot = StringSwitch<int>(Str)
6335     .Case("p10", 0)
6336     .Case("p20", 1)
6337     .Case("p0", 2)
6338     .Default(-1);
6339 
6340   if (Slot == -1) {
6341     Error(S, "invalid interpolation slot");
6342     return MatchOperand_ParseFail;
6343   }
6344 
6345   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6346                                               AMDGPUOperand::ImmTyInterpSlot));
6347   return MatchOperand_Success;
6348 }
6349 
6350 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6351   StringRef Str;
6352   SMLoc S = getLoc();
6353 
6354   if (!parseId(Str))
6355     return MatchOperand_NoMatch;
6356 
6357   if (!Str.startswith("attr")) {
6358     Error(S, "invalid interpolation attribute");
6359     return MatchOperand_ParseFail;
6360   }
6361 
6362   StringRef Chan = Str.take_back(2);
6363   int AttrChan = StringSwitch<int>(Chan)
6364     .Case(".x", 0)
6365     .Case(".y", 1)
6366     .Case(".z", 2)
6367     .Case(".w", 3)
6368     .Default(-1);
6369   if (AttrChan == -1) {
6370     Error(S, "invalid or missing interpolation attribute channel");
6371     return MatchOperand_ParseFail;
6372   }
6373 
6374   Str = Str.drop_back(2).drop_front(4);
6375 
6376   uint8_t Attr;
6377   if (Str.getAsInteger(10, Attr)) {
6378     Error(S, "invalid or missing interpolation attribute number");
6379     return MatchOperand_ParseFail;
6380   }
6381 
6382   if (Attr > 63) {
6383     Error(S, "out of bounds interpolation attribute number");
6384     return MatchOperand_ParseFail;
6385   }
6386 
6387   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6388 
6389   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6390                                               AMDGPUOperand::ImmTyInterpAttr));
6391   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6392                                               AMDGPUOperand::ImmTyAttrChan));
6393   return MatchOperand_Success;
6394 }
6395 
6396 //===----------------------------------------------------------------------===//
6397 // exp
6398 //===----------------------------------------------------------------------===//
6399 
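// Parse an export target, e.g. (illustrative):
//   exp mrt0 v0, v1, v2, v3
// Target availability (mrt*, mrtz, null, pos*, param*, ...) depends on the
// subtarget and is checked via isSupportedTgtId().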
6400 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6401   using namespace llvm::AMDGPU::Exp;
6402 
6403   StringRef Str;
6404   SMLoc S = getLoc();
6405 
6406   if (!parseId(Str))
6407     return MatchOperand_NoMatch;
6408 
6409   unsigned Id = getTgtId(Str);
6410   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6411     Error(S, (Id == ET_INVALID) ?
6412                 "invalid exp target" :
6413                 "exp target is not supported on this GPU");
6414     return MatchOperand_ParseFail;
6415   }
6416 
6417   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6418                                               AMDGPUOperand::ImmTyExpTgt));
6419   return MatchOperand_Success;
6420 }
6421 
6422 //===----------------------------------------------------------------------===//
6423 // parser helpers
6424 //===----------------------------------------------------------------------===//
6425 
6426 bool
6427 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6428   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6429 }
6430 
6431 bool
6432 AMDGPUAsmParser::isId(const StringRef Id) const {
6433   return isId(getToken(), Id);
6434 }
6435 
6436 bool
6437 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6438   return getTokenKind() == Kind;
6439 }
6440 
6441 bool
6442 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6443   if (isId(Id)) {
6444     lex();
6445     return true;
6446   }
6447   return false;
6448 }
6449 
6450 bool
6451 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6452   if (isToken(AsmToken::Identifier)) {
6453     StringRef Tok = getTokenStr();
6454     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6455       lex();
6456       return true;
6457     }
6458   }
6459   return false;
6460 }
6461 
6462 bool
6463 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6464   if (isId(Id) && peekToken().is(Kind)) {
6465     lex();
6466     lex();
6467     return true;
6468   }
6469   return false;
6470 }
6471 
6472 bool
6473 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6474   if (isToken(Kind)) {
6475     lex();
6476     return true;
6477   }
6478   return false;
6479 }
6480 
6481 bool
6482 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6483                            const StringRef ErrMsg) {
6484   if (!trySkipToken(Kind)) {
6485     Error(getLoc(), ErrMsg);
6486     return false;
6487   }
6488   return true;
6489 }
6490 
6491 bool
6492 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6493   SMLoc S = getLoc();
6494 
6495   const MCExpr *Expr;
6496   if (Parser.parseExpression(Expr))
6497     return false;
6498 
6499   if (Expr->evaluateAsAbsolute(Imm))
6500     return true;
6501 
6502   if (Expected.empty()) {
6503     Error(S, "expected absolute expression");
6504   } else {
6505     Error(S, Twine("expected ", Expected) +
6506              Twine(" or an absolute expression"));
6507   }
6508   return false;
6509 }
6510 
6511 bool
6512 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6513   SMLoc S = getLoc();
6514 
6515   const MCExpr *Expr;
6516   if (Parser.parseExpression(Expr))
6517     return false;
6518 
6519   int64_t IntVal;
6520   if (Expr->evaluateAsAbsolute(IntVal)) {
6521     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6522   } else {
6523     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6524   }
6525   return true;
6526 }
6527 
6528 bool
6529 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6530   if (isToken(AsmToken::String)) {
6531     Val = getToken().getStringContents();
6532     lex();
6533     return true;
6534   } else {
6535     Error(getLoc(), ErrMsg);
6536     return false;
6537   }
6538 }
6539 
6540 bool
6541 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6542   if (isToken(AsmToken::Identifier)) {
6543     Val = getTokenStr();
6544     lex();
6545     return true;
6546   } else {
6547     if (!ErrMsg.empty())
6548       Error(getLoc(), ErrMsg);
6549     return false;
6550   }
6551 }
6552 
6553 AsmToken
6554 AMDGPUAsmParser::getToken() const {
6555   return Parser.getTok();
6556 }
6557 
6558 AsmToken
6559 AMDGPUAsmParser::peekToken() {
6560   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6561 }
6562 
6563 void
6564 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6565   auto TokCount = getLexer().peekTokens(Tokens);
6566 
6567   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6568     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6569 }
6570 
6571 AsmToken::TokenKind
6572 AMDGPUAsmParser::getTokenKind() const {
6573   return getLexer().getKind();
6574 }
6575 
6576 SMLoc
6577 AMDGPUAsmParser::getLoc() const {
6578   return getToken().getLoc();
6579 }
6580 
6581 StringRef
6582 AMDGPUAsmParser::getTokenStr() const {
6583   return getToken().getString();
6584 }
6585 
6586 void
6587 AMDGPUAsmParser::lex() {
6588   Parser.Lex();
6589 }
6590 
6591 SMLoc
6592 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6593                                const OperandVector &Operands) const {
6594   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6595     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6596     if (Test(Op))
6597       return Op.getStartLoc();
6598   }
6599   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6600 }
6601 
6602 SMLoc
6603 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6604                            const OperandVector &Operands) const {
6605   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6606   return getOperandLoc(Test, Operands);
6607 }
6608 
6609 SMLoc
6610 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6611                            const OperandVector &Operands) const {
6612   auto Test = [=](const AMDGPUOperand& Op) {
6613     return Op.isRegKind() && Op.getReg() == Reg;
6614   };
6615   return getOperandLoc(Test, Operands);
6616 }
6617 
6618 SMLoc
6619 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6620   auto Test = [](const AMDGPUOperand& Op) {
6621     return Op.IsImmKindLiteral() || Op.isExpr();
6622   };
6623   return getOperandLoc(Test, Operands);
6624 }
6625 
6626 SMLoc
6627 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6628   auto Test = [](const AMDGPUOperand& Op) {
6629     return Op.isImmKindConst();
6630   };
6631   return getOperandLoc(Test, Operands);
6632 }
6633 
6634 //===----------------------------------------------------------------------===//
6635 // swizzle
6636 //===----------------------------------------------------------------------===//
6637 
6638 LLVM_READNONE
6639 static unsigned
6640 encodeBitmaskPerm(const unsigned AndMask,
6641                   const unsigned OrMask,
6642                   const unsigned XorMask) {
6643   using namespace llvm::AMDGPU::Swizzle;
6644 
6645   return BITMASK_PERM_ENC |
6646          (AndMask << BITMASK_AND_SHIFT) |
6647          (OrMask  << BITMASK_OR_SHIFT)  |
6648          (XorMask << BITMASK_XOR_SHIFT);
6649 }
6650 
6651 bool
6652 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6653                                      const unsigned MinVal,
6654                                      const unsigned MaxVal,
6655                                      const StringRef ErrMsg,
6656                                      SMLoc &Loc) {
6657   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6658     return false;
6659   }
6660   Loc = getLoc();
6661   if (!parseExpr(Op)) {
6662     return false;
6663   }
6664   if (Op < MinVal || Op > MaxVal) {
6665     Error(Loc, ErrMsg);
6666     return false;
6667   }
6668 
6669   return true;
6670 }
6671 
6672 bool
6673 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6674                                       const unsigned MinVal,
6675                                       const unsigned MaxVal,
6676                                       const StringRef ErrMsg) {
6677   SMLoc Loc;
6678   for (unsigned i = 0; i < OpNum; ++i) {
6679     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6680       return false;
6681   }
6682 
6683   return true;
6684 }
6685 
6686 bool
6687 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6688   using namespace llvm::AMDGPU::Swizzle;
6689 
6690   int64_t Lane[LANE_NUM];
6691   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6692                            "expected a 2-bit lane id")) {
6693     Imm = QUAD_PERM_ENC;
6694     for (unsigned I = 0; I < LANE_NUM; ++I) {
6695       Imm |= Lane[I] << (LANE_SHIFT * I);
6696     }
6697     return true;
6698   }
6699   return false;
6700 }
6701 
6702 bool
6703 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6704   using namespace llvm::AMDGPU::Swizzle;
6705 
6706   SMLoc Loc;
6707   int64_t GroupSize;
6708   int64_t LaneIdx;
6709 
6710   if (!parseSwizzleOperand(GroupSize,
6711                            2, 32,
6712                            "group size must be in the interval [2,32]",
6713                            Loc)) {
6714     return false;
6715   }
6716   if (!isPowerOf2_64(GroupSize)) {
6717     Error(Loc, "group size must be a power of two");
6718     return false;
6719   }
6720   if (parseSwizzleOperand(LaneIdx,
6721                           0, GroupSize - 1,
6722                           "lane id must be in the interval [0,group size - 1]",
6723                           Loc)) {
6724     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6725     return true;
6726   }
6727   return false;
6728 }
6729 
6730 bool
6731 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6732   using namespace llvm::AMDGPU::Swizzle;
6733 
6734   SMLoc Loc;
6735   int64_t GroupSize;
6736 
6737   if (!parseSwizzleOperand(GroupSize,
6738                            2, 32,
6739                            "group size must be in the interval [2,32]",
6740                            Loc)) {
6741     return false;
6742   }
6743   if (!isPowerOf2_64(GroupSize)) {
6744     Error(Loc, "group size must be a power of two");
6745     return false;
6746   }
6747 
6748   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6749   return true;
6750 }
6751 
6752 bool
6753 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6754   using namespace llvm::AMDGPU::Swizzle;
6755 
6756   SMLoc Loc;
6757   int64_t GroupSize;
6758 
6759   if (!parseSwizzleOperand(GroupSize,
6760                            1, 16,
6761                            "group size must be in the interval [1,16]",
6762                            Loc)) {
6763     return false;
6764   }
6765   if (!isPowerOf2_64(GroupSize)) {
6766     Error(Loc, "group size must be a power of two");
6767     return false;
6768   }
6769 
6770   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6771   return true;
6772 }
6773 
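// Parse the BITMASK_PERM swizzle mode. The 5-character control mask is
// applied to the lane id bits, MSB first; each character selects how the
// corresponding bit is produced (illustrative):
//   '0' - force the bit to 0        '1' - force the bit to 1
//   'p' - preserve the lane id bit  'i' - invert the lane id bit
// E.g. swizzle(BITMASK_PERM, "01pip").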
6774 bool
6775 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6776   using namespace llvm::AMDGPU::Swizzle;
6777 
6778   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6779     return false;
6780   }
6781 
6782   StringRef Ctl;
6783   SMLoc StrLoc = getLoc();
6784   if (!parseString(Ctl)) {
6785     return false;
6786   }
6787   if (Ctl.size() != BITMASK_WIDTH) {
6788     Error(StrLoc, "expected a 5-character mask");
6789     return false;
6790   }
6791 
6792   unsigned AndMask = 0;
6793   unsigned OrMask = 0;
6794   unsigned XorMask = 0;
6795 
6796   for (size_t i = 0; i < Ctl.size(); ++i) {
6797     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6798     switch(Ctl[i]) {
6799     default:
6800       Error(StrLoc, "invalid mask");
6801       return false;
6802     case '0':
6803       break;
6804     case '1':
6805       OrMask |= Mask;
6806       break;
6807     case 'p':
6808       AndMask |= Mask;
6809       break;
6810     case 'i':
6811       AndMask |= Mask;
6812       XorMask |= Mask;
6813       break;
6814     }
6815   }
6816 
6817   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6818   return true;
6819 }
6820 
6821 bool
6822 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6823 
6824   SMLoc OffsetLoc = getLoc();
6825 
6826   if (!parseExpr(Imm, "a swizzle macro")) {
6827     return false;
6828   }
6829   if (!isUInt<16>(Imm)) {
6830     Error(OffsetLoc, "expected a 16-bit offset");
6831     return false;
6832   }
6833   return true;
6834 }
6835 
6836 bool
6837 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6838   using namespace llvm::AMDGPU::Swizzle;
6839 
  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6841 
6842     SMLoc ModeLoc = getLoc();
6843     bool Ok = false;
6844 
6845     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6846       Ok = parseSwizzleQuadPerm(Imm);
6847     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6848       Ok = parseSwizzleBitmaskPerm(Imm);
6849     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6850       Ok = parseSwizzleBroadcast(Imm);
6851     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6852       Ok = parseSwizzleSwap(Imm);
6853     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6854       Ok = parseSwizzleReverse(Imm);
6855     } else {
6856       Error(ModeLoc, "expected a swizzle mode");
6857     }
6858 
    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6860   }
6861 
6862   return false;
6863 }
6864 
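// Parse the optional 'offset' operand of ds_swizzle_b32. Both a raw 16-bit
// offset and the sp3-style swizzle macros are accepted, e.g. (illustrative):
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(SWAP, 16)
//   ds_swizzle_b32 v5, v1 offset:0x8000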
6865 OperandMatchResultTy
6866 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6867   SMLoc S = getLoc();
6868   int64_t Imm = 0;
6869 
6870   if (trySkipId("offset")) {
6871 
6872     bool Ok = false;
6873     if (skipToken(AsmToken::Colon, "expected a colon")) {
6874       if (trySkipId("swizzle")) {
6875         Ok = parseSwizzleMacro(Imm);
6876       } else {
6877         Ok = parseSwizzleOffset(Imm);
6878       }
6879     }
6880 
6881     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6882 
    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
6884   } else {
6885     // Swizzle "offset" operand is optional.
6886     // If it is omitted, try parsing other optional operands.
6887     return parseOptionalOpr(Operands);
6888   }
6889 }
6890 
6891 bool
6892 AMDGPUOperand::isSwizzle() const {
6893   return isImmTy(ImmTySwizzle);
6894 }
6895 
6896 //===----------------------------------------------------------------------===//
6897 // VGPR Index Mode
6898 //===----------------------------------------------------------------------===//
6899 
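// Parse the gpr_idx mode list used by s_set_gpr_idx_on, e.g. (illustrative):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// A raw 4-bit immediate encoding of the mode bits is accepted as well.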
6900 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6901 
6902   using namespace llvm::AMDGPU::VGPRIndexMode;
6903 
6904   if (trySkipToken(AsmToken::RParen)) {
6905     return OFF;
6906   }
6907 
6908   int64_t Imm = 0;
6909 
6910   while (true) {
6911     unsigned Mode = 0;
6912     SMLoc S = getLoc();
6913 
6914     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6915       if (trySkipId(IdSymbolic[ModeId])) {
6916         Mode = 1 << ModeId;
6917         break;
6918       }
6919     }
6920 
6921     if (Mode == 0) {
      Error(S, (Imm == 0) ?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
6925       return UNDEF;
6926     }
6927 
6928     if (Imm & Mode) {
6929       Error(S, "duplicate VGPR index mode");
6930       return UNDEF;
6931     }
6932     Imm |= Mode;
6933 
6934     if (trySkipToken(AsmToken::RParen))
6935       break;
6936     if (!skipToken(AsmToken::Comma,
6937                    "expected a comma or a closing parenthesis"))
6938       return UNDEF;
6939   }
6940 
6941   return Imm;
6942 }
6943 
6944 OperandMatchResultTy
6945 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6946 
6947   using namespace llvm::AMDGPU::VGPRIndexMode;
6948 
6949   int64_t Imm = 0;
6950   SMLoc S = getLoc();
6951 
6952   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6953     Imm = parseGPRIdxMacro();
6954     if (Imm == UNDEF)
6955       return MatchOperand_ParseFail;
6956   } else {
6957     if (getParser().parseAbsoluteExpression(Imm))
6958       return MatchOperand_ParseFail;
6959     if (Imm < 0 || !isUInt<4>(Imm)) {
6960       Error(S, "invalid immediate: only 4-bit values are legal");
6961       return MatchOperand_ParseFail;
6962     }
6963   }
6964 
6965   Operands.push_back(
6966       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6967   return MatchOperand_Success;
6968 }
6969 
6970 bool AMDGPUOperand::isGPRIdxMode() const {
6971   return isImmTy(ImmTyGprIdxMode);
6972 }
6973 
6974 //===----------------------------------------------------------------------===//
6975 // sopp branch targets
6976 //===----------------------------------------------------------------------===//
6977 
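// Parse a SOPP branch target: either a label or an absolute expression that
// fits into a signed 16-bit word offset, e.g. (illustrative):
//   s_branch loop_end
//   s_branch 0x4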
6978 OperandMatchResultTy
6979 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6980 
6981   // Make sure we are not parsing something
6982   // that looks like a label or an expression but is not.
6983   // This will improve error messages.
6984   if (isRegister() || isModifier())
6985     return MatchOperand_NoMatch;
6986 
6987   if (!parseExpr(Operands))
6988     return MatchOperand_ParseFail;
6989 
6990   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6991   assert(Opr.isImm() || Opr.isExpr());
6992   SMLoc Loc = Opr.getStartLoc();
6993 
6994   // Currently we do not support arbitrary expressions as branch targets.
6995   // Only labels and absolute expressions are accepted.
6996   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6997     Error(Loc, "expected an absolute expression or a label");
6998   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6999     Error(Loc, "expected a 16-bit signed jump offset");
7000   }
7001 
7002   return MatchOperand_Success;
7003 }
7004 
7005 //===----------------------------------------------------------------------===//
7006 // Boolean holding registers
7007 //===----------------------------------------------------------------------===//
7008 
7009 OperandMatchResultTy
7010 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7011   return parseReg(Operands);
7012 }
7013 
7014 //===----------------------------------------------------------------------===//
7015 // mubuf
7016 //===----------------------------------------------------------------------===//
7017 
7018 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7019   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7020 }
7021 
7022 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7023                                    const OperandVector &Operands,
7024                                    bool IsAtomic,
7025                                    bool IsLds) {
7026   bool IsLdsOpcode = IsLds;
7027   bool HasLdsModifier = false;
7028   OptionalImmIndexMap OptionalIdx;
7029   unsigned FirstOperandIdx = 1;
7030   bool IsAtomicReturn = false;
7031 
7032   if (IsAtomic) {
7033     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7034       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7035       if (!Op.isCPol())
7036         continue;
7037       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7038       break;
7039     }
7040 
7041     if (!IsAtomicReturn) {
7042       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7043       if (NewOpc != -1)
7044         Inst.setOpcode(NewOpc);
7045     }
7046 
    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                     SIInstrFlags::IsAtomicRet;
7049   }
7050 
7051   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7052     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7053 
7054     // Add the register arguments
7055     if (Op.isReg()) {
7056       Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on the correct number of MC operands.
7060       if (IsAtomicReturn && i == FirstOperandIdx)
7061         Op.addRegOperands(Inst, 1);
7062       continue;
7063     }
7064 
7065     // Handle the case where soffset is an immediate
7066     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7067       Op.addImmOperands(Inst, 1);
7068       continue;
7069     }
7070 
7071     HasLdsModifier |= Op.isLDS();
7072 
7073     // Handle tokens like 'offen' which are sometimes hard-coded into the
7074     // asm string.  There are no MCInst operands for these.
7075     if (Op.isToken()) {
7076       continue;
7077     }
7078     assert(Op.isImm());
7079 
7080     // Handle optional arguments
7081     OptionalIdx[Op.getImmTy()] = i;
7082   }
7083 
  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // the optional modifiers, and the llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of an opcode may be selected even if it has no 'lds' modifier.
7091   if (IsLdsOpcode && !HasLdsModifier) {
7092     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7093     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7094       Inst.setOpcode(NoLdsOpcode);
7095       IsLdsOpcode = false;
7096     }
7097   }
7098 
7099   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7100   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7101 
7102   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7103     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7104   }
7105   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7106 }
7107 
7108 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7109   OptionalImmIndexMap OptionalIdx;
7110 
7111   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7112     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7113 
7114     // Add the register arguments
7115     if (Op.isReg()) {
7116       Op.addRegOperands(Inst, 1);
7117       continue;
7118     }
7119 
7120     // Handle the case where soffset is an immediate
7121     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7122       Op.addImmOperands(Inst, 1);
7123       continue;
7124     }
7125 
7126     // Handle tokens like 'offen' which are sometimes hard-coded into the
7127     // asm string.  There are no MCInst operands for these.
7128     if (Op.isToken()) {
7129       continue;
7130     }
7131     assert(Op.isImm());
7132 
7133     // Handle optional arguments
7134     OptionalIdx[Op.getImmTy()] = i;
7135   }
7136 
7137   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7138                         AMDGPUOperand::ImmTyOffset);
7139   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7140   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7141   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7142   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7143 }
7144 
7145 //===----------------------------------------------------------------------===//
7146 // mimg
7147 //===----------------------------------------------------------------------===//
7148 
7149 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7150                               bool IsAtomic) {
7151   unsigned I = 1;
7152   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7153   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7154     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7155   }
7156 
7157   if (IsAtomic) {
7158     // Add src, same as dst
7159     assert(Desc.getNumDefs() == 1);
7160     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7161   }
7162 
7163   OptionalImmIndexMap OptionalIdx;
7164 
7165   for (unsigned E = Operands.size(); I != E; ++I) {
7166     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7167 
7168     // Add the register arguments
7169     if (Op.isReg()) {
7170       Op.addRegOperands(Inst, 1);
7171     } else if (Op.isImmModifier()) {
7172       OptionalIdx[Op.getImmTy()] = I;
7173     } else if (!Op.isToken()) {
7174       llvm_unreachable("unexpected operand type");
7175     }
7176   }
7177 
7178   bool IsGFX10Plus = isGFX10Plus();
7179 
7180   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7181   if (IsGFX10Plus)
7182     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7183   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7184   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7185   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7186   if (IsGFX10Plus)
7187     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7188   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7189     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7190   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7191   if (!IsGFX10Plus)
7192     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7193   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7194 }
7195 
7196 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7197   cvtMIMG(Inst, Operands, true);
7198 }
7199 
7200 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7201   OptionalImmIndexMap OptionalIdx;
7202   bool IsAtomicReturn = false;
7203 
7204   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7205     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7206     if (!Op.isCPol())
7207       continue;
7208     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7209     break;
7210   }
7211 
7212   if (!IsAtomicReturn) {
7213     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7214     if (NewOpc != -1)
7215       Inst.setOpcode(NewOpc);
7216   }
7217 
  IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                   SIInstrFlags::IsAtomicRet;
7220 
7221   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7222     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7223 
7224     // Add the register arguments
7225     if (Op.isReg()) {
7226       Op.addRegOperands(Inst, 1);
7227       if (IsAtomicReturn && i == 1)
7228         Op.addRegOperands(Inst, 1);
7229       continue;
7230     }
7231 
7232     // Handle the case where soffset is an immediate
7233     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7234       Op.addImmOperands(Inst, 1);
7235       continue;
7236     }
7237 
7238     // Handle tokens like 'offen' which are sometimes hard-coded into the
7239     // asm string.  There are no MCInst operands for these.
7240     if (Op.isToken()) {
7241       continue;
7242     }
7243     assert(Op.isImm());
7244 
7245     // Handle optional arguments
7246     OptionalIdx[Op.getImmTy()] = i;
7247   }
7248 
7249   if ((int)Inst.getNumOperands() <=
7250       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7251     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7252   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7253 }
7254 
7255 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7256                                       const OperandVector &Operands) {
7257   for (unsigned I = 1; I < Operands.size(); ++I) {
7258     auto &Operand = (AMDGPUOperand &)*Operands[I];
7259     if (Operand.isReg())
7260       Operand.addRegOperands(Inst, 1);
7261   }
7262 
7263   Inst.addOperand(MCOperand::createImm(1)); // a16
7264 }
7265 
7266 //===----------------------------------------------------------------------===//
7267 // smrd
7268 //===----------------------------------------------------------------------===//
7269 
7270 bool AMDGPUOperand::isSMRDOffset8() const {
7271   return isImm() && isUInt<8>(getImm());
7272 }
7273 
7274 bool AMDGPUOperand::isSMEMOffset() const {
7275   return isImm(); // Offset range is checked later by validator.
7276 }
7277 
7278 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7279   // 32-bit literals are only supported on CI and we only want to use them
7280   // when the offset is > 8-bits.
7281   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7282 }
7283 
7284 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7285   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7286 }
7287 
7288 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7289   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7290 }
7291 
7292 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7293   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7294 }
7295 
7296 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7297   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7298 }
7299 
7300 //===----------------------------------------------------------------------===//
7301 // vop3
7302 //===----------------------------------------------------------------------===//
7303 
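// Convert the asm-level omod values to their encoded form. The omod field
// encodes 0 = none, 1 = *2, 2 = *4, 3 = /2, so e.g. (illustrative)
//   v_mul_f32 v0, v1, v2 mul:2
// is encoded with omod = 1, and 'div:2' with omod = 3.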
7304 static bool ConvertOmodMul(int64_t &Mul) {
7305   if (Mul != 1 && Mul != 2 && Mul != 4)
7306     return false;
7307 
7308   Mul >>= 1;
7309   return true;
7310 }
7311 
7312 static bool ConvertOmodDiv(int64_t &Div) {
7313   if (Div == 1) {
7314     Div = 0;
7315     return true;
7316   }
7317 
7318   if (Div == 2) {
7319     Div = 3;
7320     return true;
7321   }
7322 
7323   return false;
7324 }
7325 
7326 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7327 // This is intentional and ensures compatibility with sp3.
7328 // See bug 35397 for details.
7329 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7330   if (BoundCtrl == 0 || BoundCtrl == 1) {
7331     BoundCtrl = 1;
7332     return true;
7333   }
7334   return false;
7335 }
7336 
7337 // Note: the order in this table matches the order of operands in AsmString.
7338 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7339   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7340   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7341   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7342   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7343   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7344   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7345   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7346   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7347   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7348   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7349   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7350   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7351   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7352   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7353   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7354   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7355   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7356   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7357   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7358   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7359   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7360   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7361   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7362   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7363   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7364   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7365   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7366   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7367   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7368   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7369   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7370   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7371   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7372   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7373   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7374   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7375   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7376   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7377   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7378   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7379   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7380 };
7381 
7382 void AMDGPUAsmParser::onBeginOfFile() {
7383   if (!getParser().getStreamer().getTargetStreamer() ||
7384       getSTI().getTargetTriple().getArch() == Triple::r600)
7385     return;
7386 
7387   if (!getTargetStreamer().getTargetID())
7388     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7389 
7390   if (isHsaAbiVersion3Or4(&getSTI()))
7391     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7392 }
7393 
7394 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7395 
7396   OperandMatchResultTy res = parseOptionalOpr(Operands);
7397 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics which
  // have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
7408 
7409   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7410     if (res != MatchOperand_Success ||
7411         isToken(AsmToken::EndOfStatement))
7412       break;
7413 
7414     trySkipToken(AsmToken::Comma);
7415     res = parseOptionalOpr(Operands);
7416   }
7417 
7418   return res;
7419 }
7420 
7421 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7422   OperandMatchResultTy res;
7423   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7424     // try to parse any optional operand here
7425     if (Op.IsBit) {
7426       res = parseNamedBit(Op.Name, Operands, Op.Type);
7427     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7428       res = parseOModOperand(Operands);
7429     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7430                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7431                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7432       res = parseSDWASel(Operands, Op.Name, Op.Type);
7433     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7434       res = parseSDWADstUnused(Operands);
7435     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7436                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7437                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7438                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7439       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7440                                         Op.ConvertResult);
7441     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7442       res = parseDim(Operands);
7443     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7444       res = parseCPol(Operands);
7445     } else {
7446       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7447     }
7448     if (res != MatchOperand_NoMatch) {
7449       return res;
7450     }
7451   }
7452   return MatchOperand_NoMatch;
7453 }
7454 
7455 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7456   StringRef Name = getTokenStr();
7457   if (Name == "mul") {
7458     return parseIntWithPrefix("mul", Operands,
7459                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7460   }
7461 
7462   if (Name == "div") {
7463     return parseIntWithPrefix("div", Operands,
7464                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7465   }
7466 
7467   return MatchOperand_NoMatch;
7468 }
7469 
7470 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7471   cvtVOP3P(Inst, Operands);
7472 
7473   int Opc = Inst.getOpcode();
7474 
7475   int SrcNum;
7476   const int Ops[] = { AMDGPU::OpName::src0,
7477                       AMDGPU::OpName::src1,
7478                       AMDGPU::OpName::src2 };
7479   for (SrcNum = 0;
7480        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7481        ++SrcNum);
7482   assert(SrcNum > 0);
7483 
7484   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7485   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7486 
7487   if ((OpSel & (1 << SrcNum)) != 0) {
7488     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7489     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7490     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7491   }
7492 }
7493 
7494 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7495       // 1. This operand is input modifiers
7496   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7497       // 2. This is not last operand
7498       && Desc.NumOperands > (OpNum + 1)
7499       // 3. Next operand is register class
7500       && Desc.OpInfo[OpNum + 1].RegClass != -1
7501       // 4. Next register is not tied to any other operand
7502       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7503 }
7504 
7505 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7506 {
7507   OptionalImmIndexMap OptionalIdx;
7508   unsigned Opc = Inst.getOpcode();
7509 
7510   unsigned I = 1;
7511   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7512   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7513     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7514   }
7515 
7516   for (unsigned E = Operands.size(); I != E; ++I) {
7517     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7518     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7519       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7520     } else if (Op.isInterpSlot() ||
7521                Op.isInterpAttr() ||
7522                Op.isAttrChan()) {
7523       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7524     } else if (Op.isImmModifier()) {
7525       OptionalIdx[Op.getImmTy()] = I;
7526     } else {
7527       llvm_unreachable("unhandled operand type");
7528     }
7529   }
7530 
7531   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7532     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7533   }
7534 
7535   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7536     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7537   }
7538 
7539   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7540     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7541   }
7542 }
7543 
7544 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7545                               OptionalImmIndexMap &OptionalIdx) {
7546   unsigned Opc = Inst.getOpcode();
7547 
7548   unsigned I = 1;
7549   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7550   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7551     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7552   }
7553 
7554   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7555     // This instruction has src modifiers
7556     for (unsigned E = Operands.size(); I != E; ++I) {
7557       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7558       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7559         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7560       } else if (Op.isImmModifier()) {
7561         OptionalIdx[Op.getImmTy()] = I;
7562       } else if (Op.isRegOrImm()) {
7563         Op.addRegOrImmOperands(Inst, 1);
7564       } else {
7565         llvm_unreachable("unhandled operand type");
7566       }
7567     }
7568   } else {
7569     // No src modifiers
7570     for (unsigned E = Operands.size(); I != E; ++I) {
7571       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7572       if (Op.isMod()) {
7573         OptionalIdx[Op.getImmTy()] = I;
7574       } else {
7575         Op.addRegOrImmOperands(Inst, 1);
7576       }
7577     }
7578   }
7579 
7580   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7581     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7582   }
7583 
7584   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7585     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7586   }
7587 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
7592   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7593       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7594       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7595       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7596       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7597       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7598       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7599       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7600       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7601       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7602       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7603     auto it = Inst.begin();
7604     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7605     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7606     ++it;
7607     // Copy the operand to ensure it's not invalidated when Inst grows.
7608     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7609   }
7610 }
7611 
7612 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7613   OptionalImmIndexMap OptionalIdx;
7614   cvtVOP3(Inst, Operands, OptionalIdx);
7615 }
7616 
7617 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7618                                OptionalImmIndexMap &OptIdx) {
7619   const int Opc = Inst.getOpcode();
7620   const MCInstrDesc &Desc = MII.get(Opc);
7621 
7622   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7623 
7624   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7625     assert(!IsPacked);
7626     Inst.addOperand(Inst.getOperand(0));
7627   }
7628 
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
7631 
7632   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7633   if (OpSelIdx != -1) {
7634     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7635   }
7636 
7637   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7638   if (OpSelHiIdx != -1) {
7639     int DefaultVal = IsPacked ? -1 : 0;
7640     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7641                           DefaultVal);
7642   }
7643 
7644   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7645   if (NegLoIdx != -1) {
7646     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7647     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7648   }
7649 
7650   const int Ops[] = { AMDGPU::OpName::src0,
7651                       AMDGPU::OpName::src1,
7652                       AMDGPU::OpName::src2 };
7653   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7654                          AMDGPU::OpName::src1_modifiers,
7655                          AMDGPU::OpName::src2_modifiers };
7656 
7657   unsigned OpSel = 0;
7658   unsigned OpSelHi = 0;
7659   unsigned NegLo = 0;
7660   unsigned NegHi = 0;
7661 
7662   if (OpSelIdx != -1)
7663     OpSel = Inst.getOperand(OpSelIdx).getImm();
7664 
7665   if (OpSelHiIdx != -1)
7666     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7667 
7668   if (NegLoIdx != -1) {
7669     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7670     NegLo = Inst.getOperand(NegLoIdx).getImm();
7671     NegHi = Inst.getOperand(NegHiIdx).getImm();
7672   }
7673 
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

//===----------------------------------------------------------------------===//
// dim
//===----------------------------------------------------------------------===//

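// Parse a MIMG dimension name such as "1D", "2D_ARRAY" or "SQ_RSRC_IMG_3D" and
// return its encoding. A leading digit is lexed as a separate integer token,
// so it is glued back onto the identifier that follows it.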
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}

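// Parse an optional "dim:<name>" operand of a GFX10+ MIMG instruction,
// e.g. "dim:2D" or "dim:SQ_RSRC_IMG_2D".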
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding)) {
    Error(Loc, "invalid dim value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

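// Parse a GFX10+ dpp8 lane-select operand, e.g. "dpp8:[7,6,5,4,3,2,1,0]".
// Each of the eight selects is a 3-bit lane index; together they are packed
// into a single 24-bit immediate.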
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i]) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

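// Check whether the named dpp_ctrl variant is available on the current
// subtarget.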
bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

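// Parse the bracketed lane selects of quad_perm, e.g. "quad_perm:[0,1,2,3]".
// Each of the four selects is a 2-bit value; returns the packed dpp_ctrl
// encoding, or -1 on error.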
int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}

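// Parse the integer argument of the remaining dpp_ctrl variants, e.g.
// "row_shl:1" (1..15), "row_share:0" (0..15), "wave_shl:1" (must be 1) or
// "row_bcast:15"/"row_bcast:31", and return the combined dpp_ctrl encoding,
// or -1 on error.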
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

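// Convert parsed dpp/dpp8 operands to MCInst operands. Explicit defs come
// first, sources may carry FP input modifiers, and the trailing
// row_mask/bank_mask/bound_ctrl/fi immediates (or the dpp8 FI bit) are filled
// in with their defaults when omitted.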
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned Opc = Inst.getOpcode();
  bool HasModifiers =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) DPP uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (HasModifiers &&
                 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (HasModifiers &&
          isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

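// Parse an SDWA byte/word select with the given prefix, e.g. "dst_sel:WORD_1"
// or "src0_sel:BYTE_0", and add it as an immediate operand of the given type.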
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

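// Parse a "dst_unused:UNUSED_PAD|UNUSED_SEXT|UNUSED_PRESERVE" operand.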
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

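// Convert parsed SDWA operands to MCInst operands. Implicit VCC dst/src tokens
// are skipped where requested, and the optional clamp/omod/dst_sel/dst_unused/
// src?_sel immediates are appended with their default values when not present.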
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) SDWA uses the "vcc" token as dst.
      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}: it has a src2 register operand that is
  // tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand(),
  // but MatchInstructionImpl() expects to see a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand where
  // the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

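// Parse the optional 16-bit immediate operand of s_endpgm; it defaults to 0
// when omitted.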
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
