1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/TargetRegistry.h"
32 #include "llvm/Support/AMDGPUMetadata.h"
33 #include "llvm/Support/AMDHSAKernelDescriptor.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/MachineValueType.h"
36 #include "llvm/Support/TargetParser.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53   enum KindTy {
54     Token,
55     Immediate,
56     Register,
57     Expression
58   } Kind;
59 
60   SMLoc StartLoc, EndLoc;
61   const AMDGPUAsmParser *AsmParser;
62 
63 public:
64   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65       : Kind(Kind_), AsmParser(AsmParser_) {}
66 
67   using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
69   struct Modifiers {
70     bool Abs = false;
71     bool Neg = false;
72     bool Sext = false;
73 
74     bool hasFPModifiers() const { return Abs || Neg; }
75     bool hasIntModifiers() const { return Sext; }
76     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78     int64_t getFPModifiersOperand() const {
79       int64_t Operand = 0;
80       Operand |= Abs ? SISrcMods::ABS : 0u;
81       Operand |= Neg ? SISrcMods::NEG : 0u;
82       return Operand;
83     }
84 
85     int64_t getIntModifiersOperand() const {
86       int64_t Operand = 0;
87       Operand |= Sext ? SISrcMods::SEXT : 0u;
88       return Operand;
89     }
90 
91     int64_t getModifiersOperand() const {
92       assert(!(hasFPModifiers() && hasIntModifiers())
93            && "fp and int modifiers should not be used simultaneously");
94       if (hasFPModifiers()) {
95         return getFPModifiersOperand();
96       } else if (hasIntModifiers()) {
97         return getIntModifiersOperand();
98       } else {
99         return 0;
100       }
101     }
102 
103     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104   };
105 
106   enum ImmTy {
107     ImmTyNone,
108     ImmTyGDS,
109     ImmTyLDS,
110     ImmTyOffen,
111     ImmTyIdxen,
112     ImmTyAddr64,
113     ImmTyOffset,
114     ImmTyInstOffset,
115     ImmTyOffset0,
116     ImmTyOffset1,
117     ImmTyCPol,
118     ImmTySWZ,
119     ImmTyTFE,
120     ImmTyD16,
121     ImmTyClampSI,
122     ImmTyOModSI,
123     ImmTyDPP8,
124     ImmTyDppCtrl,
125     ImmTyDppRowMask,
126     ImmTyDppBankMask,
127     ImmTyDppBoundCtrl,
128     ImmTyDppFi,
129     ImmTySdwaDstSel,
130     ImmTySdwaSrc0Sel,
131     ImmTySdwaSrc1Sel,
132     ImmTySdwaDstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTySwizzle,
155     ImmTyGprIdxMode,
156     ImmTyHigh,
157     ImmTyBLGP,
158     ImmTyCBSZ,
159     ImmTyABID,
160     ImmTyEndpgm,
161   };
162 
163   enum ImmKindTy {
164     ImmKindTyNone,
165     ImmKindTyLiteral,
166     ImmKindTyConst,
167   };
168 
169 private:
170   struct TokOp {
171     const char *Data;
172     unsigned Length;
173   };
174 
175   struct ImmOp {
176     int64_t Val;
177     ImmTy Type;
178     bool IsFPImm;
179     mutable ImmKindTy Kind;
180     Modifiers Mods;
181   };
182 
183   struct RegOp {
184     unsigned RegNo;
185     Modifiers Mods;
186   };
187 
188   union {
189     TokOp Tok;
190     ImmOp Imm;
191     RegOp Reg;
192     const MCExpr *Expr;
193   };
194 
195 public:
196   bool isToken() const override {
197     if (Kind == Token)
198       return true;
199 
200     // When parsing operands, we can't always tell if something was meant to be
201     // a token, like 'gds', or an expression that references a global variable.
202     // In this case, we assume the string is an expression, and if we need to
203     // interpret is a token, then we treat the symbol name as the token.
204     return isSymbolRefExpr();
205   }
206 
207   bool isSymbolRefExpr() const {
208     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209   }
210 
211   bool isImm() const override {
212     return Kind == Immediate;
213   }
214 
215   void setImmKindNone() const {
216     assert(isImm());
217     Imm.Kind = ImmKindTyNone;
218   }
219 
220   void setImmKindLiteral() const {
221     assert(isImm());
222     Imm.Kind = ImmKindTyLiteral;
223   }
224 
225   void setImmKindConst() const {
226     assert(isImm());
227     Imm.Kind = ImmKindTyConst;
228   }
229 
230   bool IsImmKindLiteral() const {
231     return isImm() && Imm.Kind == ImmKindTyLiteral;
232   }
233 
234   bool isImmKindConst() const {
235     return isImm() && Imm.Kind == ImmKindTyConst;
236   }
237 
238   bool isInlinableImm(MVT type) const;
239   bool isLiteralImm(MVT type) const;
240 
241   bool isRegKind() const {
242     return Kind == Register;
243   }
244 
245   bool isReg() const override {
246     return isRegKind() && !hasModifiers();
247   }
248 
249   bool isRegOrInline(unsigned RCID, MVT type) const {
250     return isRegClass(RCID) || isInlinableImm(type);
251   }
252 
253   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
254     return isRegOrInline(RCID, type) || isLiteralImm(type);
255   }
256 
257   bool isRegOrImmWithInt16InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
259   }
260 
261   bool isRegOrImmWithInt32InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
263   }
264 
265   bool isRegOrImmWithInt64InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
267   }
268 
269   bool isRegOrImmWithFP16InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
271   }
272 
273   bool isRegOrImmWithFP32InputMods() const {
274     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
275   }
276 
277   bool isRegOrImmWithFP64InputMods() const {
278     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
279   }
280 
281   bool isVReg() const {
282     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
283            isRegClass(AMDGPU::VReg_64RegClassID) ||
284            isRegClass(AMDGPU::VReg_96RegClassID) ||
285            isRegClass(AMDGPU::VReg_128RegClassID) ||
286            isRegClass(AMDGPU::VReg_160RegClassID) ||
287            isRegClass(AMDGPU::VReg_192RegClassID) ||
288            isRegClass(AMDGPU::VReg_256RegClassID) ||
289            isRegClass(AMDGPU::VReg_512RegClassID) ||
290            isRegClass(AMDGPU::VReg_1024RegClassID);
291   }
292 
293   bool isVReg32() const {
294     return isRegClass(AMDGPU::VGPR_32RegClassID);
295   }
296 
297   bool isVReg32OrOff() const {
298     return isOff() || isVReg32();
299   }
300 
301   bool isNull() const {
302     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
303   }
304 
305   bool isVRegWithInputMods() const;
306 
307   bool isSDWAOperand(MVT type) const;
308   bool isSDWAFP16Operand() const;
309   bool isSDWAFP32Operand() const;
310   bool isSDWAInt16Operand() const;
311   bool isSDWAInt32Operand() const;
312 
313   bool isImmTy(ImmTy ImmT) const {
314     return isImm() && Imm.Type == ImmT;
315   }
316 
317   bool isImmModifier() const {
318     return isImm() && Imm.Type != ImmTyNone;
319   }
320 
321   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
322   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
323   bool isDMask() const { return isImmTy(ImmTyDMask); }
324   bool isDim() const { return isImmTy(ImmTyDim); }
325   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
326   bool isDA() const { return isImmTy(ImmTyDA); }
327   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
328   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
329   bool isLWE() const { return isImmTy(ImmTyLWE); }
330   bool isOff() const { return isImmTy(ImmTyOff); }
331   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
332   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
333   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
334   bool isOffen() const { return isImmTy(ImmTyOffen); }
335   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
336   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
337   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
338   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
339   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
340 
341   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
342   bool isGDS() const { return isImmTy(ImmTyGDS); }
343   bool isLDS() const { return isImmTy(ImmTyLDS); }
344   bool isCPol() const { return isImmTy(ImmTyCPol); }
345   bool isSWZ() const { return isImmTy(ImmTySWZ); }
346   bool isTFE() const { return isImmTy(ImmTyTFE); }
347   bool isD16() const { return isImmTy(ImmTyD16); }
348   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
349   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
350   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
351   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
352   bool isFI() const { return isImmTy(ImmTyDppFi); }
353   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
354   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
355   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
356   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
357   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
358   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
359   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
360   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
361   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
362   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
363   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
364   bool isHigh() const { return isImmTy(ImmTyHigh); }
365 
366   bool isMod() const {
367     return isClampSI() || isOModSI();
368   }
369 
370   bool isRegOrImm() const {
371     return isReg() || isImm();
372   }
373 
374   bool isRegClass(unsigned RCID) const;
375 
376   bool isInlineValue() const;
377 
378   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
379     return isRegOrInline(RCID, type) && !hasModifiers();
380   }
381 
382   bool isSCSrcB16() const {
383     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
384   }
385 
386   bool isSCSrcV2B16() const {
387     return isSCSrcB16();
388   }
389 
390   bool isSCSrcB32() const {
391     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
392   }
393 
394   bool isSCSrcB64() const {
395     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
396   }
397 
398   bool isBoolReg() const;
399 
400   bool isSCSrcF16() const {
401     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
402   }
403 
404   bool isSCSrcV2F16() const {
405     return isSCSrcF16();
406   }
407 
408   bool isSCSrcF32() const {
409     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
410   }
411 
412   bool isSCSrcF64() const {
413     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
414   }
415 
416   bool isSSrcB32() const {
417     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
418   }
419 
420   bool isSSrcB16() const {
421     return isSCSrcB16() || isLiteralImm(MVT::i16);
422   }
423 
424   bool isSSrcV2B16() const {
425     llvm_unreachable("cannot happen");
426     return isSSrcB16();
427   }
428 
429   bool isSSrcB64() const {
430     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
431     // See isVSrc64().
432     return isSCSrcB64() || isLiteralImm(MVT::i64);
433   }
434 
435   bool isSSrcF32() const {
436     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
437   }
438 
439   bool isSSrcF64() const {
440     return isSCSrcB64() || isLiteralImm(MVT::f64);
441   }
442 
443   bool isSSrcF16() const {
444     return isSCSrcB16() || isLiteralImm(MVT::f16);
445   }
446 
447   bool isSSrcV2F16() const {
448     llvm_unreachable("cannot happen");
449     return isSSrcF16();
450   }
451 
452   bool isSSrcV2FP32() const {
453     llvm_unreachable("cannot happen");
454     return isSSrcF32();
455   }
456 
457   bool isSCSrcV2FP32() const {
458     llvm_unreachable("cannot happen");
459     return isSCSrcF32();
460   }
461 
462   bool isSSrcV2INT32() const {
463     llvm_unreachable("cannot happen");
464     return isSSrcB32();
465   }
466 
467   bool isSCSrcV2INT32() const {
468     llvm_unreachable("cannot happen");
469     return isSCSrcB32();
470   }
471 
472   bool isSSrcOrLdsB32() const {
473     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
474            isLiteralImm(MVT::i32) || isExpr();
475   }
476 
477   bool isVCSrcB32() const {
478     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
479   }
480 
481   bool isVCSrcB64() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
483   }
484 
485   bool isVCSrcB16() const {
486     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
487   }
488 
489   bool isVCSrcV2B16() const {
490     return isVCSrcB16();
491   }
492 
493   bool isVCSrcF32() const {
494     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
495   }
496 
497   bool isVCSrcF64() const {
498     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
499   }
500 
501   bool isVCSrcF16() const {
502     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
503   }
504 
505   bool isVCSrcV2F16() const {
506     return isVCSrcF16();
507   }
508 
509   bool isVSrcB32() const {
510     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
511   }
512 
513   bool isVSrcB64() const {
514     return isVCSrcF64() || isLiteralImm(MVT::i64);
515   }
516 
517   bool isVSrcB16() const {
518     return isVCSrcB16() || isLiteralImm(MVT::i16);
519   }
520 
521   bool isVSrcV2B16() const {
522     return isVSrcB16() || isLiteralImm(MVT::v2i16);
523   }
524 
525   bool isVCSrcV2FP32() const {
526     return isVCSrcF64();
527   }
528 
529   bool isVSrcV2FP32() const {
530     return isVSrcF64() || isLiteralImm(MVT::v2f32);
531   }
532 
533   bool isVCSrcV2INT32() const {
534     return isVCSrcB64();
535   }
536 
537   bool isVSrcV2INT32() const {
538     return isVSrcB64() || isLiteralImm(MVT::v2i32);
539   }
540 
541   bool isVSrcF32() const {
542     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
543   }
544 
545   bool isVSrcF64() const {
546     return isVCSrcF64() || isLiteralImm(MVT::f64);
547   }
548 
549   bool isVSrcF16() const {
550     return isVCSrcF16() || isLiteralImm(MVT::f16);
551   }
552 
553   bool isVSrcV2F16() const {
554     return isVSrcF16() || isLiteralImm(MVT::v2f16);
555   }
556 
557   bool isVISrcB32() const {
558     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
559   }
560 
561   bool isVISrcB16() const {
562     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
563   }
564 
565   bool isVISrcV2B16() const {
566     return isVISrcB16();
567   }
568 
569   bool isVISrcF32() const {
570     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
571   }
572 
573   bool isVISrcF16() const {
574     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
575   }
576 
577   bool isVISrcV2F16() const {
578     return isVISrcF16() || isVISrcB32();
579   }
580 
581   bool isVISrc_64B64() const {
582     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
583   }
584 
585   bool isVISrc_64F64() const {
586     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
587   }
588 
589   bool isVISrc_64V2FP32() const {
590     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
591   }
592 
593   bool isVISrc_64V2INT32() const {
594     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
595   }
596 
597   bool isVISrc_256B64() const {
598     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
599   }
600 
601   bool isVISrc_256F64() const {
602     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
603   }
604 
605   bool isVISrc_128B16() const {
606     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
607   }
608 
609   bool isVISrc_128V2B16() const {
610     return isVISrc_128B16();
611   }
612 
613   bool isVISrc_128B32() const {
614     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
615   }
616 
617   bool isVISrc_128F32() const {
618     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
619   }
620 
621   bool isVISrc_256V2FP32() const {
622     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
623   }
624 
625   bool isVISrc_256V2INT32() const {
626     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
627   }
628 
629   bool isVISrc_512B32() const {
630     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
631   }
632 
633   bool isVISrc_512B16() const {
634     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
635   }
636 
637   bool isVISrc_512V2B16() const {
638     return isVISrc_512B16();
639   }
640 
641   bool isVISrc_512F32() const {
642     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
643   }
644 
645   bool isVISrc_512F16() const {
646     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
647   }
648 
649   bool isVISrc_512V2F16() const {
650     return isVISrc_512F16() || isVISrc_512B32();
651   }
652 
653   bool isVISrc_1024B32() const {
654     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
655   }
656 
657   bool isVISrc_1024B16() const {
658     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
659   }
660 
661   bool isVISrc_1024V2B16() const {
662     return isVISrc_1024B16();
663   }
664 
665   bool isVISrc_1024F32() const {
666     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
667   }
668 
669   bool isVISrc_1024F16() const {
670     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
671   }
672 
673   bool isVISrc_1024V2F16() const {
674     return isVISrc_1024F16() || isVISrc_1024B32();
675   }
676 
677   bool isAISrcB32() const {
678     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
679   }
680 
681   bool isAISrcB16() const {
682     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
683   }
684 
685   bool isAISrcV2B16() const {
686     return isAISrcB16();
687   }
688 
689   bool isAISrcF32() const {
690     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
691   }
692 
693   bool isAISrcF16() const {
694     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
695   }
696 
697   bool isAISrcV2F16() const {
698     return isAISrcF16() || isAISrcB32();
699   }
700 
701   bool isAISrc_64B64() const {
702     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
703   }
704 
705   bool isAISrc_64F64() const {
706     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
707   }
708 
709   bool isAISrc_128B32() const {
710     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
711   }
712 
713   bool isAISrc_128B16() const {
714     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
715   }
716 
717   bool isAISrc_128V2B16() const {
718     return isAISrc_128B16();
719   }
720 
721   bool isAISrc_128F32() const {
722     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
723   }
724 
725   bool isAISrc_128F16() const {
726     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
727   }
728 
729   bool isAISrc_128V2F16() const {
730     return isAISrc_128F16() || isAISrc_128B32();
731   }
732 
733   bool isVISrc_128F16() const {
734     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
735   }
736 
737   bool isVISrc_128V2F16() const {
738     return isVISrc_128F16() || isVISrc_128B32();
739   }
740 
741   bool isAISrc_256B64() const {
742     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
743   }
744 
745   bool isAISrc_256F64() const {
746     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
747   }
748 
749   bool isAISrc_512B32() const {
750     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
751   }
752 
753   bool isAISrc_512B16() const {
754     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
755   }
756 
757   bool isAISrc_512V2B16() const {
758     return isAISrc_512B16();
759   }
760 
761   bool isAISrc_512F32() const {
762     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
763   }
764 
765   bool isAISrc_512F16() const {
766     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
767   }
768 
769   bool isAISrc_512V2F16() const {
770     return isAISrc_512F16() || isAISrc_512B32();
771   }
772 
773   bool isAISrc_1024B32() const {
774     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
775   }
776 
777   bool isAISrc_1024B16() const {
778     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
779   }
780 
781   bool isAISrc_1024V2B16() const {
782     return isAISrc_1024B16();
783   }
784 
785   bool isAISrc_1024F32() const {
786     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
787   }
788 
789   bool isAISrc_1024F16() const {
790     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
791   }
792 
793   bool isAISrc_1024V2F16() const {
794     return isAISrc_1024F16() || isAISrc_1024B32();
795   }
796 
797   bool isKImmFP32() const {
798     return isLiteralImm(MVT::f32);
799   }
800 
801   bool isKImmFP16() const {
802     return isLiteralImm(MVT::f16);
803   }
804 
805   bool isMem() const override {
806     return false;
807   }
808 
809   bool isExpr() const {
810     return Kind == Expression;
811   }
812 
813   bool isSoppBrTarget() const {
814     return isExpr() || isImm();
815   }
816 
817   bool isSWaitCnt() const;
818   bool isHwreg() const;
819   bool isSendMsg() const;
820   bool isSwizzle() const;
821   bool isSMRDOffset8() const;
822   bool isSMEMOffset() const;
823   bool isSMRDLiteralOffset() const;
824   bool isDPP8() const;
825   bool isDPPCtrl() const;
826   bool isBLGP() const;
827   bool isCBSZ() const;
828   bool isABID() const;
829   bool isGPRIdxMode() const;
830   bool isS16Imm() const;
831   bool isU16Imm() const;
832   bool isEndpgm() const;
833 
834   StringRef getExpressionAsToken() const {
835     assert(isExpr());
836     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
837     return S->getSymbol().getName();
838   }
839 
840   StringRef getToken() const {
841     assert(isToken());
842 
843     if (Kind == Expression)
844       return getExpressionAsToken();
845 
846     return StringRef(Tok.Data, Tok.Length);
847   }
848 
849   int64_t getImm() const {
850     assert(isImm());
851     return Imm.Val;
852   }
853 
854   void setImm(int64_t Val) {
855     assert(isImm());
856     Imm.Val = Val;
857   }
858 
859   ImmTy getImmTy() const {
860     assert(isImm());
861     return Imm.Type;
862   }
863 
864   unsigned getReg() const override {
865     assert(isRegKind());
866     return Reg.RegNo;
867   }
868 
869   SMLoc getStartLoc() const override {
870     return StartLoc;
871   }
872 
873   SMLoc getEndLoc() const override {
874     return EndLoc;
875   }
876 
877   SMRange getLocRange() const {
878     return SMRange(StartLoc, EndLoc);
879   }
880 
881   Modifiers getModifiers() const {
882     assert(isRegKind() || isImmTy(ImmTyNone));
883     return isRegKind() ? Reg.Mods : Imm.Mods;
884   }
885 
886   void setModifiers(Modifiers Mods) {
887     assert(isRegKind() || isImmTy(ImmTyNone));
888     if (isRegKind())
889       Reg.Mods = Mods;
890     else
891       Imm.Mods = Mods;
892   }
893 
894   bool hasModifiers() const {
895     return getModifiers().hasModifiers();
896   }
897 
898   bool hasFPModifiers() const {
899     return getModifiers().hasFPModifiers();
900   }
901 
902   bool hasIntModifiers() const {
903     return getModifiers().hasIntModifiers();
904   }
905 
906   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
907 
908   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
909 
910   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
911 
912   template <unsigned Bitwidth>
913   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
914 
915   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
916     addKImmFPOperands<16>(Inst, N);
917   }
918 
919   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
920     addKImmFPOperands<32>(Inst, N);
921   }
922 
923   void addRegOperands(MCInst &Inst, unsigned N) const;
924 
925   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
926     addRegOperands(Inst, N);
927   }
928 
929   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
930     if (isRegKind())
931       addRegOperands(Inst, N);
932     else if (isExpr())
933       Inst.addOperand(MCOperand::createExpr(Expr));
934     else
935       addImmOperands(Inst, N);
936   }
937 
938   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
939     Modifiers Mods = getModifiers();
940     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
941     if (isRegKind()) {
942       addRegOperands(Inst, N);
943     } else {
944       addImmOperands(Inst, N, false);
945     }
946   }
947 
948   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
949     assert(!hasIntModifiers());
950     addRegOrImmWithInputModsOperands(Inst, N);
951   }
952 
953   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
954     assert(!hasFPModifiers());
955     addRegOrImmWithInputModsOperands(Inst, N);
956   }
957 
958   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
959     Modifiers Mods = getModifiers();
960     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
961     assert(isRegKind());
962     addRegOperands(Inst, N);
963   }
964 
965   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
966     assert(!hasIntModifiers());
967     addRegWithInputModsOperands(Inst, N);
968   }
969 
970   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
971     assert(!hasFPModifiers());
972     addRegWithInputModsOperands(Inst, N);
973   }
974 
975   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
976     if (isImm())
977       addImmOperands(Inst, N);
978     else {
979       assert(isExpr());
980       Inst.addOperand(MCOperand::createExpr(Expr));
981     }
982   }
983 
984   static void printImmTy(raw_ostream& OS, ImmTy Type) {
985     switch (Type) {
986     case ImmTyNone: OS << "None"; break;
987     case ImmTyGDS: OS << "GDS"; break;
988     case ImmTyLDS: OS << "LDS"; break;
989     case ImmTyOffen: OS << "Offen"; break;
990     case ImmTyIdxen: OS << "Idxen"; break;
991     case ImmTyAddr64: OS << "Addr64"; break;
992     case ImmTyOffset: OS << "Offset"; break;
993     case ImmTyInstOffset: OS << "InstOffset"; break;
994     case ImmTyOffset0: OS << "Offset0"; break;
995     case ImmTyOffset1: OS << "Offset1"; break;
996     case ImmTyCPol: OS << "CPol"; break;
997     case ImmTySWZ: OS << "SWZ"; break;
998     case ImmTyTFE: OS << "TFE"; break;
999     case ImmTyD16: OS << "D16"; break;
1000     case ImmTyFORMAT: OS << "FORMAT"; break;
1001     case ImmTyClampSI: OS << "ClampSI"; break;
1002     case ImmTyOModSI: OS << "OModSI"; break;
1003     case ImmTyDPP8: OS << "DPP8"; break;
1004     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1005     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1006     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1007     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1008     case ImmTyDppFi: OS << "FI"; break;
1009     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1010     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1011     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1012     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1013     case ImmTyDMask: OS << "DMask"; break;
1014     case ImmTyDim: OS << "Dim"; break;
1015     case ImmTyUNorm: OS << "UNorm"; break;
1016     case ImmTyDA: OS << "DA"; break;
1017     case ImmTyR128A16: OS << "R128A16"; break;
1018     case ImmTyA16: OS << "A16"; break;
1019     case ImmTyLWE: OS << "LWE"; break;
1020     case ImmTyOff: OS << "Off"; break;
1021     case ImmTyExpTgt: OS << "ExpTgt"; break;
1022     case ImmTyExpCompr: OS << "ExpCompr"; break;
1023     case ImmTyExpVM: OS << "ExpVM"; break;
1024     case ImmTyHwreg: OS << "Hwreg"; break;
1025     case ImmTySendMsg: OS << "SendMsg"; break;
1026     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1027     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1028     case ImmTyAttrChan: OS << "AttrChan"; break;
1029     case ImmTyOpSel: OS << "OpSel"; break;
1030     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1031     case ImmTyNegLo: OS << "NegLo"; break;
1032     case ImmTyNegHi: OS << "NegHi"; break;
1033     case ImmTySwizzle: OS << "Swizzle"; break;
1034     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1035     case ImmTyHigh: OS << "High"; break;
1036     case ImmTyBLGP: OS << "BLGP"; break;
1037     case ImmTyCBSZ: OS << "CBSZ"; break;
1038     case ImmTyABID: OS << "ABID"; break;
1039     case ImmTyEndpgm: OS << "Endpgm"; break;
1040     }
1041   }
1042 
1043   void print(raw_ostream &OS) const override {
1044     switch (Kind) {
1045     case Register:
1046       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1047       break;
1048     case Immediate:
1049       OS << '<' << getImm();
1050       if (getImmTy() != ImmTyNone) {
1051         OS << " type: "; printImmTy(OS, getImmTy());
1052       }
1053       OS << " mods: " << Imm.Mods << '>';
1054       break;
1055     case Token:
1056       OS << '\'' << getToken() << '\'';
1057       break;
1058     case Expression:
1059       OS << "<expr " << *Expr << '>';
1060       break;
1061     }
1062   }
1063 
1064   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1065                                       int64_t Val, SMLoc Loc,
1066                                       ImmTy Type = ImmTyNone,
1067                                       bool IsFPImm = false) {
1068     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1069     Op->Imm.Val = Val;
1070     Op->Imm.IsFPImm = IsFPImm;
1071     Op->Imm.Kind = ImmKindTyNone;
1072     Op->Imm.Type = Type;
1073     Op->Imm.Mods = Modifiers();
1074     Op->StartLoc = Loc;
1075     Op->EndLoc = Loc;
1076     return Op;
1077   }
1078 
1079   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1080                                         StringRef Str, SMLoc Loc,
1081                                         bool HasExplicitEncodingSize = true) {
1082     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1083     Res->Tok.Data = Str.data();
1084     Res->Tok.Length = Str.size();
1085     Res->StartLoc = Loc;
1086     Res->EndLoc = Loc;
1087     return Res;
1088   }
1089 
1090   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1091                                       unsigned RegNo, SMLoc S,
1092                                       SMLoc E) {
1093     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1094     Op->Reg.RegNo = RegNo;
1095     Op->Reg.Mods = Modifiers();
1096     Op->StartLoc = S;
1097     Op->EndLoc = E;
1098     return Op;
1099   }
1100 
1101   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1102                                        const class MCExpr *Expr, SMLoc S) {
1103     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1104     Op->Expr = Expr;
1105     Op->StartLoc = S;
1106     Op->EndLoc = S;
1107     return Op;
1108   }
1109 };
1110 
1111 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1112   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1113   return OS;
1114 }
1115 
1116 //===----------------------------------------------------------------------===//
1117 // AsmParser
1118 //===----------------------------------------------------------------------===//
1119 
1120 // Holds info related to the current kernel, e.g. count of SGPRs used.
1121 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1122 // .amdgpu_hsa_kernel or at EOF.
1123 class KernelScopeInfo {
1124   int SgprIndexUnusedMin = -1;
1125   int VgprIndexUnusedMin = -1;
1126   MCContext *Ctx = nullptr;
1127 
1128   void usesSgprAt(int i) {
1129     if (i >= SgprIndexUnusedMin) {
1130       SgprIndexUnusedMin = ++i;
1131       if (Ctx) {
1132         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1133         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1134       }
1135     }
1136   }
1137 
1138   void usesVgprAt(int i) {
1139     if (i >= VgprIndexUnusedMin) {
1140       VgprIndexUnusedMin = ++i;
1141       if (Ctx) {
1142         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1143         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1144       }
1145     }
1146   }
1147 
1148 public:
1149   KernelScopeInfo() = default;
1150 
1151   void initialize(MCContext &Context) {
1152     Ctx = &Context;
1153     usesSgprAt(SgprIndexUnusedMin = -1);
1154     usesVgprAt(VgprIndexUnusedMin = -1);
1155   }
1156 
1157   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1158     switch (RegKind) {
1159       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1160       case IS_AGPR: // fall through
1161       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1162       default: break;
1163     }
1164   }
1165 };
1166 
1167 class AMDGPUAsmParser : public MCTargetAsmParser {
1168   MCAsmParser &Parser;
1169 
1170   // Number of extra operands parsed after the first optional operand.
1171   // This may be necessary to skip hardcoded mandatory operands.
1172   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1173 
1174   unsigned ForcedEncodingSize = 0;
1175   bool ForcedDPP = false;
1176   bool ForcedSDWA = false;
1177   KernelScopeInfo KernelScope;
1178   unsigned CPolSeen;
1179 
1180   /// @name Auto-generated Match Functions
1181   /// {
1182 
1183 #define GET_ASSEMBLER_HEADER
1184 #include "AMDGPUGenAsmMatcher.inc"
1185 
1186   /// }
1187 
1188 private:
1189   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1190   bool OutOfRangeError(SMRange Range);
1191   /// Calculate VGPR/SGPR blocks required for given target, reserved
1192   /// registers, and user-specified NextFreeXGPR values.
1193   ///
1194   /// \param Features [in] Target features, used for bug corrections.
1195   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1196   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1197   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1198   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1199   /// descriptor field, if valid.
1200   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1201   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1202   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1203   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1204   /// \param VGPRBlocks [out] Result VGPR block count.
1205   /// \param SGPRBlocks [out] Result SGPR block count.
1206   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1207                           bool FlatScrUsed, bool XNACKUsed,
1208                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1209                           SMRange VGPRRange, unsigned NextFreeSGPR,
1210                           SMRange SGPRRange, unsigned &VGPRBlocks,
1211                           unsigned &SGPRBlocks);
1212   bool ParseDirectiveAMDGCNTarget();
1213   bool ParseDirectiveAMDHSAKernel();
1214   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1215   bool ParseDirectiveHSACodeObjectVersion();
1216   bool ParseDirectiveHSACodeObjectISA();
1217   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1218   bool ParseDirectiveAMDKernelCodeT();
1219   // TODO: Possibly make subtargetHasRegister const.
1220   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1221   bool ParseDirectiveAMDGPUHsaKernel();
1222 
1223   bool ParseDirectiveISAVersion();
1224   bool ParseDirectiveHSAMetadata();
1225   bool ParseDirectivePALMetadataBegin();
1226   bool ParseDirectivePALMetadata();
1227   bool ParseDirectiveAMDGPULDS();
1228 
1229   /// Common code to parse out a block of text (typically YAML) between start and
1230   /// end directives.
1231   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1232                            const char *AssemblerDirectiveEnd,
1233                            std::string &CollectString);
1234 
1235   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1236                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1237   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1238                            unsigned &RegNum, unsigned &RegWidth,
1239                            bool RestoreOnFailure = false);
1240   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1241                            unsigned &RegNum, unsigned &RegWidth,
1242                            SmallVectorImpl<AsmToken> &Tokens);
1243   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1244                            unsigned &RegWidth,
1245                            SmallVectorImpl<AsmToken> &Tokens);
1246   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1247                            unsigned &RegWidth,
1248                            SmallVectorImpl<AsmToken> &Tokens);
1249   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1250                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1251   bool ParseRegRange(unsigned& Num, unsigned& Width);
1252   unsigned getRegularReg(RegisterKind RegKind,
1253                          unsigned RegNum,
1254                          unsigned RegWidth,
1255                          SMLoc Loc);
1256 
1257   bool isRegister();
1258   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1259   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1260   void initializeGprCountSymbol(RegisterKind RegKind);
1261   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1262                              unsigned RegWidth);
1263   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1264                     bool IsAtomic, bool IsLds = false);
1265   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1266                  bool IsGdsHardcoded);
1267 
1268 public:
1269   enum AMDGPUMatchResultTy {
1270     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1271   };
1272   enum OperandMode {
1273     OperandMode_Default,
1274     OperandMode_NSA,
1275   };
1276 
1277   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1278 
1279   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1280                const MCInstrInfo &MII,
1281                const MCTargetOptions &Options)
1282       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1283     MCAsmParserExtension::Initialize(Parser);
1284 
1285     if (getFeatureBits().none()) {
1286       // Set default features.
1287       copySTI().ToggleFeature("southern-islands");
1288     }
1289 
1290     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1291 
1292     {
1293       // TODO: make those pre-defined variables read-only.
1294       // Currently there is none suitable machinery in the core llvm-mc for this.
1295       // MCSymbol::isRedefinable is intended for another purpose, and
1296       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1297       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1298       MCContext &Ctx = getContext();
1299       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1300         MCSymbol *Sym =
1301             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1302         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1303         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1304         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1305         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1306         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1307       } else {
1308         MCSymbol *Sym =
1309             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1311         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1312         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1313         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1314         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1315       }
1316       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1317         initializeGprCountSymbol(IS_VGPR);
1318         initializeGprCountSymbol(IS_SGPR);
1319       } else
1320         KernelScope.initialize(getContext());
1321     }
1322   }
1323 
1324   bool hasMIMG_R128() const {
1325     return AMDGPU::hasMIMG_R128(getSTI());
1326   }
1327 
1328   bool hasPackedD16() const {
1329     return AMDGPU::hasPackedD16(getSTI());
1330   }
1331 
1332   bool hasGFX10A16() const {
1333     return AMDGPU::hasGFX10A16(getSTI());
1334   }
1335 
1336   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1337 
1338   bool isSI() const {
1339     return AMDGPU::isSI(getSTI());
1340   }
1341 
1342   bool isCI() const {
1343     return AMDGPU::isCI(getSTI());
1344   }
1345 
1346   bool isVI() const {
1347     return AMDGPU::isVI(getSTI());
1348   }
1349 
1350   bool isGFX9() const {
1351     return AMDGPU::isGFX9(getSTI());
1352   }
1353 
1354   bool isGFX90A() const {
1355     return AMDGPU::isGFX90A(getSTI());
1356   }
1357 
1358   bool isGFX9Plus() const {
1359     return AMDGPU::isGFX9Plus(getSTI());
1360   }
1361 
1362   bool isGFX10() const {
1363     return AMDGPU::isGFX10(getSTI());
1364   }
1365 
1366   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1367 
1368   bool isGFX10_BEncoding() const {
1369     return AMDGPU::isGFX10_BEncoding(getSTI());
1370   }
1371 
1372   bool hasInv2PiInlineImm() const {
1373     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1374   }
1375 
1376   bool hasFlatOffsets() const {
1377     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1378   }
1379 
1380   bool hasArchitectedFlatScratch() const {
1381     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1382   }
1383 
1384   bool hasSGPR102_SGPR103() const {
1385     return !isVI() && !isGFX9();
1386   }
1387 
1388   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1389 
1390   bool hasIntClamp() const {
1391     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1392   }
1393 
1394   AMDGPUTargetStreamer &getTargetStreamer() {
1395     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1396     return static_cast<AMDGPUTargetStreamer &>(TS);
1397   }
1398 
1399   const MCRegisterInfo *getMRI() const {
1400     // We need this const_cast because for some reason getContext() is not const
1401     // in MCAsmParser.
1402     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1403   }
1404 
1405   const MCInstrInfo *getMII() const {
1406     return &MII;
1407   }
1408 
1409   const FeatureBitset &getFeatureBits() const {
1410     return getSTI().getFeatureBits();
1411   }
1412 
1413   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1414   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1415   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1416 
1417   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1418   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1419   bool isForcedDPP() const { return ForcedDPP; }
1420   bool isForcedSDWA() const { return ForcedSDWA; }
1421   ArrayRef<unsigned> getMatchedVariants() const;
1422   StringRef getMatchedVariantName() const;
1423 
1424   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1425   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1426                      bool RestoreOnFailure);
1427   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1428   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1429                                         SMLoc &EndLoc) override;
1430   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1431   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1432                                       unsigned Kind) override;
1433   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1434                                OperandVector &Operands, MCStreamer &Out,
1435                                uint64_t &ErrorInfo,
1436                                bool MatchingInlineAsm) override;
1437   bool ParseDirective(AsmToken DirectiveID) override;
1438   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1439                                     OperandMode Mode = OperandMode_Default);
1440   StringRef parseMnemonicSuffix(StringRef Name);
1441   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1442                         SMLoc NameLoc, OperandVector &Operands) override;
1443   //bool ProcessInstruction(MCInst &Inst);
1444 
1445   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1446 
1447   OperandMatchResultTy
1448   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1449                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1450                      bool (*ConvertResult)(int64_t &) = nullptr);
1451 
1452   OperandMatchResultTy
1453   parseOperandArrayWithPrefix(const char *Prefix,
1454                               OperandVector &Operands,
1455                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1456                               bool (*ConvertResult)(int64_t&) = nullptr);
1457 
1458   OperandMatchResultTy
1459   parseNamedBit(StringRef Name, OperandVector &Operands,
1460                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1461   OperandMatchResultTy parseCPol(OperandVector &Operands);
1462   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1463                                              StringRef &Value,
1464                                              SMLoc &StringLoc);
1465 
1466   bool isModifier();
1467   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1468   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1469   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1470   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1471   bool parseSP3NegModifier();
1472   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1473   OperandMatchResultTy parseReg(OperandVector &Operands);
1474   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1475   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1476   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1477   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1478   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1479   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1480   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1481   OperandMatchResultTy parseUfmt(int64_t &Format);
1482   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1483   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1484   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1485   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1486   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1487   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1488   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1489 
1490   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1491   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1492   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1493   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1494 
1495   bool parseCnt(int64_t &IntVal);
1496   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1497   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1498 
1499 private:
1500   struct OperandInfoTy {
1501     SMLoc Loc;
1502     int64_t Id;
1503     bool IsSymbolic = false;
1504     bool IsDefined = false;
1505 
1506     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1507   };
1508 
1509   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1510   bool validateSendMsg(const OperandInfoTy &Msg,
1511                        const OperandInfoTy &Op,
1512                        const OperandInfoTy &Stream);
1513 
1514   bool parseHwregBody(OperandInfoTy &HwReg,
1515                       OperandInfoTy &Offset,
1516                       OperandInfoTy &Width);
1517   bool validateHwreg(const OperandInfoTy &HwReg,
1518                      const OperandInfoTy &Offset,
1519                      const OperandInfoTy &Width);
1520 
1521   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1522   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1523 
1524   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1525                       const OperandVector &Operands) const;
1526   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1527   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1528   SMLoc getLitLoc(const OperandVector &Operands) const;
1529   SMLoc getConstLoc(const OperandVector &Operands) const;
1530 
1531   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1532   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1533   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1534   bool validateSOPLiteral(const MCInst &Inst) const;
1535   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1536   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1537   bool validateIntClampSupported(const MCInst &Inst);
1538   bool validateMIMGAtomicDMask(const MCInst &Inst);
1539   bool validateMIMGGatherDMask(const MCInst &Inst);
1540   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1541   bool validateMIMGDataSize(const MCInst &Inst);
1542   bool validateMIMGAddrSize(const MCInst &Inst);
1543   bool validateMIMGD16(const MCInst &Inst);
1544   bool validateMIMGDim(const MCInst &Inst);
1545   bool validateMIMGMSAA(const MCInst &Inst);
1546   bool validateOpSel(const MCInst &Inst);
1547   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1548   bool validateVccOperand(unsigned Reg) const;
1549   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1550   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1551   bool validateAGPRLdSt(const MCInst &Inst) const;
1552   bool validateVGPRAlign(const MCInst &Inst) const;
1553   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1554   bool validateDivScale(const MCInst &Inst);
1555   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1556                              const SMLoc &IDLoc);
1557   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1558   unsigned getConstantBusLimit(unsigned Opcode) const;
1559   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1560   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1561   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1562 
1563   bool isSupportedMnemo(StringRef Mnemo,
1564                         const FeatureBitset &FBS);
1565   bool isSupportedMnemo(StringRef Mnemo,
1566                         const FeatureBitset &FBS,
1567                         ArrayRef<unsigned> Variants);
1568   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1569 
1570   bool isId(const StringRef Id) const;
1571   bool isId(const AsmToken &Token, const StringRef Id) const;
1572   bool isToken(const AsmToken::TokenKind Kind) const;
1573   bool trySkipId(const StringRef Id);
1574   bool trySkipId(const StringRef Pref, const StringRef Id);
1575   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1576   bool trySkipToken(const AsmToken::TokenKind Kind);
1577   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1578   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1579   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1580 
1581   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1582   AsmToken::TokenKind getTokenKind() const;
1583   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1584   bool parseExpr(OperandVector &Operands);
1585   StringRef getTokenStr() const;
1586   AsmToken peekToken();
1587   AsmToken getToken() const;
1588   SMLoc getLoc() const;
1589   void lex();
1590 
1591 public:
1592   void onBeginOfFile() override;
1593 
1594   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1595   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1596 
1597   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1598   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1599   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1600   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1601   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1602   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1603 
1604   bool parseSwizzleOperand(int64_t &Op,
1605                            const unsigned MinVal,
1606                            const unsigned MaxVal,
1607                            const StringRef ErrMsg,
1608                            SMLoc &Loc);
1609   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1610                             const unsigned MinVal,
1611                             const unsigned MaxVal,
1612                             const StringRef ErrMsg);
1613   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1614   bool parseSwizzleOffset(int64_t &Imm);
1615   bool parseSwizzleMacro(int64_t &Imm);
1616   bool parseSwizzleQuadPerm(int64_t &Imm);
1617   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1618   bool parseSwizzleBroadcast(int64_t &Imm);
1619   bool parseSwizzleSwap(int64_t &Imm);
1620   bool parseSwizzleReverse(int64_t &Imm);
1621 
1622   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1623   int64_t parseGPRIdxMacro();
1624 
1625   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1626   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1627   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1628   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1629 
1630   AMDGPUOperand::Ptr defaultCPol() const;
1631 
1632   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1633   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1634   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1635   AMDGPUOperand::Ptr defaultFlatOffset() const;
1636 
1637   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1638 
1639   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1640                OptionalImmIndexMap &OptionalIdx);
1641   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1642   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1643   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1644   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1645                 OptionalImmIndexMap &OptionalIdx);
1646 
1647   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1648 
1649   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1650                bool IsAtomic = false);
1651   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1652   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1653 
1654   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1655 
1656   bool parseDimId(unsigned &Encoding);
1657   OperandMatchResultTy parseDim(OperandVector &Operands);
1658   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1659   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1660   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1661   int64_t parseDPPCtrlSel(StringRef Ctrl);
1662   int64_t parseDPPCtrlPerm();
1663   AMDGPUOperand::Ptr defaultRowMask() const;
1664   AMDGPUOperand::Ptr defaultBankMask() const;
1665   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1666   AMDGPUOperand::Ptr defaultFI() const;
1667   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1668   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1669 
1670   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1671                                     AMDGPUOperand::ImmTy Type);
1672   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1673   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1674   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1675   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1676   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1677   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1678   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1679                uint64_t BasicInstType,
1680                bool SkipDstVcc = false,
1681                bool SkipSrcVcc = false);
1682 
1683   AMDGPUOperand::Ptr defaultBLGP() const;
1684   AMDGPUOperand::Ptr defaultCBSZ() const;
1685   AMDGPUOperand::Ptr defaultABID() const;
1686 
1687   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1688   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1689 };
1690 
1691 struct OptionalOperand {
1692   const char *Name;
1693   AMDGPUOperand::ImmTy Type;
1694   bool IsBit;
1695   bool (*ConvertResult)(int64_t&);
1696 };
1697 
1698 } // end anonymous namespace
1699 
1700 // May be called with integer type with equivalent bitwidth.
1701 static const fltSemantics *getFltSemantics(unsigned Size) {
1702   switch (Size) {
1703   case 4:
1704     return &APFloat::IEEEsingle();
1705   case 8:
1706     return &APFloat::IEEEdouble();
1707   case 2:
1708     return &APFloat::IEEEhalf();
1709   default:
1710     llvm_unreachable("unsupported fp type");
1711   }
1712 }
1713 
1714 static const fltSemantics *getFltSemantics(MVT VT) {
1715   return getFltSemantics(VT.getSizeInBits() / 8);
1716 }
1717 
1718 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1719   switch (OperandType) {
1720   case AMDGPU::OPERAND_REG_IMM_INT32:
1721   case AMDGPU::OPERAND_REG_IMM_FP32:
1722   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1723   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1724   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1725   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1726   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1727   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1728   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1729   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1730   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1731   case AMDGPU::OPERAND_KIMM32:
1732     return &APFloat::IEEEsingle();
1733   case AMDGPU::OPERAND_REG_IMM_INT64:
1734   case AMDGPU::OPERAND_REG_IMM_FP64:
1735   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1736   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1737   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1738     return &APFloat::IEEEdouble();
1739   case AMDGPU::OPERAND_REG_IMM_INT16:
1740   case AMDGPU::OPERAND_REG_IMM_FP16:
1741   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1742   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1743   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1744   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1745   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1746   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1747   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1748   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1749   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1750   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1751   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1752   case AMDGPU::OPERAND_KIMM16:
1753     return &APFloat::IEEEhalf();
1754   default:
1755     llvm_unreachable("unsupported fp type");
1756   }
1757 }
1758 
1759 //===----------------------------------------------------------------------===//
1760 // Operand
1761 //===----------------------------------------------------------------------===//
1762 
1763 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1764   bool Lost;
1765 
1766   // Convert literal to single precision
1767   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1768                                                APFloat::rmNearestTiesToEven,
1769                                                &Lost);
1770   // We allow precision lost but not overflow or underflow
1771   if (Status != APFloat::opOK &&
1772       Lost &&
1773       ((Status & APFloat::opOverflow)  != 0 ||
1774        (Status & APFloat::opUnderflow) != 0)) {
1775     return false;
1776   }
1777 
1778   return true;
1779 }
1780 
1781 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1782   return isUIntN(Size, Val) || isIntN(Size, Val);
1783 }
1784 
1785 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1786   if (VT.getScalarType() == MVT::i16) {
1787     // FP immediate values are broken.
1788     return isInlinableIntLiteral(Val);
1789   }
1790 
1791   // f16/v2f16 operands work correctly for all values.
1792   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1793 }
1794 
1795 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1796 
1797   // This is a hack to enable named inline values like
1798   // shared_base with both 32-bit and 64-bit operands.
1799   // Note that these values are defined as
1800   // 32-bit operands only.
1801   if (isInlineValue()) {
1802     return true;
1803   }
1804 
1805   if (!isImmTy(ImmTyNone)) {
1806     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1807     return false;
1808   }
1809   // TODO: We should avoid using host float here. It would be better to
1810   // check the float bit values which is what a few other places do.
1811   // We've had bot failures before due to weird NaN support on mips hosts.
1812 
1813   APInt Literal(64, Imm.Val);
1814 
1815   if (Imm.IsFPImm) { // We got fp literal token
1816     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1817       return AMDGPU::isInlinableLiteral64(Imm.Val,
1818                                           AsmParser->hasInv2PiInlineImm());
1819     }
1820 
1821     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1822     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1823       return false;
1824 
1825     if (type.getScalarSizeInBits() == 16) {
1826       return isInlineableLiteralOp16(
1827         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1828         type, AsmParser->hasInv2PiInlineImm());
1829     }
1830 
1831     // Check if single precision literal is inlinable
1832     return AMDGPU::isInlinableLiteral32(
1833       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1834       AsmParser->hasInv2PiInlineImm());
1835   }
1836 
1837   // We got int literal token.
1838   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1839     return AMDGPU::isInlinableLiteral64(Imm.Val,
1840                                         AsmParser->hasInv2PiInlineImm());
1841   }
1842 
1843   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1844     return false;
1845   }
1846 
1847   if (type.getScalarSizeInBits() == 16) {
1848     return isInlineableLiteralOp16(
1849       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1850       type, AsmParser->hasInv2PiInlineImm());
1851   }
1852 
1853   return AMDGPU::isInlinableLiteral32(
1854     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1855     AsmParser->hasInv2PiInlineImm());
1856 }
1857 
1858 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1859   // Check that this immediate can be added as literal
1860   if (!isImmTy(ImmTyNone)) {
1861     return false;
1862   }
1863 
1864   if (!Imm.IsFPImm) {
1865     // We got int literal token.
1866 
1867     if (type == MVT::f64 && hasFPModifiers()) {
1868       // Cannot apply fp modifiers to int literals preserving the same semantics
1869       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1870       // disable these cases.
1871       return false;
1872     }
1873 
1874     unsigned Size = type.getSizeInBits();
1875     if (Size == 64)
1876       Size = 32;
1877 
1878     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1879     // types.
1880     return isSafeTruncation(Imm.Val, Size);
1881   }
1882 
1883   // We got fp literal token
1884   if (type == MVT::f64) { // Expected 64-bit fp operand
1885     // We would set low 64-bits of literal to zeroes but we accept this literals
1886     return true;
1887   }
1888 
1889   if (type == MVT::i64) { // Expected 64-bit int operand
1890     // We don't allow fp literals in 64-bit integer instructions. It is
1891     // unclear how we should encode them.
1892     return false;
1893   }
1894 
1895   // We allow fp literals with f16x2 operands assuming that the specified
1896   // literal goes into the lower half and the upper half is zero. We also
1897   // require that the literal may be losslesly converted to f16.
1898   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1899                      (type == MVT::v2i16)? MVT::i16 :
1900                      (type == MVT::v2f32)? MVT::f32 : type;
1901 
1902   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1903   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1904 }
1905 
1906 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1907   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1908 }
1909 
1910 bool AMDGPUOperand::isVRegWithInputMods() const {
1911   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1912          // GFX90A allows DPP on 64-bit operands.
1913          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1914           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1915 }
1916 
1917 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1918   if (AsmParser->isVI())
1919     return isVReg32();
1920   else if (AsmParser->isGFX9Plus())
1921     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1922   else
1923     return false;
1924 }
1925 
1926 bool AMDGPUOperand::isSDWAFP16Operand() const {
1927   return isSDWAOperand(MVT::f16);
1928 }
1929 
1930 bool AMDGPUOperand::isSDWAFP32Operand() const {
1931   return isSDWAOperand(MVT::f32);
1932 }
1933 
1934 bool AMDGPUOperand::isSDWAInt16Operand() const {
1935   return isSDWAOperand(MVT::i16);
1936 }
1937 
1938 bool AMDGPUOperand::isSDWAInt32Operand() const {
1939   return isSDWAOperand(MVT::i32);
1940 }
1941 
1942 bool AMDGPUOperand::isBoolReg() const {
1943   auto FB = AsmParser->getFeatureBits();
1944   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1945                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1946 }
1947 
1948 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1949 {
1950   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1951   assert(Size == 2 || Size == 4 || Size == 8);
1952 
1953   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1954 
1955   if (Imm.Mods.Abs) {
1956     Val &= ~FpSignMask;
1957   }
1958   if (Imm.Mods.Neg) {
1959     Val ^= FpSignMask;
1960   }
1961 
1962   return Val;
1963 }
1964 
1965 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1966   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1967                              Inst.getNumOperands())) {
1968     addLiteralImmOperand(Inst, Imm.Val,
1969                          ApplyModifiers &
1970                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1971   } else {
1972     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1973     Inst.addOperand(MCOperand::createImm(Imm.Val));
1974     setImmKindNone();
1975   }
1976 }
1977 
1978 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1979   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1980   auto OpNum = Inst.getNumOperands();
1981   // Check that this operand accepts literals
1982   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1983 
1984   if (ApplyModifiers) {
1985     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1986     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1987     Val = applyInputFPModifiers(Val, Size);
1988   }
1989 
1990   APInt Literal(64, Val);
1991   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1992 
1993   if (Imm.IsFPImm) { // We got fp literal token
1994     switch (OpTy) {
1995     case AMDGPU::OPERAND_REG_IMM_INT64:
1996     case AMDGPU::OPERAND_REG_IMM_FP64:
1997     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1998     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1999     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2000       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2001                                        AsmParser->hasInv2PiInlineImm())) {
2002         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2003         setImmKindConst();
2004         return;
2005       }
2006 
2007       // Non-inlineable
2008       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2009         // For fp operands we check if low 32 bits are zeros
2010         if (Literal.getLoBits(32) != 0) {
2011           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2012           "Can't encode literal as exact 64-bit floating-point operand. "
2013           "Low 32-bits will be set to zero");
2014         }
2015 
2016         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2017         setImmKindLiteral();
2018         return;
2019       }
2020 
2021       // We don't allow fp literals in 64-bit integer instructions. It is
2022       // unclear how we should encode them. This case should be checked earlier
2023       // in predicate methods (isLiteralImm())
2024       llvm_unreachable("fp literal in 64-bit integer instruction.");
2025 
2026     case AMDGPU::OPERAND_REG_IMM_INT32:
2027     case AMDGPU::OPERAND_REG_IMM_FP32:
2028     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2029     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2030     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2031     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2032     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2033     case AMDGPU::OPERAND_REG_IMM_INT16:
2034     case AMDGPU::OPERAND_REG_IMM_FP16:
2035     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2036     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2037     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2038     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2039     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2040     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2041     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2042     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2043     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2044     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2045     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2046     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2047     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2048     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2049     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2050     case AMDGPU::OPERAND_KIMM32:
2051     case AMDGPU::OPERAND_KIMM16: {
2052       bool lost;
2053       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2054       // Convert literal to single precision
2055       FPLiteral.convert(*getOpFltSemantics(OpTy),
2056                         APFloat::rmNearestTiesToEven, &lost);
2057       // We allow precision lost but not overflow or underflow. This should be
2058       // checked earlier in isLiteralImm()
2059 
2060       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2061       Inst.addOperand(MCOperand::createImm(ImmVal));
2062       setImmKindLiteral();
2063       return;
2064     }
2065     default:
2066       llvm_unreachable("invalid operand size");
2067     }
2068 
2069     return;
2070   }
2071 
2072   // We got int literal token.
2073   // Only sign extend inline immediates.
2074   switch (OpTy) {
2075   case AMDGPU::OPERAND_REG_IMM_INT32:
2076   case AMDGPU::OPERAND_REG_IMM_FP32:
2077   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2078   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2079   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2080   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2081   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2082   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2083   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2084   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2085   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2086   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2087   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2088     if (isSafeTruncation(Val, 32) &&
2089         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2090                                      AsmParser->hasInv2PiInlineImm())) {
2091       Inst.addOperand(MCOperand::createImm(Val));
2092       setImmKindConst();
2093       return;
2094     }
2095 
2096     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2097     setImmKindLiteral();
2098     return;
2099 
2100   case AMDGPU::OPERAND_REG_IMM_INT64:
2101   case AMDGPU::OPERAND_REG_IMM_FP64:
2102   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2103   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2104   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2105     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2106       Inst.addOperand(MCOperand::createImm(Val));
2107       setImmKindConst();
2108       return;
2109     }
2110 
2111     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2112     setImmKindLiteral();
2113     return;
2114 
2115   case AMDGPU::OPERAND_REG_IMM_INT16:
2116   case AMDGPU::OPERAND_REG_IMM_FP16:
2117   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2118   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2119   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2120   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2121   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2122     if (isSafeTruncation(Val, 16) &&
2123         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2124                                      AsmParser->hasInv2PiInlineImm())) {
2125       Inst.addOperand(MCOperand::createImm(Val));
2126       setImmKindConst();
2127       return;
2128     }
2129 
2130     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2131     setImmKindLiteral();
2132     return;
2133 
2134   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2135   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2136   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2137   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2138     assert(isSafeTruncation(Val, 16));
2139     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2140                                         AsmParser->hasInv2PiInlineImm()));
2141 
2142     Inst.addOperand(MCOperand::createImm(Val));
2143     return;
2144   }
2145   case AMDGPU::OPERAND_KIMM32:
2146     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2147     setImmKindNone();
2148     return;
2149   case AMDGPU::OPERAND_KIMM16:
2150     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2151     setImmKindNone();
2152     return;
2153   default:
2154     llvm_unreachable("invalid operand size");
2155   }
2156 }
2157 
2158 template <unsigned Bitwidth>
2159 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2160   APInt Literal(64, Imm.Val);
2161   setImmKindNone();
2162 
2163   if (!Imm.IsFPImm) {
2164     // We got int literal token.
2165     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2166     return;
2167   }
2168 
2169   bool Lost;
2170   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2171   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2172                     APFloat::rmNearestTiesToEven, &Lost);
2173   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2174 }
2175 
2176 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2177   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2178 }
2179 
2180 static bool isInlineValue(unsigned Reg) {
2181   switch (Reg) {
2182   case AMDGPU::SRC_SHARED_BASE:
2183   case AMDGPU::SRC_SHARED_LIMIT:
2184   case AMDGPU::SRC_PRIVATE_BASE:
2185   case AMDGPU::SRC_PRIVATE_LIMIT:
2186   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2187     return true;
2188   case AMDGPU::SRC_VCCZ:
2189   case AMDGPU::SRC_EXECZ:
2190   case AMDGPU::SRC_SCC:
2191     return true;
2192   case AMDGPU::SGPR_NULL:
2193     return true;
2194   default:
2195     return false;
2196   }
2197 }
2198 
2199 bool AMDGPUOperand::isInlineValue() const {
2200   return isRegKind() && ::isInlineValue(getReg());
2201 }
2202 
2203 //===----------------------------------------------------------------------===//
2204 // AsmParser
2205 //===----------------------------------------------------------------------===//
2206 
2207 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2208   if (Is == IS_VGPR) {
2209     switch (RegWidth) {
2210       default: return -1;
2211       case 1: return AMDGPU::VGPR_32RegClassID;
2212       case 2: return AMDGPU::VReg_64RegClassID;
2213       case 3: return AMDGPU::VReg_96RegClassID;
2214       case 4: return AMDGPU::VReg_128RegClassID;
2215       case 5: return AMDGPU::VReg_160RegClassID;
2216       case 6: return AMDGPU::VReg_192RegClassID;
2217       case 7: return AMDGPU::VReg_224RegClassID;
2218       case 8: return AMDGPU::VReg_256RegClassID;
2219       case 16: return AMDGPU::VReg_512RegClassID;
2220       case 32: return AMDGPU::VReg_1024RegClassID;
2221     }
2222   } else if (Is == IS_TTMP) {
2223     switch (RegWidth) {
2224       default: return -1;
2225       case 1: return AMDGPU::TTMP_32RegClassID;
2226       case 2: return AMDGPU::TTMP_64RegClassID;
2227       case 4: return AMDGPU::TTMP_128RegClassID;
2228       case 8: return AMDGPU::TTMP_256RegClassID;
2229       case 16: return AMDGPU::TTMP_512RegClassID;
2230     }
2231   } else if (Is == IS_SGPR) {
2232     switch (RegWidth) {
2233       default: return -1;
2234       case 1: return AMDGPU::SGPR_32RegClassID;
2235       case 2: return AMDGPU::SGPR_64RegClassID;
2236       case 3: return AMDGPU::SGPR_96RegClassID;
2237       case 4: return AMDGPU::SGPR_128RegClassID;
2238       case 5: return AMDGPU::SGPR_160RegClassID;
2239       case 6: return AMDGPU::SGPR_192RegClassID;
2240       case 7: return AMDGPU::SGPR_224RegClassID;
2241       case 8: return AMDGPU::SGPR_256RegClassID;
2242       case 16: return AMDGPU::SGPR_512RegClassID;
2243     }
2244   } else if (Is == IS_AGPR) {
2245     switch (RegWidth) {
2246       default: return -1;
2247       case 1: return AMDGPU::AGPR_32RegClassID;
2248       case 2: return AMDGPU::AReg_64RegClassID;
2249       case 3: return AMDGPU::AReg_96RegClassID;
2250       case 4: return AMDGPU::AReg_128RegClassID;
2251       case 5: return AMDGPU::AReg_160RegClassID;
2252       case 6: return AMDGPU::AReg_192RegClassID;
2253       case 7: return AMDGPU::AReg_224RegClassID;
2254       case 8: return AMDGPU::AReg_256RegClassID;
2255       case 16: return AMDGPU::AReg_512RegClassID;
2256       case 32: return AMDGPU::AReg_1024RegClassID;
2257     }
2258   }
2259   return -1;
2260 }
2261 
2262 static unsigned getSpecialRegForName(StringRef RegName) {
2263   return StringSwitch<unsigned>(RegName)
2264     .Case("exec", AMDGPU::EXEC)
2265     .Case("vcc", AMDGPU::VCC)
2266     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2267     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2268     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2269     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2270     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2271     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2272     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2273     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2274     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2275     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2276     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2277     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2278     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2279     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2280     .Case("m0", AMDGPU::M0)
2281     .Case("vccz", AMDGPU::SRC_VCCZ)
2282     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2283     .Case("execz", AMDGPU::SRC_EXECZ)
2284     .Case("src_execz", AMDGPU::SRC_EXECZ)
2285     .Case("scc", AMDGPU::SRC_SCC)
2286     .Case("src_scc", AMDGPU::SRC_SCC)
2287     .Case("tba", AMDGPU::TBA)
2288     .Case("tma", AMDGPU::TMA)
2289     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2290     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2291     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2292     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2293     .Case("vcc_lo", AMDGPU::VCC_LO)
2294     .Case("vcc_hi", AMDGPU::VCC_HI)
2295     .Case("exec_lo", AMDGPU::EXEC_LO)
2296     .Case("exec_hi", AMDGPU::EXEC_HI)
2297     .Case("tma_lo", AMDGPU::TMA_LO)
2298     .Case("tma_hi", AMDGPU::TMA_HI)
2299     .Case("tba_lo", AMDGPU::TBA_LO)
2300     .Case("tba_hi", AMDGPU::TBA_HI)
2301     .Case("pc", AMDGPU::PC_REG)
2302     .Case("null", AMDGPU::SGPR_NULL)
2303     .Default(AMDGPU::NoRegister);
2304 }
2305 
2306 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2307                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2308   auto R = parseRegister();
2309   if (!R) return true;
2310   assert(R->isReg());
2311   RegNo = R->getReg();
2312   StartLoc = R->getStartLoc();
2313   EndLoc = R->getEndLoc();
2314   return false;
2315 }
2316 
2317 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2318                                     SMLoc &EndLoc) {
2319   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2320 }
2321 
2322 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2323                                                        SMLoc &StartLoc,
2324                                                        SMLoc &EndLoc) {
2325   bool Result =
2326       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2327   bool PendingErrors = getParser().hasPendingError();
2328   getParser().clearPendingErrors();
2329   if (PendingErrors)
2330     return MatchOperand_ParseFail;
2331   if (Result)
2332     return MatchOperand_NoMatch;
2333   return MatchOperand_Success;
2334 }
2335 
2336 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2337                                             RegisterKind RegKind, unsigned Reg1,
2338                                             SMLoc Loc) {
2339   switch (RegKind) {
2340   case IS_SPECIAL:
2341     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2342       Reg = AMDGPU::EXEC;
2343       RegWidth = 2;
2344       return true;
2345     }
2346     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2347       Reg = AMDGPU::FLAT_SCR;
2348       RegWidth = 2;
2349       return true;
2350     }
2351     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2352       Reg = AMDGPU::XNACK_MASK;
2353       RegWidth = 2;
2354       return true;
2355     }
2356     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2357       Reg = AMDGPU::VCC;
2358       RegWidth = 2;
2359       return true;
2360     }
2361     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2362       Reg = AMDGPU::TBA;
2363       RegWidth = 2;
2364       return true;
2365     }
2366     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2367       Reg = AMDGPU::TMA;
2368       RegWidth = 2;
2369       return true;
2370     }
2371     Error(Loc, "register does not fit in the list");
2372     return false;
2373   case IS_VGPR:
2374   case IS_SGPR:
2375   case IS_AGPR:
2376   case IS_TTMP:
2377     if (Reg1 != Reg + RegWidth) {
2378       Error(Loc, "registers in a list must have consecutive indices");
2379       return false;
2380     }
2381     RegWidth++;
2382     return true;
2383   default:
2384     llvm_unreachable("unexpected register kind");
2385   }
2386 }
2387 
2388 struct RegInfo {
2389   StringLiteral Name;
2390   RegisterKind Kind;
2391 };
2392 
2393 static constexpr RegInfo RegularRegisters[] = {
2394   {{"v"},    IS_VGPR},
2395   {{"s"},    IS_SGPR},
2396   {{"ttmp"}, IS_TTMP},
2397   {{"acc"},  IS_AGPR},
2398   {{"a"},    IS_AGPR},
2399 };
2400 
2401 static bool isRegularReg(RegisterKind Kind) {
2402   return Kind == IS_VGPR ||
2403          Kind == IS_SGPR ||
2404          Kind == IS_TTMP ||
2405          Kind == IS_AGPR;
2406 }
2407 
2408 static const RegInfo* getRegularRegInfo(StringRef Str) {
2409   for (const RegInfo &Reg : RegularRegisters)
2410     if (Str.startswith(Reg.Name))
2411       return &Reg;
2412   return nullptr;
2413 }
2414 
2415 static bool getRegNum(StringRef Str, unsigned& Num) {
2416   return !Str.getAsInteger(10, Num);
2417 }
2418 
2419 bool
2420 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2421                             const AsmToken &NextToken) const {
2422 
2423   // A list of consecutive registers: [s0,s1,s2,s3]
2424   if (Token.is(AsmToken::LBrac))
2425     return true;
2426 
2427   if (!Token.is(AsmToken::Identifier))
2428     return false;
2429 
2430   // A single register like s0 or a range of registers like s[0:1]
2431 
2432   StringRef Str = Token.getString();
2433   const RegInfo *Reg = getRegularRegInfo(Str);
2434   if (Reg) {
2435     StringRef RegName = Reg->Name;
2436     StringRef RegSuffix = Str.substr(RegName.size());
2437     if (!RegSuffix.empty()) {
2438       unsigned Num;
2439       // A single register with an index: rXX
2440       if (getRegNum(RegSuffix, Num))
2441         return true;
2442     } else {
2443       // A range of registers: r[XX:YY].
2444       if (NextToken.is(AsmToken::LBrac))
2445         return true;
2446     }
2447   }
2448 
2449   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2450 }
2451 
2452 bool
2453 AMDGPUAsmParser::isRegister()
2454 {
2455   return isRegister(getToken(), peekToken());
2456 }
2457 
2458 unsigned
2459 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2460                                unsigned RegNum,
2461                                unsigned RegWidth,
2462                                SMLoc Loc) {
2463 
2464   assert(isRegularReg(RegKind));
2465 
2466   unsigned AlignSize = 1;
2467   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2468     // SGPR and TTMP registers must be aligned.
2469     // Max required alignment is 4 dwords.
2470     AlignSize = std::min(RegWidth, 4u);
2471   }
2472 
2473   if (RegNum % AlignSize != 0) {
2474     Error(Loc, "invalid register alignment");
2475     return AMDGPU::NoRegister;
2476   }
2477 
2478   unsigned RegIdx = RegNum / AlignSize;
2479   int RCID = getRegClass(RegKind, RegWidth);
2480   if (RCID == -1) {
2481     Error(Loc, "invalid or unsupported register size");
2482     return AMDGPU::NoRegister;
2483   }
2484 
2485   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2486   const MCRegisterClass RC = TRI->getRegClass(RCID);
2487   if (RegIdx >= RC.getNumRegs()) {
2488     Error(Loc, "register index is out of range");
2489     return AMDGPU::NoRegister;
2490   }
2491 
2492   return RC.getRegister(RegIdx);
2493 }
2494 
2495 bool
2496 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2497   int64_t RegLo, RegHi;
2498   if (!skipToken(AsmToken::LBrac, "missing register index"))
2499     return false;
2500 
2501   SMLoc FirstIdxLoc = getLoc();
2502   SMLoc SecondIdxLoc;
2503 
2504   if (!parseExpr(RegLo))
2505     return false;
2506 
2507   if (trySkipToken(AsmToken::Colon)) {
2508     SecondIdxLoc = getLoc();
2509     if (!parseExpr(RegHi))
2510       return false;
2511   } else {
2512     RegHi = RegLo;
2513   }
2514 
2515   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2516     return false;
2517 
2518   if (!isUInt<32>(RegLo)) {
2519     Error(FirstIdxLoc, "invalid register index");
2520     return false;
2521   }
2522 
2523   if (!isUInt<32>(RegHi)) {
2524     Error(SecondIdxLoc, "invalid register index");
2525     return false;
2526   }
2527 
2528   if (RegLo > RegHi) {
2529     Error(FirstIdxLoc, "first register index should not exceed second index");
2530     return false;
2531   }
2532 
2533   Num = static_cast<unsigned>(RegLo);
2534   Width = (RegHi - RegLo) + 1;
2535   return true;
2536 }
2537 
2538 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2539                                           unsigned &RegNum, unsigned &RegWidth,
2540                                           SmallVectorImpl<AsmToken> &Tokens) {
2541   assert(isToken(AsmToken::Identifier));
2542   unsigned Reg = getSpecialRegForName(getTokenStr());
2543   if (Reg) {
2544     RegNum = 0;
2545     RegWidth = 1;
2546     RegKind = IS_SPECIAL;
2547     Tokens.push_back(getToken());
2548     lex(); // skip register name
2549   }
2550   return Reg;
2551 }
2552 
2553 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2554                                           unsigned &RegNum, unsigned &RegWidth,
2555                                           SmallVectorImpl<AsmToken> &Tokens) {
2556   assert(isToken(AsmToken::Identifier));
2557   StringRef RegName = getTokenStr();
2558   auto Loc = getLoc();
2559 
2560   const RegInfo *RI = getRegularRegInfo(RegName);
2561   if (!RI) {
2562     Error(Loc, "invalid register name");
2563     return AMDGPU::NoRegister;
2564   }
2565 
2566   Tokens.push_back(getToken());
2567   lex(); // skip register name
2568 
2569   RegKind = RI->Kind;
2570   StringRef RegSuffix = RegName.substr(RI->Name.size());
2571   if (!RegSuffix.empty()) {
2572     // Single 32-bit register: vXX.
2573     if (!getRegNum(RegSuffix, RegNum)) {
2574       Error(Loc, "invalid register index");
2575       return AMDGPU::NoRegister;
2576     }
2577     RegWidth = 1;
2578   } else {
2579     // Range of registers: v[XX:YY]. ":YY" is optional.
2580     if (!ParseRegRange(RegNum, RegWidth))
2581       return AMDGPU::NoRegister;
2582   }
2583 
2584   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2585 }
2586 
2587 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2588                                        unsigned &RegWidth,
2589                                        SmallVectorImpl<AsmToken> &Tokens) {
2590   unsigned Reg = AMDGPU::NoRegister;
2591   auto ListLoc = getLoc();
2592 
2593   if (!skipToken(AsmToken::LBrac,
2594                  "expected a register or a list of registers")) {
2595     return AMDGPU::NoRegister;
2596   }
2597 
2598   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2599 
2600   auto Loc = getLoc();
2601   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2602     return AMDGPU::NoRegister;
2603   if (RegWidth != 1) {
2604     Error(Loc, "expected a single 32-bit register");
2605     return AMDGPU::NoRegister;
2606   }
2607 
2608   for (; trySkipToken(AsmToken::Comma); ) {
2609     RegisterKind NextRegKind;
2610     unsigned NextReg, NextRegNum, NextRegWidth;
2611     Loc = getLoc();
2612 
2613     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2614                              NextRegNum, NextRegWidth,
2615                              Tokens)) {
2616       return AMDGPU::NoRegister;
2617     }
2618     if (NextRegWidth != 1) {
2619       Error(Loc, "expected a single 32-bit register");
2620       return AMDGPU::NoRegister;
2621     }
2622     if (NextRegKind != RegKind) {
2623       Error(Loc, "registers in a list must be of the same kind");
2624       return AMDGPU::NoRegister;
2625     }
2626     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2627       return AMDGPU::NoRegister;
2628   }
2629 
2630   if (!skipToken(AsmToken::RBrac,
2631                  "expected a comma or a closing square bracket")) {
2632     return AMDGPU::NoRegister;
2633   }
2634 
2635   if (isRegularReg(RegKind))
2636     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2637 
2638   return Reg;
2639 }
2640 
2641 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2642                                           unsigned &RegNum, unsigned &RegWidth,
2643                                           SmallVectorImpl<AsmToken> &Tokens) {
2644   auto Loc = getLoc();
2645   Reg = AMDGPU::NoRegister;
2646 
2647   if (isToken(AsmToken::Identifier)) {
2648     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2649     if (Reg == AMDGPU::NoRegister)
2650       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2651   } else {
2652     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2653   }
2654 
2655   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2656   if (Reg == AMDGPU::NoRegister) {
2657     assert(Parser.hasPendingError());
2658     return false;
2659   }
2660 
2661   if (!subtargetHasRegister(*TRI, Reg)) {
2662     if (Reg == AMDGPU::SGPR_NULL) {
2663       Error(Loc, "'null' operand is not supported on this GPU");
2664     } else {
2665       Error(Loc, "register not available on this GPU");
2666     }
2667     return false;
2668   }
2669 
2670   return true;
2671 }
2672 
2673 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2674                                           unsigned &RegNum, unsigned &RegWidth,
2675                                           bool RestoreOnFailure /*=false*/) {
2676   Reg = AMDGPU::NoRegister;
2677 
2678   SmallVector<AsmToken, 1> Tokens;
2679   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2680     if (RestoreOnFailure) {
2681       while (!Tokens.empty()) {
2682         getLexer().UnLex(Tokens.pop_back_val());
2683       }
2684     }
2685     return true;
2686   }
2687   return false;
2688 }
2689 
2690 Optional<StringRef>
2691 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2692   switch (RegKind) {
2693   case IS_VGPR:
2694     return StringRef(".amdgcn.next_free_vgpr");
2695   case IS_SGPR:
2696     return StringRef(".amdgcn.next_free_sgpr");
2697   default:
2698     return None;
2699   }
2700 }
2701 
2702 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2703   auto SymbolName = getGprCountSymbolName(RegKind);
2704   assert(SymbolName && "initializing invalid register kind");
2705   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2706   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2707 }
2708 
2709 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2710                                             unsigned DwordRegIndex,
2711                                             unsigned RegWidth) {
2712   // Symbols are only defined for GCN targets
2713   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2714     return true;
2715 
2716   auto SymbolName = getGprCountSymbolName(RegKind);
2717   if (!SymbolName)
2718     return true;
2719   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2720 
2721   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2722   int64_t OldCount;
2723 
2724   if (!Sym->isVariable())
2725     return !Error(getLoc(),
2726                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2727   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2728     return !Error(
2729         getLoc(),
2730         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2731 
2732   if (OldCount <= NewMax)
2733     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2734 
2735   return true;
2736 }
2737 
2738 std::unique_ptr<AMDGPUOperand>
2739 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2740   const auto &Tok = getToken();
2741   SMLoc StartLoc = Tok.getLoc();
2742   SMLoc EndLoc = Tok.getEndLoc();
2743   RegisterKind RegKind;
2744   unsigned Reg, RegNum, RegWidth;
2745 
2746   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2747     return nullptr;
2748   }
2749   if (isHsaAbiVersion3Or4(&getSTI())) {
2750     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2751       return nullptr;
2752   } else
2753     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2754   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2755 }
2756 
2757 OperandMatchResultTy
2758 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2759   // TODO: add syntactic sugar for 1/(2*PI)
2760 
2761   assert(!isRegister());
2762   assert(!isModifier());
2763 
2764   const auto& Tok = getToken();
2765   const auto& NextTok = peekToken();
2766   bool IsReal = Tok.is(AsmToken::Real);
2767   SMLoc S = getLoc();
2768   bool Negate = false;
2769 
2770   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2771     lex();
2772     IsReal = true;
2773     Negate = true;
2774   }
2775 
2776   if (IsReal) {
2777     // Floating-point expressions are not supported.
2778     // Can only allow floating-point literals with an
2779     // optional sign.
2780 
2781     StringRef Num = getTokenStr();
2782     lex();
2783 
2784     APFloat RealVal(APFloat::IEEEdouble());
2785     auto roundMode = APFloat::rmNearestTiesToEven;
2786     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2787       return MatchOperand_ParseFail;
2788     }
2789     if (Negate)
2790       RealVal.changeSign();
2791 
2792     Operands.push_back(
2793       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2794                                AMDGPUOperand::ImmTyNone, true));
2795 
2796     return MatchOperand_Success;
2797 
2798   } else {
2799     int64_t IntVal;
2800     const MCExpr *Expr;
2801     SMLoc S = getLoc();
2802 
2803     if (HasSP3AbsModifier) {
2804       // This is a workaround for handling expressions
2805       // as arguments of SP3 'abs' modifier, for example:
2806       //     |1.0|
2807       //     |-1|
2808       //     |1+x|
2809       // This syntax is not compatible with syntax of standard
2810       // MC expressions (due to the trailing '|').
2811       SMLoc EndLoc;
2812       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2813         return MatchOperand_ParseFail;
2814     } else {
2815       if (Parser.parseExpression(Expr))
2816         return MatchOperand_ParseFail;
2817     }
2818 
2819     if (Expr->evaluateAsAbsolute(IntVal)) {
2820       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2821     } else {
2822       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2823     }
2824 
2825     return MatchOperand_Success;
2826   }
2827 
2828   return MatchOperand_NoMatch;
2829 }
2830 
2831 OperandMatchResultTy
2832 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2833   if (!isRegister())
2834     return MatchOperand_NoMatch;
2835 
2836   if (auto R = parseRegister()) {
2837     assert(R->isReg());
2838     Operands.push_back(std::move(R));
2839     return MatchOperand_Success;
2840   }
2841   return MatchOperand_ParseFail;
2842 }
2843 
2844 OperandMatchResultTy
2845 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2846   auto res = parseReg(Operands);
2847   if (res != MatchOperand_NoMatch) {
2848     return res;
2849   } else if (isModifier()) {
2850     return MatchOperand_NoMatch;
2851   } else {
2852     return parseImm(Operands, HasSP3AbsMod);
2853   }
2854 }
2855 
2856 bool
2857 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2858   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2859     const auto &str = Token.getString();
2860     return str == "abs" || str == "neg" || str == "sext";
2861   }
2862   return false;
2863 }
2864 
2865 bool
2866 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2867   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2868 }
2869 
2870 bool
2871 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2872   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2873 }
2874 
2875 bool
2876 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2877   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2878 }
2879 
2880 // Check if this is an operand modifier or an opcode modifier
2881 // which may look like an expression but it is not. We should
2882 // avoid parsing these modifiers as expressions. Currently
2883 // recognized sequences are:
2884 //   |...|
2885 //   abs(...)
2886 //   neg(...)
2887 //   sext(...)
2888 //   -reg
2889 //   -|...|
2890 //   -abs(...)
2891 //   name:...
2892 // Note that simple opcode modifiers like 'gds' may be parsed as
2893 // expressions; this is a special case. See getExpressionAsToken.
2894 //
2895 bool
2896 AMDGPUAsmParser::isModifier() {
2897 
2898   AsmToken Tok = getToken();
2899   AsmToken NextToken[2];
2900   peekTokens(NextToken);
2901 
2902   return isOperandModifier(Tok, NextToken[0]) ||
2903          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2904          isOpcodeModifierWithVal(Tok, NextToken[0]);
2905 }
2906 
2907 // Check if the current token is an SP3 'neg' modifier.
2908 // Currently this modifier is allowed in the following context:
2909 //
2910 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2911 // 2. Before an 'abs' modifier: -abs(...)
2912 // 3. Before an SP3 'abs' modifier: -|...|
2913 //
2914 // In all other cases "-" is handled as a part
2915 // of an expression that follows the sign.
2916 //
2917 // Note: When "-" is followed by an integer literal,
2918 // this is interpreted as integer negation rather
2919 // than a floating-point NEG modifier applied to N.
2920 // Beside being contr-intuitive, such use of floating-point
2921 // NEG modifier would have resulted in different meaning
2922 // of integer literals used with VOP1/2/C and VOP3,
2923 // for example:
2924 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2925 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2926 // Negative fp literals with preceding "-" are
2927 // handled likewise for unifomtity
2928 //
2929 bool
2930 AMDGPUAsmParser::parseSP3NegModifier() {
2931 
2932   AsmToken NextToken[2];
2933   peekTokens(NextToken);
2934 
2935   if (isToken(AsmToken::Minus) &&
2936       (isRegister(NextToken[0], NextToken[1]) ||
2937        NextToken[0].is(AsmToken::Pipe) ||
2938        isId(NextToken[0], "abs"))) {
2939     lex();
2940     return true;
2941   }
2942 
2943   return false;
2944 }
2945 
2946 OperandMatchResultTy
2947 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2948                                               bool AllowImm) {
2949   bool Neg, SP3Neg;
2950   bool Abs, SP3Abs;
2951   SMLoc Loc;
2952 
2953   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2954   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2955     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2956     return MatchOperand_ParseFail;
2957   }
2958 
2959   SP3Neg = parseSP3NegModifier();
2960 
2961   Loc = getLoc();
2962   Neg = trySkipId("neg");
2963   if (Neg && SP3Neg) {
2964     Error(Loc, "expected register or immediate");
2965     return MatchOperand_ParseFail;
2966   }
2967   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2968     return MatchOperand_ParseFail;
2969 
2970   Abs = trySkipId("abs");
2971   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2972     return MatchOperand_ParseFail;
2973 
2974   Loc = getLoc();
2975   SP3Abs = trySkipToken(AsmToken::Pipe);
2976   if (Abs && SP3Abs) {
2977     Error(Loc, "expected register or immediate");
2978     return MatchOperand_ParseFail;
2979   }
2980 
2981   OperandMatchResultTy Res;
2982   if (AllowImm) {
2983     Res = parseRegOrImm(Operands, SP3Abs);
2984   } else {
2985     Res = parseReg(Operands);
2986   }
2987   if (Res != MatchOperand_Success) {
2988     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2989   }
2990 
2991   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2992     return MatchOperand_ParseFail;
2993   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2994     return MatchOperand_ParseFail;
2995   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2996     return MatchOperand_ParseFail;
2997 
2998   AMDGPUOperand::Modifiers Mods;
2999   Mods.Abs = Abs || SP3Abs;
3000   Mods.Neg = Neg || SP3Neg;
3001 
3002   if (Mods.hasFPModifiers()) {
3003     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3004     if (Op.isExpr()) {
3005       Error(Op.getStartLoc(), "expected an absolute expression");
3006       return MatchOperand_ParseFail;
3007     }
3008     Op.setModifiers(Mods);
3009   }
3010   return MatchOperand_Success;
3011 }
3012 
3013 OperandMatchResultTy
3014 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3015                                                bool AllowImm) {
3016   bool Sext = trySkipId("sext");
3017   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3018     return MatchOperand_ParseFail;
3019 
3020   OperandMatchResultTy Res;
3021   if (AllowImm) {
3022     Res = parseRegOrImm(Operands);
3023   } else {
3024     Res = parseReg(Operands);
3025   }
3026   if (Res != MatchOperand_Success) {
3027     return Sext? MatchOperand_ParseFail : Res;
3028   }
3029 
3030   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3031     return MatchOperand_ParseFail;
3032 
3033   AMDGPUOperand::Modifiers Mods;
3034   Mods.Sext = Sext;
3035 
3036   if (Mods.hasIntModifiers()) {
3037     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3038     if (Op.isExpr()) {
3039       Error(Op.getStartLoc(), "expected an absolute expression");
3040       return MatchOperand_ParseFail;
3041     }
3042     Op.setModifiers(Mods);
3043   }
3044 
3045   return MatchOperand_Success;
3046 }
3047 
3048 OperandMatchResultTy
3049 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3050   return parseRegOrImmWithFPInputMods(Operands, false);
3051 }
3052 
3053 OperandMatchResultTy
3054 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3055   return parseRegOrImmWithIntInputMods(Operands, false);
3056 }
3057 
3058 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3059   auto Loc = getLoc();
3060   if (trySkipId("off")) {
3061     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3062                                                 AMDGPUOperand::ImmTyOff, false));
3063     return MatchOperand_Success;
3064   }
3065 
3066   if (!isRegister())
3067     return MatchOperand_NoMatch;
3068 
3069   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3070   if (Reg) {
3071     Operands.push_back(std::move(Reg));
3072     return MatchOperand_Success;
3073   }
3074 
3075   return MatchOperand_ParseFail;
3076 
3077 }
3078 
3079 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3080   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3081 
3082   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3083       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3084       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3085       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3086     return Match_InvalidOperand;
3087 
3088   if ((TSFlags & SIInstrFlags::VOP3) &&
3089       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3090       getForcedEncodingSize() != 64)
3091     return Match_PreferE32;
3092 
3093   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3094       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3095     // v_mac_f32/16 allow only dst_sel == DWORD;
3096     auto OpNum =
3097         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3098     const auto &Op = Inst.getOperand(OpNum);
3099     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3100       return Match_InvalidOperand;
3101     }
3102   }
3103 
3104   return Match_Success;
3105 }
3106 
3107 static ArrayRef<unsigned> getAllVariants() {
3108   static const unsigned Variants[] = {
3109     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3110     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3111   };
3112 
3113   return makeArrayRef(Variants);
3114 }
3115 
3116 // What asm variants we should check
3117 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3118   if (getForcedEncodingSize() == 32) {
3119     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3120     return makeArrayRef(Variants);
3121   }
3122 
3123   if (isForcedVOP3()) {
3124     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3125     return makeArrayRef(Variants);
3126   }
3127 
3128   if (isForcedSDWA()) {
3129     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3130                                         AMDGPUAsmVariants::SDWA9};
3131     return makeArrayRef(Variants);
3132   }
3133 
3134   if (isForcedDPP()) {
3135     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3136     return makeArrayRef(Variants);
3137   }
3138 
3139   return getAllVariants();
3140 }
3141 
3142 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3143   if (getForcedEncodingSize() == 32)
3144     return "e32";
3145 
3146   if (isForcedVOP3())
3147     return "e64";
3148 
3149   if (isForcedSDWA())
3150     return "sdwa";
3151 
3152   if (isForcedDPP())
3153     return "dpp";
3154 
3155   return "";
3156 }
3157 
3158 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3159   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3160   const unsigned Num = Desc.getNumImplicitUses();
3161   for (unsigned i = 0; i < Num; ++i) {
3162     unsigned Reg = Desc.ImplicitUses[i];
3163     switch (Reg) {
3164     case AMDGPU::FLAT_SCR:
3165     case AMDGPU::VCC:
3166     case AMDGPU::VCC_LO:
3167     case AMDGPU::VCC_HI:
3168     case AMDGPU::M0:
3169       return Reg;
3170     default:
3171       break;
3172     }
3173   }
3174   return AMDGPU::NoRegister;
3175 }
3176 
3177 // NB: This code is correct only when used to check constant
3178 // bus limitations because GFX7 support no f16 inline constants.
3179 // Note that there are no cases when a GFX7 opcode violates
3180 // constant bus limitations due to the use of an f16 constant.
3181 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3182                                        unsigned OpIdx) const {
3183   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3184 
3185   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3186     return false;
3187   }
3188 
3189   const MCOperand &MO = Inst.getOperand(OpIdx);
3190 
3191   int64_t Val = MO.getImm();
3192   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3193 
3194   switch (OpSize) { // expected operand size
3195   case 8:
3196     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3197   case 4:
3198     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3199   case 2: {
3200     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3201     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3202         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3203         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3204       return AMDGPU::isInlinableIntLiteral(Val);
3205 
3206     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3207         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3208         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3209       return AMDGPU::isInlinableIntLiteralV216(Val);
3210 
3211     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3212         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3213         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3214       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3215 
3216     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3217   }
3218   default:
3219     llvm_unreachable("invalid operand size");
3220   }
3221 }
3222 
3223 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3224   if (!isGFX10Plus())
3225     return 1;
3226 
3227   switch (Opcode) {
3228   // 64-bit shift instructions can use only one scalar value input
3229   case AMDGPU::V_LSHLREV_B64_e64:
3230   case AMDGPU::V_LSHLREV_B64_gfx10:
3231   case AMDGPU::V_LSHRREV_B64_e64:
3232   case AMDGPU::V_LSHRREV_B64_gfx10:
3233   case AMDGPU::V_ASHRREV_I64_e64:
3234   case AMDGPU::V_ASHRREV_I64_gfx10:
3235   case AMDGPU::V_LSHL_B64_e64:
3236   case AMDGPU::V_LSHR_B64_e64:
3237   case AMDGPU::V_ASHR_I64_e64:
3238     return 1;
3239   default:
3240     return 2;
3241   }
3242 }
3243 
3244 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3245   const MCOperand &MO = Inst.getOperand(OpIdx);
3246   if (MO.isImm()) {
3247     return !isInlineConstant(Inst, OpIdx);
3248   } else if (MO.isReg()) {
3249     auto Reg = MO.getReg();
3250     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3251     auto PReg = mc2PseudoReg(Reg);
3252     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3253   } else {
3254     return true;
3255   }
3256 }
3257 
3258 bool
3259 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3260                                                 const OperandVector &Operands) {
3261   const unsigned Opcode = Inst.getOpcode();
3262   const MCInstrDesc &Desc = MII.get(Opcode);
3263   unsigned LastSGPR = AMDGPU::NoRegister;
3264   unsigned ConstantBusUseCount = 0;
3265   unsigned NumLiterals = 0;
3266   unsigned LiteralSize;
3267 
3268   if (Desc.TSFlags &
3269       (SIInstrFlags::VOPC |
3270        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3271        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3272        SIInstrFlags::SDWA)) {
3273     // Check special imm operands (used by madmk, etc)
3274     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3275       ++NumLiterals;
3276       LiteralSize = 4;
3277     }
3278 
3279     SmallDenseSet<unsigned> SGPRsUsed;
3280     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3281     if (SGPRUsed != AMDGPU::NoRegister) {
3282       SGPRsUsed.insert(SGPRUsed);
3283       ++ConstantBusUseCount;
3284     }
3285 
3286     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3287     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3288     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3289 
3290     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3291 
3292     for (int OpIdx : OpIndices) {
3293       if (OpIdx == -1) break;
3294 
3295       const MCOperand &MO = Inst.getOperand(OpIdx);
3296       if (usesConstantBus(Inst, OpIdx)) {
3297         if (MO.isReg()) {
3298           LastSGPR = mc2PseudoReg(MO.getReg());
3299           // Pairs of registers with a partial intersections like these
3300           //   s0, s[0:1]
3301           //   flat_scratch_lo, flat_scratch
3302           //   flat_scratch_lo, flat_scratch_hi
3303           // are theoretically valid but they are disabled anyway.
3304           // Note that this code mimics SIInstrInfo::verifyInstruction
3305           if (!SGPRsUsed.count(LastSGPR)) {
3306             SGPRsUsed.insert(LastSGPR);
3307             ++ConstantBusUseCount;
3308           }
3309         } else { // Expression or a literal
3310 
3311           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3312             continue; // special operand like VINTERP attr_chan
3313 
3314           // An instruction may use only one literal.
3315           // This has been validated on the previous step.
3316           // See validateVOPLiteral.
3317           // This literal may be used as more than one operand.
3318           // If all these operands are of the same size,
3319           // this literal counts as one scalar value.
3320           // Otherwise it counts as 2 scalar values.
3321           // See "GFX10 Shader Programming", section 3.6.2.3.
3322 
3323           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3324           if (Size < 4) Size = 4;
3325 
3326           if (NumLiterals == 0) {
3327             NumLiterals = 1;
3328             LiteralSize = Size;
3329           } else if (LiteralSize != Size) {
3330             NumLiterals = 2;
3331           }
3332         }
3333       }
3334     }
3335   }
3336   ConstantBusUseCount += NumLiterals;
3337 
3338   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3339     return true;
3340 
3341   SMLoc LitLoc = getLitLoc(Operands);
3342   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3343   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3344   Error(Loc, "invalid operand (violates constant bus restrictions)");
3345   return false;
3346 }
3347 
3348 bool
3349 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3350                                                  const OperandVector &Operands) {
3351   const unsigned Opcode = Inst.getOpcode();
3352   const MCInstrDesc &Desc = MII.get(Opcode);
3353 
3354   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3355   if (DstIdx == -1 ||
3356       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3357     return true;
3358   }
3359 
3360   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3361 
3362   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3363   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3364   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3365 
3366   assert(DstIdx != -1);
3367   const MCOperand &Dst = Inst.getOperand(DstIdx);
3368   assert(Dst.isReg());
3369   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3370 
3371   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3372 
3373   for (int SrcIdx : SrcIndices) {
3374     if (SrcIdx == -1) break;
3375     const MCOperand &Src = Inst.getOperand(SrcIdx);
3376     if (Src.isReg()) {
3377       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3378       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3379         Error(getRegLoc(SrcReg, Operands),
3380           "destination must be different than all sources");
3381         return false;
3382       }
3383     }
3384   }
3385 
3386   return true;
3387 }
3388 
3389 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3390 
3391   const unsigned Opc = Inst.getOpcode();
3392   const MCInstrDesc &Desc = MII.get(Opc);
3393 
3394   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3395     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3396     assert(ClampIdx != -1);
3397     return Inst.getOperand(ClampIdx).getImm() == 0;
3398   }
3399 
3400   return true;
3401 }
3402 
3403 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3404 
3405   const unsigned Opc = Inst.getOpcode();
3406   const MCInstrDesc &Desc = MII.get(Opc);
3407 
3408   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3409     return true;
3410 
3411   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3412   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3413   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3414 
3415   assert(VDataIdx != -1);
3416 
3417   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3418     return true;
3419 
3420   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3421   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3422   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3423   if (DMask == 0)
3424     DMask = 1;
3425 
3426   unsigned DataSize =
3427     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3428   if (hasPackedD16()) {
3429     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3430     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3431       DataSize = (DataSize + 1) / 2;
3432   }
3433 
3434   return (VDataSize / 4) == DataSize + TFESize;
3435 }
3436 
3437 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3438   const unsigned Opc = Inst.getOpcode();
3439   const MCInstrDesc &Desc = MII.get(Opc);
3440 
3441   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3442     return true;
3443 
3444   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3445 
3446   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3447       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3448   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3449   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3450   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3451   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3452 
3453   assert(VAddr0Idx != -1);
3454   assert(SrsrcIdx != -1);
3455   assert(SrsrcIdx > VAddr0Idx);
3456 
3457   if (DimIdx == -1)
3458     return true; // intersect_ray
3459 
3460   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3461   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3462   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3463   unsigned ActualAddrSize =
3464       IsNSA ? SrsrcIdx - VAddr0Idx
3465             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3466   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3467 
3468   unsigned ExpectedAddrSize =
3469       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3470 
3471   if (!IsNSA) {
3472     if (ExpectedAddrSize > 8)
3473       ExpectedAddrSize = 16;
3474 
3475     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3476     // This provides backward compatibility for assembly created
3477     // before 160b/192b/224b types were directly supported.
3478     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3479       return true;
3480   }
3481 
3482   return ActualAddrSize == ExpectedAddrSize;
3483 }
3484 
3485 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3486 
3487   const unsigned Opc = Inst.getOpcode();
3488   const MCInstrDesc &Desc = MII.get(Opc);
3489 
3490   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3491     return true;
3492   if (!Desc.mayLoad() || !Desc.mayStore())
3493     return true; // Not atomic
3494 
3495   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3496   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3497 
3498   // This is an incomplete check because image_atomic_cmpswap
3499   // may only use 0x3 and 0xf while other atomic operations
3500   // may use 0x1 and 0x3. However these limitations are
3501   // verified when we check that dmask matches dst size.
3502   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3503 }
3504 
3505 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3506 
3507   const unsigned Opc = Inst.getOpcode();
3508   const MCInstrDesc &Desc = MII.get(Opc);
3509 
3510   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3511     return true;
3512 
3513   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3514   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3515 
3516   // GATHER4 instructions use dmask in a different fashion compared to
3517   // other MIMG instructions. The only useful DMASK values are
3518   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3519   // (red,red,red,red) etc.) The ISA document doesn't mention
3520   // this.
3521   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3522 }
3523 
3524 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3525   const unsigned Opc = Inst.getOpcode();
3526   const MCInstrDesc &Desc = MII.get(Opc);
3527 
3528   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3529     return true;
3530 
3531   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3532   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3533       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3534 
3535   if (!BaseOpcode->MSAA)
3536     return true;
3537 
3538   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3539   assert(DimIdx != -1);
3540 
3541   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3542   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3543 
3544   return DimInfo->MSAA;
3545 }
3546 
3547 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3548 {
3549   switch (Opcode) {
3550   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3551   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3552   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3553     return true;
3554   default:
3555     return false;
3556   }
3557 }
3558 
3559 // movrels* opcodes should only allow VGPRS as src0.
3560 // This is specified in .td description for vop1/vop3,
3561 // but sdwa is handled differently. See isSDWAOperand.
3562 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3563                                       const OperandVector &Operands) {
3564 
3565   const unsigned Opc = Inst.getOpcode();
3566   const MCInstrDesc &Desc = MII.get(Opc);
3567 
3568   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3569     return true;
3570 
3571   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3572   assert(Src0Idx != -1);
3573 
3574   SMLoc ErrLoc;
3575   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3576   if (Src0.isReg()) {
3577     auto Reg = mc2PseudoReg(Src0.getReg());
3578     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3579     if (!isSGPR(Reg, TRI))
3580       return true;
3581     ErrLoc = getRegLoc(Reg, Operands);
3582   } else {
3583     ErrLoc = getConstLoc(Operands);
3584   }
3585 
3586   Error(ErrLoc, "source operand must be a VGPR");
3587   return false;
3588 }
3589 
3590 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3591                                           const OperandVector &Operands) {
3592 
3593   const unsigned Opc = Inst.getOpcode();
3594 
3595   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3596     return true;
3597 
3598   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3599   assert(Src0Idx != -1);
3600 
3601   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3602   if (!Src0.isReg())
3603     return true;
3604 
3605   auto Reg = mc2PseudoReg(Src0.getReg());
3606   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3607   if (isSGPR(Reg, TRI)) {
3608     Error(getRegLoc(Reg, Operands),
3609           "source operand must be either a VGPR or an inline constant");
3610     return false;
3611   }
3612 
3613   return true;
3614 }
3615 
3616 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3617   switch (Inst.getOpcode()) {
3618   default:
3619     return true;
3620   case V_DIV_SCALE_F32_gfx6_gfx7:
3621   case V_DIV_SCALE_F32_vi:
3622   case V_DIV_SCALE_F32_gfx10:
3623   case V_DIV_SCALE_F64_gfx6_gfx7:
3624   case V_DIV_SCALE_F64_vi:
3625   case V_DIV_SCALE_F64_gfx10:
3626     break;
3627   }
3628 
3629   // TODO: Check that src0 = src1 or src2.
3630 
3631   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3632                     AMDGPU::OpName::src2_modifiers,
3633                     AMDGPU::OpName::src2_modifiers}) {
3634     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3635             .getImm() &
3636         SISrcMods::ABS) {
3637       return false;
3638     }
3639   }
3640 
3641   return true;
3642 }
3643 
3644 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3645 
3646   const unsigned Opc = Inst.getOpcode();
3647   const MCInstrDesc &Desc = MII.get(Opc);
3648 
3649   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3650     return true;
3651 
3652   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3653   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3654     if (isCI() || isSI())
3655       return false;
3656   }
3657 
3658   return true;
3659 }
3660 
3661 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3662   const unsigned Opc = Inst.getOpcode();
3663   const MCInstrDesc &Desc = MII.get(Opc);
3664 
3665   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3666     return true;
3667 
3668   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3669   if (DimIdx < 0)
3670     return true;
3671 
3672   long Imm = Inst.getOperand(DimIdx).getImm();
3673   if (Imm < 0 || Imm >= 8)
3674     return false;
3675 
3676   return true;
3677 }
3678 
3679 static bool IsRevOpcode(const unsigned Opcode)
3680 {
3681   switch (Opcode) {
3682   case AMDGPU::V_SUBREV_F32_e32:
3683   case AMDGPU::V_SUBREV_F32_e64:
3684   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3685   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3686   case AMDGPU::V_SUBREV_F32_e32_vi:
3687   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3688   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3689   case AMDGPU::V_SUBREV_F32_e64_vi:
3690 
3691   case AMDGPU::V_SUBREV_CO_U32_e32:
3692   case AMDGPU::V_SUBREV_CO_U32_e64:
3693   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3694   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3695 
3696   case AMDGPU::V_SUBBREV_U32_e32:
3697   case AMDGPU::V_SUBBREV_U32_e64:
3698   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3699   case AMDGPU::V_SUBBREV_U32_e32_vi:
3700   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3701   case AMDGPU::V_SUBBREV_U32_e64_vi:
3702 
3703   case AMDGPU::V_SUBREV_U32_e32:
3704   case AMDGPU::V_SUBREV_U32_e64:
3705   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3706   case AMDGPU::V_SUBREV_U32_e32_vi:
3707   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3708   case AMDGPU::V_SUBREV_U32_e64_vi:
3709 
3710   case AMDGPU::V_SUBREV_F16_e32:
3711   case AMDGPU::V_SUBREV_F16_e64:
3712   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3713   case AMDGPU::V_SUBREV_F16_e32_vi:
3714   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3715   case AMDGPU::V_SUBREV_F16_e64_vi:
3716 
3717   case AMDGPU::V_SUBREV_U16_e32:
3718   case AMDGPU::V_SUBREV_U16_e64:
3719   case AMDGPU::V_SUBREV_U16_e32_vi:
3720   case AMDGPU::V_SUBREV_U16_e64_vi:
3721 
3722   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3723   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3724   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3725 
3726   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3727   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3728 
3729   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3730   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3731 
3732   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3733   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3734 
3735   case AMDGPU::V_LSHRREV_B32_e32:
3736   case AMDGPU::V_LSHRREV_B32_e64:
3737   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3738   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3739   case AMDGPU::V_LSHRREV_B32_e32_vi:
3740   case AMDGPU::V_LSHRREV_B32_e64_vi:
3741   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3742   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3743 
3744   case AMDGPU::V_ASHRREV_I32_e32:
3745   case AMDGPU::V_ASHRREV_I32_e64:
3746   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3747   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3748   case AMDGPU::V_ASHRREV_I32_e32_vi:
3749   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3750   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3751   case AMDGPU::V_ASHRREV_I32_e64_vi:
3752 
3753   case AMDGPU::V_LSHLREV_B32_e32:
3754   case AMDGPU::V_LSHLREV_B32_e64:
3755   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3756   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3757   case AMDGPU::V_LSHLREV_B32_e32_vi:
3758   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3759   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3760   case AMDGPU::V_LSHLREV_B32_e64_vi:
3761 
3762   case AMDGPU::V_LSHLREV_B16_e32:
3763   case AMDGPU::V_LSHLREV_B16_e64:
3764   case AMDGPU::V_LSHLREV_B16_e32_vi:
3765   case AMDGPU::V_LSHLREV_B16_e64_vi:
3766   case AMDGPU::V_LSHLREV_B16_gfx10:
3767 
3768   case AMDGPU::V_LSHRREV_B16_e32:
3769   case AMDGPU::V_LSHRREV_B16_e64:
3770   case AMDGPU::V_LSHRREV_B16_e32_vi:
3771   case AMDGPU::V_LSHRREV_B16_e64_vi:
3772   case AMDGPU::V_LSHRREV_B16_gfx10:
3773 
3774   case AMDGPU::V_ASHRREV_I16_e32:
3775   case AMDGPU::V_ASHRREV_I16_e64:
3776   case AMDGPU::V_ASHRREV_I16_e32_vi:
3777   case AMDGPU::V_ASHRREV_I16_e64_vi:
3778   case AMDGPU::V_ASHRREV_I16_gfx10:
3779 
3780   case AMDGPU::V_LSHLREV_B64_e64:
3781   case AMDGPU::V_LSHLREV_B64_gfx10:
3782   case AMDGPU::V_LSHLREV_B64_vi:
3783 
3784   case AMDGPU::V_LSHRREV_B64_e64:
3785   case AMDGPU::V_LSHRREV_B64_gfx10:
3786   case AMDGPU::V_LSHRREV_B64_vi:
3787 
3788   case AMDGPU::V_ASHRREV_I64_e64:
3789   case AMDGPU::V_ASHRREV_I64_gfx10:
3790   case AMDGPU::V_ASHRREV_I64_vi:
3791 
3792   case AMDGPU::V_PK_LSHLREV_B16:
3793   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3794   case AMDGPU::V_PK_LSHLREV_B16_vi:
3795 
3796   case AMDGPU::V_PK_LSHRREV_B16:
3797   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3798   case AMDGPU::V_PK_LSHRREV_B16_vi:
3799   case AMDGPU::V_PK_ASHRREV_I16:
3800   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3801   case AMDGPU::V_PK_ASHRREV_I16_vi:
3802     return true;
3803   default:
3804     return false;
3805   }
3806 }
3807 
3808 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3809 
3810   using namespace SIInstrFlags;
3811   const unsigned Opcode = Inst.getOpcode();
3812   const MCInstrDesc &Desc = MII.get(Opcode);
3813 
3814   // lds_direct register is defined so that it can be used
3815   // with 9-bit operands only. Ignore encodings which do not accept these.
3816   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3817   if ((Desc.TSFlags & Enc) == 0)
3818     return None;
3819 
3820   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3821     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3822     if (SrcIdx == -1)
3823       break;
3824     const auto &Src = Inst.getOperand(SrcIdx);
3825     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3826 
3827       if (isGFX90A())
3828         return StringRef("lds_direct is not supported on this GPU");
3829 
3830       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3831         return StringRef("lds_direct cannot be used with this instruction");
3832 
3833       if (SrcName != OpName::src0)
3834         return StringRef("lds_direct may be used as src0 only");
3835     }
3836   }
3837 
3838   return None;
3839 }
3840 
3841 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3842   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3843     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3844     if (Op.isFlatOffset())
3845       return Op.getStartLoc();
3846   }
3847   return getLoc();
3848 }
3849 
3850 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3851                                          const OperandVector &Operands) {
3852   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3853   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3854     return true;
3855 
3856   auto Opcode = Inst.getOpcode();
3857   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3858   assert(OpNum != -1);
3859 
3860   const auto &Op = Inst.getOperand(OpNum);
3861   if (!hasFlatOffsets() && Op.getImm() != 0) {
3862     Error(getFlatOffsetLoc(Operands),
3863           "flat offset modifier is not supported on this GPU");
3864     return false;
3865   }
3866 
3867   // For FLAT segment the offset must be positive;
3868   // MSB is ignored and forced to zero.
3869   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3870     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3871     if (!isIntN(OffsetSize, Op.getImm())) {
3872       Error(getFlatOffsetLoc(Operands),
3873             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3874       return false;
3875     }
3876   } else {
3877     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3878     if (!isUIntN(OffsetSize, Op.getImm())) {
3879       Error(getFlatOffsetLoc(Operands),
3880             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3881       return false;
3882     }
3883   }
3884 
3885   return true;
3886 }
3887 
3888 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3889   // Start with second operand because SMEM Offset cannot be dst or src0.
3890   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3891     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3892     if (Op.isSMEMOffset())
3893       return Op.getStartLoc();
3894   }
3895   return getLoc();
3896 }
3897 
3898 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3899                                          const OperandVector &Operands) {
3900   if (isCI() || isSI())
3901     return true;
3902 
3903   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3904   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3905     return true;
3906 
3907   auto Opcode = Inst.getOpcode();
3908   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3909   if (OpNum == -1)
3910     return true;
3911 
3912   const auto &Op = Inst.getOperand(OpNum);
3913   if (!Op.isImm())
3914     return true;
3915 
3916   uint64_t Offset = Op.getImm();
3917   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3918   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3919       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3920     return true;
3921 
3922   Error(getSMEMOffsetLoc(Operands),
3923         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3924                                "expected a 21-bit signed offset");
3925 
3926   return false;
3927 }
3928 
3929 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3930   unsigned Opcode = Inst.getOpcode();
3931   const MCInstrDesc &Desc = MII.get(Opcode);
3932   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3933     return true;
3934 
3935   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3936   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3937 
3938   const int OpIndices[] = { Src0Idx, Src1Idx };
3939 
3940   unsigned NumExprs = 0;
3941   unsigned NumLiterals = 0;
3942   uint32_t LiteralValue;
3943 
3944   for (int OpIdx : OpIndices) {
3945     if (OpIdx == -1) break;
3946 
3947     const MCOperand &MO = Inst.getOperand(OpIdx);
3948     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3949     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3950       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3951         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3952         if (NumLiterals == 0 || LiteralValue != Value) {
3953           LiteralValue = Value;
3954           ++NumLiterals;
3955         }
3956       } else if (MO.isExpr()) {
3957         ++NumExprs;
3958       }
3959     }
3960   }
3961 
3962   return NumLiterals + NumExprs <= 1;
3963 }
3964 
3965 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3966   const unsigned Opc = Inst.getOpcode();
3967   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3968       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3969     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3970     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3971 
3972     if (OpSel & ~3)
3973       return false;
3974   }
3975   return true;
3976 }
3977 
3978 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3979                                   const OperandVector &Operands) {
3980   const unsigned Opc = Inst.getOpcode();
3981   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3982   if (DppCtrlIdx < 0)
3983     return true;
3984   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3985 
3986   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
3987     // DPP64 is supported for row_newbcast only.
3988     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3989     if (Src0Idx >= 0 &&
3990         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3991       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3992       Error(S, "64 bit dpp only supports row_newbcast");
3993       return false;
3994     }
3995   }
3996 
3997   return true;
3998 }
3999 
4000 // Check if VCC register matches wavefront size
4001 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4002   auto FB = getFeatureBits();
4003   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4004     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4005 }
4006 
4007 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4008 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4009                                          const OperandVector &Operands) {
4010   unsigned Opcode = Inst.getOpcode();
4011   const MCInstrDesc &Desc = MII.get(Opcode);
4012   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4013   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4014       ImmIdx == -1)
4015     return true;
4016 
4017   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4018   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4019   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4020 
4021   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4022 
4023   unsigned NumExprs = 0;
4024   unsigned NumLiterals = 0;
4025   uint32_t LiteralValue;
4026 
4027   for (int OpIdx : OpIndices) {
4028     if (OpIdx == -1)
4029       continue;
4030 
4031     const MCOperand &MO = Inst.getOperand(OpIdx);
4032     if (!MO.isImm() && !MO.isExpr())
4033       continue;
4034     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4035       continue;
4036 
4037     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4038         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4039       Error(getConstLoc(Operands),
4040             "inline constants are not allowed for this operand");
4041       return false;
4042     }
4043 
4044     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4045       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4046       if (NumLiterals == 0 || LiteralValue != Value) {
4047         LiteralValue = Value;
4048         ++NumLiterals;
4049       }
4050     } else if (MO.isExpr()) {
4051       ++NumExprs;
4052     }
4053   }
4054   NumLiterals += NumExprs;
4055 
4056   if (!NumLiterals)
4057     return true;
4058 
4059   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4060     Error(getLitLoc(Operands), "literal operands are not supported");
4061     return false;
4062   }
4063 
4064   if (NumLiterals > 1) {
4065     Error(getLitLoc(Operands), "only one literal operand is allowed");
4066     return false;
4067   }
4068 
4069   return true;
4070 }
4071 
4072 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4073 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4074                          const MCRegisterInfo *MRI) {
4075   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4076   if (OpIdx < 0)
4077     return -1;
4078 
4079   const MCOperand &Op = Inst.getOperand(OpIdx);
4080   if (!Op.isReg())
4081     return -1;
4082 
4083   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4084   auto Reg = Sub ? Sub : Op.getReg();
4085   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4086   return AGPR32.contains(Reg) ? 1 : 0;
4087 }
4088 
4089 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4090   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4091   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4092                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4093                   SIInstrFlags::DS)) == 0)
4094     return true;
4095 
4096   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4097                                                       : AMDGPU::OpName::vdata;
4098 
4099   const MCRegisterInfo *MRI = getMRI();
4100   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4101   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4102 
4103   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4104     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4105     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4106       return false;
4107   }
4108 
4109   auto FB = getFeatureBits();
4110   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4111     if (DataAreg < 0 || DstAreg < 0)
4112       return true;
4113     return DstAreg == DataAreg;
4114   }
4115 
4116   return DstAreg < 1 && DataAreg < 1;
4117 }
4118 
4119 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4120   auto FB = getFeatureBits();
4121   if (!FB[AMDGPU::FeatureGFX90AInsts])
4122     return true;
4123 
4124   const MCRegisterInfo *MRI = getMRI();
4125   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4126   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4127   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4128     const MCOperand &Op = Inst.getOperand(I);
4129     if (!Op.isReg())
4130       continue;
4131 
4132     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4133     if (!Sub)
4134       continue;
4135 
4136     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4137       return false;
4138     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4139       return false;
4140   }
4141 
4142   return true;
4143 }
4144 
4145 // gfx90a has an undocumented limitation:
4146 // DS_GWS opcodes must use even aligned registers.
4147 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4148                                   const OperandVector &Operands) {
4149   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4150     return true;
4151 
4152   int Opc = Inst.getOpcode();
4153   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4154       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4155     return true;
4156 
4157   const MCRegisterInfo *MRI = getMRI();
4158   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4159   int Data0Pos =
4160       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4161   assert(Data0Pos != -1);
4162   auto Reg = Inst.getOperand(Data0Pos).getReg();
4163   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4164   if (RegIdx & 1) {
4165     SMLoc RegLoc = getRegLoc(Reg, Operands);
4166     Error(RegLoc, "vgpr must be even aligned");
4167     return false;
4168   }
4169 
4170   return true;
4171 }
4172 
4173 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4174                                             const OperandVector &Operands,
4175                                             const SMLoc &IDLoc) {
4176   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4177                                            AMDGPU::OpName::cpol);
4178   if (CPolPos == -1)
4179     return true;
4180 
4181   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4182 
4183   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4184   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4185       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4186     Error(IDLoc, "invalid cache policy for SMRD instruction");
4187     return false;
4188   }
4189 
4190   if (isGFX90A() && (CPol & CPol::SCC)) {
4191     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4192     StringRef CStr(S.getPointer());
4193     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4194     Error(S, "scc is not supported on this GPU");
4195     return false;
4196   }
4197 
4198   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4199     return true;
4200 
4201   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4202     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4203       Error(IDLoc, "instruction must use glc");
4204       return false;
4205     }
4206   } else {
4207     if (CPol & CPol::GLC) {
4208       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4209       StringRef CStr(S.getPointer());
4210       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4211       Error(S, "instruction must not use glc");
4212       return false;
4213     }
4214   }
4215 
4216   return true;
4217 }
4218 
4219 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4220                                           const SMLoc &IDLoc,
4221                                           const OperandVector &Operands) {
4222   if (auto ErrMsg = validateLdsDirect(Inst)) {
4223     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4224     return false;
4225   }
4226   if (!validateSOPLiteral(Inst)) {
4227     Error(getLitLoc(Operands),
4228       "only one literal operand is allowed");
4229     return false;
4230   }
4231   if (!validateVOPLiteral(Inst, Operands)) {
4232     return false;
4233   }
4234   if (!validateConstantBusLimitations(Inst, Operands)) {
4235     return false;
4236   }
4237   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4238     return false;
4239   }
4240   if (!validateIntClampSupported(Inst)) {
4241     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4242       "integer clamping is not supported on this GPU");
4243     return false;
4244   }
4245   if (!validateOpSel(Inst)) {
4246     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4247       "invalid op_sel operand");
4248     return false;
4249   }
4250   if (!validateDPP(Inst, Operands)) {
4251     return false;
4252   }
4253   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4254   if (!validateMIMGD16(Inst)) {
4255     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4256       "d16 modifier is not supported on this GPU");
4257     return false;
4258   }
4259   if (!validateMIMGDim(Inst)) {
4260     Error(IDLoc, "dim modifier is required on this GPU");
4261     return false;
4262   }
4263   if (!validateMIMGMSAA(Inst)) {
4264     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4265           "invalid dim; must be MSAA type");
4266     return false;
4267   }
4268   if (!validateMIMGDataSize(Inst)) {
4269     Error(IDLoc,
4270       "image data size does not match dmask and tfe");
4271     return false;
4272   }
4273   if (!validateMIMGAddrSize(Inst)) {
4274     Error(IDLoc,
4275       "image address size does not match dim and a16");
4276     return false;
4277   }
4278   if (!validateMIMGAtomicDMask(Inst)) {
4279     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4280       "invalid atomic image dmask");
4281     return false;
4282   }
4283   if (!validateMIMGGatherDMask(Inst)) {
4284     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4285       "invalid image_gather dmask: only one bit must be set");
4286     return false;
4287   }
4288   if (!validateMovrels(Inst, Operands)) {
4289     return false;
4290   }
4291   if (!validateFlatOffset(Inst, Operands)) {
4292     return false;
4293   }
4294   if (!validateSMEMOffset(Inst, Operands)) {
4295     return false;
4296   }
4297   if (!validateMAIAccWrite(Inst, Operands)) {
4298     return false;
4299   }
4300   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4301     return false;
4302   }
4303 
4304   if (!validateAGPRLdSt(Inst)) {
4305     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4306     ? "invalid register class: data and dst should be all VGPR or AGPR"
4307     : "invalid register class: agpr loads and stores not supported on this GPU"
4308     );
4309     return false;
4310   }
4311   if (!validateVGPRAlign(Inst)) {
4312     Error(IDLoc,
4313       "invalid register class: vgpr tuples must be 64 bit aligned");
4314     return false;
4315   }
4316   if (!validateGWS(Inst, Operands)) {
4317     return false;
4318   }
4319 
4320   if (!validateDivScale(Inst)) {
4321     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4322     return false;
4323   }
4324   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4325     return false;
4326   }
4327 
4328   return true;
4329 }
4330 
4331 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4332                                             const FeatureBitset &FBS,
4333                                             unsigned VariantID = 0);
4334 
4335 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4336                                 const FeatureBitset &AvailableFeatures,
4337                                 unsigned VariantID);
4338 
4339 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4340                                        const FeatureBitset &FBS) {
4341   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4342 }
4343 
4344 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4345                                        const FeatureBitset &FBS,
4346                                        ArrayRef<unsigned> Variants) {
4347   for (auto Variant : Variants) {
4348     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4349       return true;
4350   }
4351 
4352   return false;
4353 }
4354 
4355 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4356                                                   const SMLoc &IDLoc) {
4357   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4358 
4359   // Check if requested instruction variant is supported.
4360   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4361     return false;
4362 
4363   // This instruction is not supported.
4364   // Clear any other pending errors because they are no longer relevant.
4365   getParser().clearPendingErrors();
4366 
4367   // Requested instruction variant is not supported.
4368   // Check if any other variants are supported.
4369   StringRef VariantName = getMatchedVariantName();
4370   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4371     return Error(IDLoc,
4372                  Twine(VariantName,
4373                        " variant of this instruction is not supported"));
4374   }
4375 
4376   // Finally check if this instruction is supported on any other GPU.
4377   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4378     return Error(IDLoc, "instruction not supported on this GPU");
4379   }
4380 
4381   // Instruction not supported on any GPU. Probably a typo.
4382   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4383   return Error(IDLoc, "invalid instruction" + Suggestion);
4384 }
4385 
4386 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4387                                               OperandVector &Operands,
4388                                               MCStreamer &Out,
4389                                               uint64_t &ErrorInfo,
4390                                               bool MatchingInlineAsm) {
4391   MCInst Inst;
4392   unsigned Result = Match_Success;
4393   for (auto Variant : getMatchedVariants()) {
4394     uint64_t EI;
4395     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4396                                   Variant);
4397     // We order match statuses from least to most specific. We use most specific
4398     // status as resulting
4399     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4400     if ((R == Match_Success) ||
4401         (R == Match_PreferE32) ||
4402         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4403         (R == Match_InvalidOperand && Result != Match_MissingFeature
4404                                    && Result != Match_PreferE32) ||
4405         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4406                                    && Result != Match_MissingFeature
4407                                    && Result != Match_PreferE32)) {
4408       Result = R;
4409       ErrorInfo = EI;
4410     }
4411     if (R == Match_Success)
4412       break;
4413   }
4414 
4415   if (Result == Match_Success) {
4416     if (!validateInstruction(Inst, IDLoc, Operands)) {
4417       return true;
4418     }
4419     Inst.setLoc(IDLoc);
4420     Out.emitInstruction(Inst, getSTI());
4421     return false;
4422   }
4423 
4424   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4425   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4426     return true;
4427   }
4428 
4429   switch (Result) {
4430   default: break;
4431   case Match_MissingFeature:
4432     // It has been verified that the specified instruction
4433     // mnemonic is valid. A match was found but it requires
4434     // features which are not supported on this GPU.
4435     return Error(IDLoc, "operands are not valid for this GPU or mode");
4436 
4437   case Match_InvalidOperand: {
4438     SMLoc ErrorLoc = IDLoc;
4439     if (ErrorInfo != ~0ULL) {
4440       if (ErrorInfo >= Operands.size()) {
4441         return Error(IDLoc, "too few operands for instruction");
4442       }
4443       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4444       if (ErrorLoc == SMLoc())
4445         ErrorLoc = IDLoc;
4446     }
4447     return Error(ErrorLoc, "invalid operand for instruction");
4448   }
4449 
4450   case Match_PreferE32:
4451     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4452                         "should be encoded as e32");
4453   case Match_MnemonicFail:
4454     llvm_unreachable("Invalid instructions should have been handled already");
4455   }
4456   llvm_unreachable("Implement any new match types added!");
4457 }
4458 
4459 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4460   int64_t Tmp = -1;
4461   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4462     return true;
4463   }
4464   if (getParser().parseAbsoluteExpression(Tmp)) {
4465     return true;
4466   }
4467   Ret = static_cast<uint32_t>(Tmp);
4468   return false;
4469 }
4470 
4471 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4472                                                uint32_t &Minor) {
4473   if (ParseAsAbsoluteExpression(Major))
4474     return TokError("invalid major version");
4475 
4476   if (!trySkipToken(AsmToken::Comma))
4477     return TokError("minor version number required, comma expected");
4478 
4479   if (ParseAsAbsoluteExpression(Minor))
4480     return TokError("invalid minor version");
4481 
4482   return false;
4483 }
4484 
4485 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4486   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4487     return TokError("directive only supported for amdgcn architecture");
4488 
4489   std::string TargetIDDirective;
4490   SMLoc TargetStart = getTok().getLoc();
4491   if (getParser().parseEscapedString(TargetIDDirective))
4492     return true;
4493 
4494   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4495   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4496     return getParser().Error(TargetRange.Start,
4497         (Twine(".amdgcn_target directive's target id ") +
4498          Twine(TargetIDDirective) +
4499          Twine(" does not match the specified target id ") +
4500          Twine(getTargetStreamer().getTargetID()->toString())).str());
4501 
4502   return false;
4503 }
4504 
4505 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4506   return Error(Range.Start, "value out of range", Range);
4507 }
4508 
4509 bool AMDGPUAsmParser::calculateGPRBlocks(
4510     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4511     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4512     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4513     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4514   // TODO(scott.linder): These calculations are duplicated from
4515   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4516   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4517 
4518   unsigned NumVGPRs = NextFreeVGPR;
4519   unsigned NumSGPRs = NextFreeSGPR;
4520 
4521   if (Version.Major >= 10)
4522     NumSGPRs = 0;
4523   else {
4524     unsigned MaxAddressableNumSGPRs =
4525         IsaInfo::getAddressableNumSGPRs(&getSTI());
4526 
4527     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4528         NumSGPRs > MaxAddressableNumSGPRs)
4529       return OutOfRangeError(SGPRRange);
4530 
4531     NumSGPRs +=
4532         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4533 
4534     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4535         NumSGPRs > MaxAddressableNumSGPRs)
4536       return OutOfRangeError(SGPRRange);
4537 
4538     if (Features.test(FeatureSGPRInitBug))
4539       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4540   }
4541 
4542   VGPRBlocks =
4543       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4544   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4545 
4546   return false;
4547 }
4548 
4549 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4550   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4551     return TokError("directive only supported for amdgcn architecture");
4552 
4553   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4554     return TokError("directive only supported for amdhsa OS");
4555 
4556   StringRef KernelName;
4557   if (getParser().parseIdentifier(KernelName))
4558     return true;
4559 
4560   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4561 
4562   StringSet<> Seen;
4563 
4564   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4565 
4566   SMRange VGPRRange;
4567   uint64_t NextFreeVGPR = 0;
4568   uint64_t AccumOffset = 0;
4569   SMRange SGPRRange;
4570   uint64_t NextFreeSGPR = 0;
4571 
4572   // Count the number of user SGPRs implied from the enabled feature bits.
4573   unsigned ImpliedUserSGPRCount = 0;
4574 
4575   // Track if the asm explicitly contains the directive for the user SGPR
4576   // count.
4577   Optional<unsigned> ExplicitUserSGPRCount;
4578   bool ReserveVCC = true;
4579   bool ReserveFlatScr = true;
4580   Optional<bool> EnableWavefrontSize32;
4581 
4582   while (true) {
4583     while (trySkipToken(AsmToken::EndOfStatement));
4584 
4585     StringRef ID;
4586     SMRange IDRange = getTok().getLocRange();
4587     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4588       return true;
4589 
4590     if (ID == ".end_amdhsa_kernel")
4591       break;
4592 
4593     if (Seen.find(ID) != Seen.end())
4594       return TokError(".amdhsa_ directives cannot be repeated");
4595     Seen.insert(ID);
4596 
4597     SMLoc ValStart = getLoc();
4598     int64_t IVal;
4599     if (getParser().parseAbsoluteExpression(IVal))
4600       return true;
4601     SMLoc ValEnd = getLoc();
4602     SMRange ValRange = SMRange(ValStart, ValEnd);
4603 
4604     if (IVal < 0)
4605       return OutOfRangeError(ValRange);
4606 
4607     uint64_t Val = IVal;
4608 
4609 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4610   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4611     return OutOfRangeError(RANGE);                                             \
4612   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4613 
4614     if (ID == ".amdhsa_group_segment_fixed_size") {
4615       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4616         return OutOfRangeError(ValRange);
4617       KD.group_segment_fixed_size = Val;
4618     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4619       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4620         return OutOfRangeError(ValRange);
4621       KD.private_segment_fixed_size = Val;
4622     } else if (ID == ".amdhsa_kernarg_size") {
4623       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4624         return OutOfRangeError(ValRange);
4625       KD.kernarg_size = Val;
4626     } else if (ID == ".amdhsa_user_sgpr_count") {
4627       ExplicitUserSGPRCount = Val;
4628     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4629       if (hasArchitectedFlatScratch())
4630         return Error(IDRange.Start,
4631                      "directive is not supported with architected flat scratch",
4632                      IDRange);
4633       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4634                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4635                        Val, ValRange);
4636       if (Val)
4637         ImpliedUserSGPRCount += 4;
4638     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4639       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4640                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4641                        ValRange);
4642       if (Val)
4643         ImpliedUserSGPRCount += 2;
4644     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4645       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4646                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4647                        ValRange);
4648       if (Val)
4649         ImpliedUserSGPRCount += 2;
4650     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4651       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4652                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4653                        Val, ValRange);
4654       if (Val)
4655         ImpliedUserSGPRCount += 2;
4656     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4657       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4658                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4659                        ValRange);
4660       if (Val)
4661         ImpliedUserSGPRCount += 2;
4662     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4663       if (hasArchitectedFlatScratch())
4664         return Error(IDRange.Start,
4665                      "directive is not supported with architected flat scratch",
4666                      IDRange);
4667       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4668                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4669                        ValRange);
4670       if (Val)
4671         ImpliedUserSGPRCount += 2;
4672     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4673       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4674                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4675                        Val, ValRange);
4676       if (Val)
4677         ImpliedUserSGPRCount += 1;
4678     } else if (ID == ".amdhsa_wavefront_size32") {
4679       if (IVersion.Major < 10)
4680         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4681       EnableWavefrontSize32 = Val;
4682       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4683                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4684                        Val, ValRange);
4685     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4686       if (hasArchitectedFlatScratch())
4687         return Error(IDRange.Start,
4688                      "directive is not supported with architected flat scratch",
4689                      IDRange);
4690       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4691                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4692     } else if (ID == ".amdhsa_enable_private_segment") {
4693       if (!hasArchitectedFlatScratch())
4694         return Error(
4695             IDRange.Start,
4696             "directive is not supported without architected flat scratch",
4697             IDRange);
4698       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4699                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4700     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4701       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4702                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4703                        ValRange);
4704     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4705       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4706                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4707                        ValRange);
4708     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4709       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4710                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4711                        ValRange);
4712     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4713       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4714                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4715                        ValRange);
4716     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4717       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4718                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4719                        ValRange);
4720     } else if (ID == ".amdhsa_next_free_vgpr") {
4721       VGPRRange = ValRange;
4722       NextFreeVGPR = Val;
4723     } else if (ID == ".amdhsa_next_free_sgpr") {
4724       SGPRRange = ValRange;
4725       NextFreeSGPR = Val;
4726     } else if (ID == ".amdhsa_accum_offset") {
4727       if (!isGFX90A())
4728         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4729       AccumOffset = Val;
4730     } else if (ID == ".amdhsa_reserve_vcc") {
4731       if (!isUInt<1>(Val))
4732         return OutOfRangeError(ValRange);
4733       ReserveVCC = Val;
4734     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4735       if (IVersion.Major < 7)
4736         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4737       if (hasArchitectedFlatScratch())
4738         return Error(IDRange.Start,
4739                      "directive is not supported with architected flat scratch",
4740                      IDRange);
4741       if (!isUInt<1>(Val))
4742         return OutOfRangeError(ValRange);
4743       ReserveFlatScr = Val;
4744     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4745       if (IVersion.Major < 8)
4746         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4747       if (!isUInt<1>(Val))
4748         return OutOfRangeError(ValRange);
4749       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4750         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4751                                  IDRange);
4752     } else if (ID == ".amdhsa_float_round_mode_32") {
4753       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4754                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4755     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4756       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4757                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4758     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4759       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4760                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4761     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4762       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4763                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4764                        ValRange);
4765     } else if (ID == ".amdhsa_dx10_clamp") {
4766       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4767                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4768     } else if (ID == ".amdhsa_ieee_mode") {
4769       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4770                        Val, ValRange);
4771     } else if (ID == ".amdhsa_fp16_overflow") {
4772       if (IVersion.Major < 9)
4773         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4774       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4775                        ValRange);
4776     } else if (ID == ".amdhsa_tg_split") {
4777       if (!isGFX90A())
4778         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4779       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4780                        ValRange);
4781     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4782       if (IVersion.Major < 10)
4783         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4784       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4785                        ValRange);
4786     } else if (ID == ".amdhsa_memory_ordered") {
4787       if (IVersion.Major < 10)
4788         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4789       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4790                        ValRange);
4791     } else if (ID == ".amdhsa_forward_progress") {
4792       if (IVersion.Major < 10)
4793         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4794       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4795                        ValRange);
4796     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4797       PARSE_BITS_ENTRY(
4798           KD.compute_pgm_rsrc2,
4799           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4800           ValRange);
4801     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4802       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4803                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4804                        Val, ValRange);
4805     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4806       PARSE_BITS_ENTRY(
4807           KD.compute_pgm_rsrc2,
4808           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4809           ValRange);
4810     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4811       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4812                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4813                        Val, ValRange);
4814     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4815       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4816                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4817                        Val, ValRange);
4818     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4819       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4820                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4821                        Val, ValRange);
4822     } else if (ID == ".amdhsa_exception_int_div_zero") {
4823       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4824                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4825                        Val, ValRange);
4826     } else {
4827       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4828     }
4829 
4830 #undef PARSE_BITS_ENTRY
4831   }
4832 
4833   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4834     return TokError(".amdhsa_next_free_vgpr directive is required");
4835 
4836   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4837     return TokError(".amdhsa_next_free_sgpr directive is required");
4838 
4839   unsigned VGPRBlocks;
4840   unsigned SGPRBlocks;
4841   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4842                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4843                          EnableWavefrontSize32, NextFreeVGPR,
4844                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4845                          SGPRBlocks))
4846     return true;
4847 
4848   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4849           VGPRBlocks))
4850     return OutOfRangeError(VGPRRange);
4851   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4852                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4853 
4854   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4855           SGPRBlocks))
4856     return OutOfRangeError(SGPRRange);
4857   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4858                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4859                   SGPRBlocks);
4860 
4861   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
4862     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
4863                     "enabled user SGPRs");
4864 
4865   unsigned UserSGPRCount =
4866       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
4867 
4868   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4869     return TokError("too many user SGPRs enabled");
4870   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4871                   UserSGPRCount);
4872 
4873   if (isGFX90A()) {
4874     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4875       return TokError(".amdhsa_accum_offset directive is required");
4876     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4877       return TokError("accum_offset should be in range [4..256] in "
4878                       "increments of 4");
4879     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4880       return TokError("accum_offset exceeds total VGPR allocation");
4881     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4882                     (AccumOffset / 4 - 1));
4883   }
4884 
4885   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4886       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4887       ReserveFlatScr);
4888   return false;
4889 }
4890 
4891 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4892   uint32_t Major;
4893   uint32_t Minor;
4894 
4895   if (ParseDirectiveMajorMinor(Major, Minor))
4896     return true;
4897 
4898   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4899   return false;
4900 }
4901 
4902 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4903   uint32_t Major;
4904   uint32_t Minor;
4905   uint32_t Stepping;
4906   StringRef VendorName;
4907   StringRef ArchName;
4908 
4909   // If this directive has no arguments, then use the ISA version for the
4910   // targeted GPU.
4911   if (isToken(AsmToken::EndOfStatement)) {
4912     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4913     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4914                                                         ISA.Stepping,
4915                                                         "AMD", "AMDGPU");
4916     return false;
4917   }
4918 
4919   if (ParseDirectiveMajorMinor(Major, Minor))
4920     return true;
4921 
4922   if (!trySkipToken(AsmToken::Comma))
4923     return TokError("stepping version number required, comma expected");
4924 
4925   if (ParseAsAbsoluteExpression(Stepping))
4926     return TokError("invalid stepping version");
4927 
4928   if (!trySkipToken(AsmToken::Comma))
4929     return TokError("vendor name required, comma expected");
4930 
4931   if (!parseString(VendorName, "invalid vendor name"))
4932     return true;
4933 
4934   if (!trySkipToken(AsmToken::Comma))
4935     return TokError("arch name required, comma expected");
4936 
4937   if (!parseString(ArchName, "invalid arch name"))
4938     return true;
4939 
4940   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4941                                                       VendorName, ArchName);
4942   return false;
4943 }
4944 
4945 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4946                                                amd_kernel_code_t &Header) {
4947   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4948   // assembly for backwards compatibility.
4949   if (ID == "max_scratch_backing_memory_byte_size") {
4950     Parser.eatToEndOfStatement();
4951     return false;
4952   }
4953 
4954   SmallString<40> ErrStr;
4955   raw_svector_ostream Err(ErrStr);
4956   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4957     return TokError(Err.str());
4958   }
4959   Lex();
4960 
4961   if (ID == "enable_wavefront_size32") {
4962     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4963       if (!isGFX10Plus())
4964         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4965       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4966         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4967     } else {
4968       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4969         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4970     }
4971   }
4972 
4973   if (ID == "wavefront_size") {
4974     if (Header.wavefront_size == 5) {
4975       if (!isGFX10Plus())
4976         return TokError("wavefront_size=5 is only allowed on GFX10+");
4977       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4978         return TokError("wavefront_size=5 requires +WavefrontSize32");
4979     } else if (Header.wavefront_size == 6) {
4980       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4981         return TokError("wavefront_size=6 requires +WavefrontSize64");
4982     }
4983   }
4984 
4985   if (ID == "enable_wgp_mode") {
4986     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4987         !isGFX10Plus())
4988       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4989   }
4990 
4991   if (ID == "enable_mem_ordered") {
4992     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4993         !isGFX10Plus())
4994       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4995   }
4996 
4997   if (ID == "enable_fwd_progress") {
4998     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4999         !isGFX10Plus())
5000       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5001   }
5002 
5003   return false;
5004 }
5005 
5006 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5007   amd_kernel_code_t Header;
5008   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5009 
5010   while (true) {
5011     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5012     // will set the current token to EndOfStatement.
5013     while(trySkipToken(AsmToken::EndOfStatement));
5014 
5015     StringRef ID;
5016     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5017       return true;
5018 
5019     if (ID == ".end_amd_kernel_code_t")
5020       break;
5021 
5022     if (ParseAMDKernelCodeTValue(ID, Header))
5023       return true;
5024   }
5025 
5026   getTargetStreamer().EmitAMDKernelCodeT(Header);
5027 
5028   return false;
5029 }
5030 
5031 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5032   StringRef KernelName;
5033   if (!parseId(KernelName, "expected symbol name"))
5034     return true;
5035 
5036   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5037                                            ELF::STT_AMDGPU_HSA_KERNEL);
5038 
5039   KernelScope.initialize(getContext());
5040   return false;
5041 }
5042 
5043 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5044   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5045     return Error(getLoc(),
5046                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5047                  "architectures");
5048   }
5049 
5050   auto TargetIDDirective = getLexer().getTok().getStringContents();
5051   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5052     return Error(getParser().getTok().getLoc(), "target id must match options");
5053 
5054   getTargetStreamer().EmitISAVersion();
5055   Lex();
5056 
5057   return false;
5058 }
5059 
5060 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5061   const char *AssemblerDirectiveBegin;
5062   const char *AssemblerDirectiveEnd;
5063   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5064       isHsaAbiVersion3Or4(&getSTI())
5065           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5066                             HSAMD::V3::AssemblerDirectiveEnd)
5067           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5068                             HSAMD::AssemblerDirectiveEnd);
5069 
5070   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5071     return Error(getLoc(),
5072                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5073                  "not available on non-amdhsa OSes")).str());
5074   }
5075 
5076   std::string HSAMetadataString;
5077   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5078                           HSAMetadataString))
5079     return true;
5080 
5081   if (isHsaAbiVersion3Or4(&getSTI())) {
5082     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5083       return Error(getLoc(), "invalid HSA metadata");
5084   } else {
5085     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5086       return Error(getLoc(), "invalid HSA metadata");
5087   }
5088 
5089   return false;
5090 }
5091 
5092 /// Common code to parse out a block of text (typically YAML) between start and
5093 /// end directives.
5094 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5095                                           const char *AssemblerDirectiveEnd,
5096                                           std::string &CollectString) {
5097 
5098   raw_string_ostream CollectStream(CollectString);
5099 
5100   getLexer().setSkipSpace(false);
5101 
5102   bool FoundEnd = false;
5103   while (!isToken(AsmToken::Eof)) {
5104     while (isToken(AsmToken::Space)) {
5105       CollectStream << getTokenStr();
5106       Lex();
5107     }
5108 
5109     if (trySkipId(AssemblerDirectiveEnd)) {
5110       FoundEnd = true;
5111       break;
5112     }
5113 
5114     CollectStream << Parser.parseStringToEndOfStatement()
5115                   << getContext().getAsmInfo()->getSeparatorString();
5116 
5117     Parser.eatToEndOfStatement();
5118   }
5119 
5120   getLexer().setSkipSpace(true);
5121 
5122   if (isToken(AsmToken::Eof) && !FoundEnd) {
5123     return TokError(Twine("expected directive ") +
5124                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5125   }
5126 
5127   CollectStream.flush();
5128   return false;
5129 }
5130 
5131 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5132 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5133   std::string String;
5134   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5135                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5136     return true;
5137 
5138   auto PALMetadata = getTargetStreamer().getPALMetadata();
5139   if (!PALMetadata->setFromString(String))
5140     return Error(getLoc(), "invalid PAL metadata");
5141   return false;
5142 }
5143 
5144 /// Parse the assembler directive for old linear-format PAL metadata.
5145 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5146   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5147     return Error(getLoc(),
5148                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5149                  "not available on non-amdpal OSes")).str());
5150   }
5151 
5152   auto PALMetadata = getTargetStreamer().getPALMetadata();
5153   PALMetadata->setLegacy();
5154   for (;;) {
5155     uint32_t Key, Value;
5156     if (ParseAsAbsoluteExpression(Key)) {
5157       return TokError(Twine("invalid value in ") +
5158                       Twine(PALMD::AssemblerDirective));
5159     }
5160     if (!trySkipToken(AsmToken::Comma)) {
5161       return TokError(Twine("expected an even number of values in ") +
5162                       Twine(PALMD::AssemblerDirective));
5163     }
5164     if (ParseAsAbsoluteExpression(Value)) {
5165       return TokError(Twine("invalid value in ") +
5166                       Twine(PALMD::AssemblerDirective));
5167     }
5168     PALMetadata->setRegister(Key, Value);
5169     if (!trySkipToken(AsmToken::Comma))
5170       break;
5171   }
5172   return false;
5173 }
5174 
5175 /// ParseDirectiveAMDGPULDS
5176 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5177 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5178   if (getParser().checkForValidSection())
5179     return true;
5180 
5181   StringRef Name;
5182   SMLoc NameLoc = getLoc();
5183   if (getParser().parseIdentifier(Name))
5184     return TokError("expected identifier in directive");
5185 
5186   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5187   if (parseToken(AsmToken::Comma, "expected ','"))
5188     return true;
5189 
5190   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5191 
5192   int64_t Size;
5193   SMLoc SizeLoc = getLoc();
5194   if (getParser().parseAbsoluteExpression(Size))
5195     return true;
5196   if (Size < 0)
5197     return Error(SizeLoc, "size must be non-negative");
5198   if (Size > LocalMemorySize)
5199     return Error(SizeLoc, "size is too large");
5200 
5201   int64_t Alignment = 4;
5202   if (trySkipToken(AsmToken::Comma)) {
5203     SMLoc AlignLoc = getLoc();
5204     if (getParser().parseAbsoluteExpression(Alignment))
5205       return true;
5206     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5207       return Error(AlignLoc, "alignment must be a power of two");
5208 
5209     // Alignment larger than the size of LDS is possible in theory, as long
5210     // as the linker manages to place to symbol at address 0, but we do want
5211     // to make sure the alignment fits nicely into a 32-bit integer.
5212     if (Alignment >= 1u << 31)
5213       return Error(AlignLoc, "alignment is too large");
5214   }
5215 
5216   if (parseToken(AsmToken::EndOfStatement,
5217                  "unexpected token in '.amdgpu_lds' directive"))
5218     return true;
5219 
5220   Symbol->redefineIfPossible();
5221   if (!Symbol->isUndefined())
5222     return Error(NameLoc, "invalid symbol redefinition");
5223 
5224   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5225   return false;
5226 }
5227 
5228 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5229   StringRef IDVal = DirectiveID.getString();
5230 
5231   if (isHsaAbiVersion3Or4(&getSTI())) {
5232     if (IDVal == ".amdhsa_kernel")
5233      return ParseDirectiveAMDHSAKernel();
5234 
5235     // TODO: Restructure/combine with PAL metadata directive.
5236     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5237       return ParseDirectiveHSAMetadata();
5238   } else {
5239     if (IDVal == ".hsa_code_object_version")
5240       return ParseDirectiveHSACodeObjectVersion();
5241 
5242     if (IDVal == ".hsa_code_object_isa")
5243       return ParseDirectiveHSACodeObjectISA();
5244 
5245     if (IDVal == ".amd_kernel_code_t")
5246       return ParseDirectiveAMDKernelCodeT();
5247 
5248     if (IDVal == ".amdgpu_hsa_kernel")
5249       return ParseDirectiveAMDGPUHsaKernel();
5250 
5251     if (IDVal == ".amd_amdgpu_isa")
5252       return ParseDirectiveISAVersion();
5253 
5254     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5255       return ParseDirectiveHSAMetadata();
5256   }
5257 
5258   if (IDVal == ".amdgcn_target")
5259     return ParseDirectiveAMDGCNTarget();
5260 
5261   if (IDVal == ".amdgpu_lds")
5262     return ParseDirectiveAMDGPULDS();
5263 
5264   if (IDVal == PALMD::AssemblerDirectiveBegin)
5265     return ParseDirectivePALMetadataBegin();
5266 
5267   if (IDVal == PALMD::AssemblerDirective)
5268     return ParseDirectivePALMetadata();
5269 
5270   return true;
5271 }
5272 
5273 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5274                                            unsigned RegNo) {
5275 
5276   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5277        R.isValid(); ++R) {
5278     if (*R == RegNo)
5279       return isGFX9Plus();
5280   }
5281 
5282   // GFX10 has 2 more SGPRs 104 and 105.
5283   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5284        R.isValid(); ++R) {
5285     if (*R == RegNo)
5286       return hasSGPR104_SGPR105();
5287   }
5288 
5289   switch (RegNo) {
5290   case AMDGPU::SRC_SHARED_BASE:
5291   case AMDGPU::SRC_SHARED_LIMIT:
5292   case AMDGPU::SRC_PRIVATE_BASE:
5293   case AMDGPU::SRC_PRIVATE_LIMIT:
5294   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5295     return isGFX9Plus();
5296   case AMDGPU::TBA:
5297   case AMDGPU::TBA_LO:
5298   case AMDGPU::TBA_HI:
5299   case AMDGPU::TMA:
5300   case AMDGPU::TMA_LO:
5301   case AMDGPU::TMA_HI:
5302     return !isGFX9Plus();
5303   case AMDGPU::XNACK_MASK:
5304   case AMDGPU::XNACK_MASK_LO:
5305   case AMDGPU::XNACK_MASK_HI:
5306     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5307   case AMDGPU::SGPR_NULL:
5308     return isGFX10Plus();
5309   default:
5310     break;
5311   }
5312 
5313   if (isCI())
5314     return true;
5315 
5316   if (isSI() || isGFX10Plus()) {
5317     // No flat_scr on SI.
5318     // On GFX10 flat scratch is not a valid register operand and can only be
5319     // accessed with s_setreg/s_getreg.
5320     switch (RegNo) {
5321     case AMDGPU::FLAT_SCR:
5322     case AMDGPU::FLAT_SCR_LO:
5323     case AMDGPU::FLAT_SCR_HI:
5324       return false;
5325     default:
5326       return true;
5327     }
5328   }
5329 
5330   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5331   // SI/CI have.
5332   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5333        R.isValid(); ++R) {
5334     if (*R == RegNo)
5335       return hasSGPR102_SGPR103();
5336   }
5337 
5338   return true;
5339 }
5340 
5341 OperandMatchResultTy
5342 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5343                               OperandMode Mode) {
5344   // Try to parse with a custom parser
5345   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5346 
5347   // If we successfully parsed the operand or if there as an error parsing,
5348   // we are done.
5349   //
5350   // If we are parsing after we reach EndOfStatement then this means we
5351   // are appending default values to the Operands list.  This is only done
5352   // by custom parser, so we shouldn't continue on to the generic parsing.
5353   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5354       isToken(AsmToken::EndOfStatement))
5355     return ResTy;
5356 
5357   SMLoc RBraceLoc;
5358   SMLoc LBraceLoc = getLoc();
5359   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5360     unsigned Prefix = Operands.size();
5361 
5362     for (;;) {
5363       auto Loc = getLoc();
5364       ResTy = parseReg(Operands);
5365       if (ResTy == MatchOperand_NoMatch)
5366         Error(Loc, "expected a register");
5367       if (ResTy != MatchOperand_Success)
5368         return MatchOperand_ParseFail;
5369 
5370       RBraceLoc = getLoc();
5371       if (trySkipToken(AsmToken::RBrac))
5372         break;
5373 
5374       if (!skipToken(AsmToken::Comma,
5375                      "expected a comma or a closing square bracket")) {
5376         return MatchOperand_ParseFail;
5377       }
5378     }
5379 
5380     if (Operands.size() - Prefix > 1) {
5381       Operands.insert(Operands.begin() + Prefix,
5382                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5383       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5384     }
5385 
5386     return MatchOperand_Success;
5387   }
5388 
5389   return parseRegOrImm(Operands);
5390 }
5391 
5392 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5393   // Clear any forced encodings from the previous instruction.
5394   setForcedEncodingSize(0);
5395   setForcedDPP(false);
5396   setForcedSDWA(false);
5397 
5398   if (Name.endswith("_e64")) {
5399     setForcedEncodingSize(64);
5400     return Name.substr(0, Name.size() - 4);
5401   } else if (Name.endswith("_e32")) {
5402     setForcedEncodingSize(32);
5403     return Name.substr(0, Name.size() - 4);
5404   } else if (Name.endswith("_dpp")) {
5405     setForcedDPP(true);
5406     return Name.substr(0, Name.size() - 4);
5407   } else if (Name.endswith("_sdwa")) {
5408     setForcedSDWA(true);
5409     return Name.substr(0, Name.size() - 5);
5410   }
5411   return Name;
5412 }
5413 
5414 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5415                                        StringRef Name,
5416                                        SMLoc NameLoc, OperandVector &Operands) {
5417   // Add the instruction mnemonic
5418   Name = parseMnemonicSuffix(Name);
5419   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5420 
5421   bool IsMIMG = Name.startswith("image_");
5422 
5423   while (!trySkipToken(AsmToken::EndOfStatement)) {
5424     OperandMode Mode = OperandMode_Default;
5425     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5426       Mode = OperandMode_NSA;
5427     CPolSeen = 0;
5428     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5429 
5430     if (Res != MatchOperand_Success) {
5431       checkUnsupportedInstruction(Name, NameLoc);
5432       if (!Parser.hasPendingError()) {
5433         // FIXME: use real operand location rather than the current location.
5434         StringRef Msg =
5435           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5436                                             "not a valid operand.";
5437         Error(getLoc(), Msg);
5438       }
5439       while (!trySkipToken(AsmToken::EndOfStatement)) {
5440         lex();
5441       }
5442       return true;
5443     }
5444 
5445     // Eat the comma or space if there is one.
5446     trySkipToken(AsmToken::Comma);
5447   }
5448 
5449   return false;
5450 }
5451 
5452 //===----------------------------------------------------------------------===//
5453 // Utility functions
5454 //===----------------------------------------------------------------------===//
5455 
5456 OperandMatchResultTy
5457 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5458 
5459   if (!trySkipId(Prefix, AsmToken::Colon))
5460     return MatchOperand_NoMatch;
5461 
5462   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5463 }
5464 
5465 OperandMatchResultTy
5466 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5467                                     AMDGPUOperand::ImmTy ImmTy,
5468                                     bool (*ConvertResult)(int64_t&)) {
5469   SMLoc S = getLoc();
5470   int64_t Value = 0;
5471 
5472   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5473   if (Res != MatchOperand_Success)
5474     return Res;
5475 
5476   if (ConvertResult && !ConvertResult(Value)) {
5477     Error(S, "invalid " + StringRef(Prefix) + " value.");
5478   }
5479 
5480   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5481   return MatchOperand_Success;
5482 }
5483 
5484 OperandMatchResultTy
5485 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5486                                              OperandVector &Operands,
5487                                              AMDGPUOperand::ImmTy ImmTy,
5488                                              bool (*ConvertResult)(int64_t&)) {
5489   SMLoc S = getLoc();
5490   if (!trySkipId(Prefix, AsmToken::Colon))
5491     return MatchOperand_NoMatch;
5492 
5493   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5494     return MatchOperand_ParseFail;
5495 
5496   unsigned Val = 0;
5497   const unsigned MaxSize = 4;
5498 
5499   // FIXME: How to verify the number of elements matches the number of src
5500   // operands?
5501   for (int I = 0; ; ++I) {
5502     int64_t Op;
5503     SMLoc Loc = getLoc();
5504     if (!parseExpr(Op))
5505       return MatchOperand_ParseFail;
5506 
5507     if (Op != 0 && Op != 1) {
5508       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5509       return MatchOperand_ParseFail;
5510     }
5511 
5512     Val |= (Op << I);
5513 
5514     if (trySkipToken(AsmToken::RBrac))
5515       break;
5516 
5517     if (I + 1 == MaxSize) {
5518       Error(getLoc(), "expected a closing square bracket");
5519       return MatchOperand_ParseFail;
5520     }
5521 
5522     if (!skipToken(AsmToken::Comma, "expected a comma"))
5523       return MatchOperand_ParseFail;
5524   }
5525 
5526   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5527   return MatchOperand_Success;
5528 }
5529 
5530 OperandMatchResultTy
5531 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5532                                AMDGPUOperand::ImmTy ImmTy) {
5533   int64_t Bit;
5534   SMLoc S = getLoc();
5535 
5536   if (trySkipId(Name)) {
5537     Bit = 1;
5538   } else if (trySkipId("no", Name)) {
5539     Bit = 0;
5540   } else {
5541     return MatchOperand_NoMatch;
5542   }
5543 
5544   if (Name == "r128" && !hasMIMG_R128()) {
5545     Error(S, "r128 modifier is not supported on this GPU");
5546     return MatchOperand_ParseFail;
5547   }
5548   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5549     Error(S, "a16 modifier is not supported on this GPU");
5550     return MatchOperand_ParseFail;
5551   }
5552 
5553   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5554     ImmTy = AMDGPUOperand::ImmTyR128A16;
5555 
5556   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5557   return MatchOperand_Success;
5558 }
5559 
5560 OperandMatchResultTy
5561 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5562   unsigned CPolOn = 0;
5563   unsigned CPolOff = 0;
5564   SMLoc S = getLoc();
5565 
5566   if (trySkipId("glc"))
5567     CPolOn = AMDGPU::CPol::GLC;
5568   else if (trySkipId("noglc"))
5569     CPolOff = AMDGPU::CPol::GLC;
5570   else if (trySkipId("slc"))
5571     CPolOn = AMDGPU::CPol::SLC;
5572   else if (trySkipId("noslc"))
5573     CPolOff = AMDGPU::CPol::SLC;
5574   else if (trySkipId("dlc"))
5575     CPolOn = AMDGPU::CPol::DLC;
5576   else if (trySkipId("nodlc"))
5577     CPolOff = AMDGPU::CPol::DLC;
5578   else if (trySkipId("scc"))
5579     CPolOn = AMDGPU::CPol::SCC;
5580   else if (trySkipId("noscc"))
5581     CPolOff = AMDGPU::CPol::SCC;
5582   else
5583     return MatchOperand_NoMatch;
5584 
5585   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5586     Error(S, "dlc modifier is not supported on this GPU");
5587     return MatchOperand_ParseFail;
5588   }
5589 
5590   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5591     Error(S, "scc modifier is not supported on this GPU");
5592     return MatchOperand_ParseFail;
5593   }
5594 
5595   if (CPolSeen & (CPolOn | CPolOff)) {
5596     Error(S, "duplicate cache policy modifier");
5597     return MatchOperand_ParseFail;
5598   }
5599 
5600   CPolSeen |= (CPolOn | CPolOff);
5601 
5602   for (unsigned I = 1; I != Operands.size(); ++I) {
5603     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5604     if (Op.isCPol()) {
5605       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5606       return MatchOperand_Success;
5607     }
5608   }
5609 
5610   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5611                                               AMDGPUOperand::ImmTyCPol));
5612 
5613   return MatchOperand_Success;
5614 }
5615 
5616 static void addOptionalImmOperand(
5617   MCInst& Inst, const OperandVector& Operands,
5618   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5619   AMDGPUOperand::ImmTy ImmT,
5620   int64_t Default = 0) {
5621   auto i = OptionalIdx.find(ImmT);
5622   if (i != OptionalIdx.end()) {
5623     unsigned Idx = i->second;
5624     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5625   } else {
5626     Inst.addOperand(MCOperand::createImm(Default));
5627   }
5628 }
5629 
5630 OperandMatchResultTy
5631 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5632                                        StringRef &Value,
5633                                        SMLoc &StringLoc) {
5634   if (!trySkipId(Prefix, AsmToken::Colon))
5635     return MatchOperand_NoMatch;
5636 
5637   StringLoc = getLoc();
5638   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5639                                                   : MatchOperand_ParseFail;
5640 }
5641 
5642 //===----------------------------------------------------------------------===//
5643 // MTBUF format
5644 //===----------------------------------------------------------------------===//
5645 
5646 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5647                                   int64_t MaxVal,
5648                                   int64_t &Fmt) {
5649   int64_t Val;
5650   SMLoc Loc = getLoc();
5651 
5652   auto Res = parseIntWithPrefix(Pref, Val);
5653   if (Res == MatchOperand_ParseFail)
5654     return false;
5655   if (Res == MatchOperand_NoMatch)
5656     return true;
5657 
5658   if (Val < 0 || Val > MaxVal) {
5659     Error(Loc, Twine("out of range ", StringRef(Pref)));
5660     return false;
5661   }
5662 
5663   Fmt = Val;
5664   return true;
5665 }
5666 
5667 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5668 // values to live in a joint format operand in the MCInst encoding.
5669 OperandMatchResultTy
5670 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5671   using namespace llvm::AMDGPU::MTBUFFormat;
5672 
5673   int64_t Dfmt = DFMT_UNDEF;
5674   int64_t Nfmt = NFMT_UNDEF;
5675 
5676   // dfmt and nfmt can appear in either order, and each is optional.
5677   for (int I = 0; I < 2; ++I) {
5678     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5679       return MatchOperand_ParseFail;
5680 
5681     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5682       return MatchOperand_ParseFail;
5683     }
5684     // Skip optional comma between dfmt/nfmt
5685     // but guard against 2 commas following each other.
5686     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5687         !peekToken().is(AsmToken::Comma)) {
5688       trySkipToken(AsmToken::Comma);
5689     }
5690   }
5691 
5692   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5693     return MatchOperand_NoMatch;
5694 
5695   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5696   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5697 
5698   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5699   return MatchOperand_Success;
5700 }
5701 
5702 OperandMatchResultTy
5703 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5704   using namespace llvm::AMDGPU::MTBUFFormat;
5705 
5706   int64_t Fmt = UFMT_UNDEF;
5707 
5708   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5709     return MatchOperand_ParseFail;
5710 
5711   if (Fmt == UFMT_UNDEF)
5712     return MatchOperand_NoMatch;
5713 
5714   Format = Fmt;
5715   return MatchOperand_Success;
5716 }
5717 
5718 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5719                                     int64_t &Nfmt,
5720                                     StringRef FormatStr,
5721                                     SMLoc Loc) {
5722   using namespace llvm::AMDGPU::MTBUFFormat;
5723   int64_t Format;
5724 
5725   Format = getDfmt(FormatStr);
5726   if (Format != DFMT_UNDEF) {
5727     Dfmt = Format;
5728     return true;
5729   }
5730 
5731   Format = getNfmt(FormatStr, getSTI());
5732   if (Format != NFMT_UNDEF) {
5733     Nfmt = Format;
5734     return true;
5735   }
5736 
5737   Error(Loc, "unsupported format");
5738   return false;
5739 }
5740 
5741 OperandMatchResultTy
5742 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5743                                           SMLoc FormatLoc,
5744                                           int64_t &Format) {
5745   using namespace llvm::AMDGPU::MTBUFFormat;
5746 
5747   int64_t Dfmt = DFMT_UNDEF;
5748   int64_t Nfmt = NFMT_UNDEF;
5749   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5750     return MatchOperand_ParseFail;
5751 
5752   if (trySkipToken(AsmToken::Comma)) {
5753     StringRef Str;
5754     SMLoc Loc = getLoc();
5755     if (!parseId(Str, "expected a format string") ||
5756         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5757       return MatchOperand_ParseFail;
5758     }
5759     if (Dfmt == DFMT_UNDEF) {
5760       Error(Loc, "duplicate numeric format");
5761       return MatchOperand_ParseFail;
5762     } else if (Nfmt == NFMT_UNDEF) {
5763       Error(Loc, "duplicate data format");
5764       return MatchOperand_ParseFail;
5765     }
5766   }
5767 
5768   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5769   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5770 
5771   if (isGFX10Plus()) {
5772     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5773     if (Ufmt == UFMT_UNDEF) {
5774       Error(FormatLoc, "unsupported format");
5775       return MatchOperand_ParseFail;
5776     }
5777     Format = Ufmt;
5778   } else {
5779     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5780   }
5781 
5782   return MatchOperand_Success;
5783 }
5784 
5785 OperandMatchResultTy
5786 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5787                                             SMLoc Loc,
5788                                             int64_t &Format) {
5789   using namespace llvm::AMDGPU::MTBUFFormat;
5790 
5791   auto Id = getUnifiedFormat(FormatStr);
5792   if (Id == UFMT_UNDEF)
5793     return MatchOperand_NoMatch;
5794 
5795   if (!isGFX10Plus()) {
5796     Error(Loc, "unified format is not supported on this GPU");
5797     return MatchOperand_ParseFail;
5798   }
5799 
5800   Format = Id;
5801   return MatchOperand_Success;
5802 }
5803 
5804 OperandMatchResultTy
5805 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5806   using namespace llvm::AMDGPU::MTBUFFormat;
5807   SMLoc Loc = getLoc();
5808 
5809   if (!parseExpr(Format))
5810     return MatchOperand_ParseFail;
5811   if (!isValidFormatEncoding(Format, getSTI())) {
5812     Error(Loc, "out of range format");
5813     return MatchOperand_ParseFail;
5814   }
5815 
5816   return MatchOperand_Success;
5817 }
5818 
5819 OperandMatchResultTy
5820 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5821   using namespace llvm::AMDGPU::MTBUFFormat;
5822 
5823   if (!trySkipId("format", AsmToken::Colon))
5824     return MatchOperand_NoMatch;
5825 
5826   if (trySkipToken(AsmToken::LBrac)) {
5827     StringRef FormatStr;
5828     SMLoc Loc = getLoc();
5829     if (!parseId(FormatStr, "expected a format string"))
5830       return MatchOperand_ParseFail;
5831 
5832     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5833     if (Res == MatchOperand_NoMatch)
5834       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5835     if (Res != MatchOperand_Success)
5836       return Res;
5837 
5838     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5839       return MatchOperand_ParseFail;
5840 
5841     return MatchOperand_Success;
5842   }
5843 
5844   return parseNumericFormat(Format);
5845 }
5846 
5847 OperandMatchResultTy
5848 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5849   using namespace llvm::AMDGPU::MTBUFFormat;
5850 
5851   int64_t Format = getDefaultFormatEncoding(getSTI());
5852   OperandMatchResultTy Res;
5853   SMLoc Loc = getLoc();
5854 
5855   // Parse legacy format syntax.
5856   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5857   if (Res == MatchOperand_ParseFail)
5858     return Res;
5859 
5860   bool FormatFound = (Res == MatchOperand_Success);
5861 
5862   Operands.push_back(
5863     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5864 
5865   if (FormatFound)
5866     trySkipToken(AsmToken::Comma);
5867 
5868   if (isToken(AsmToken::EndOfStatement)) {
5869     // We are expecting an soffset operand,
5870     // but let matcher handle the error.
5871     return MatchOperand_Success;
5872   }
5873 
5874   // Parse soffset.
5875   Res = parseRegOrImm(Operands);
5876   if (Res != MatchOperand_Success)
5877     return Res;
5878 
5879   trySkipToken(AsmToken::Comma);
5880 
5881   if (!FormatFound) {
5882     Res = parseSymbolicOrNumericFormat(Format);
5883     if (Res == MatchOperand_ParseFail)
5884       return Res;
5885     if (Res == MatchOperand_Success) {
5886       auto Size = Operands.size();
5887       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5888       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5889       Op.setImm(Format);
5890     }
5891     return MatchOperand_Success;
5892   }
5893 
5894   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5895     Error(getLoc(), "duplicate format");
5896     return MatchOperand_ParseFail;
5897   }
5898   return MatchOperand_Success;
5899 }
5900 
5901 //===----------------------------------------------------------------------===//
5902 // ds
5903 //===----------------------------------------------------------------------===//
5904 
5905 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5906                                     const OperandVector &Operands) {
5907   OptionalImmIndexMap OptionalIdx;
5908 
5909   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5910     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5911 
5912     // Add the register arguments
5913     if (Op.isReg()) {
5914       Op.addRegOperands(Inst, 1);
5915       continue;
5916     }
5917 
5918     // Handle optional arguments
5919     OptionalIdx[Op.getImmTy()] = i;
5920   }
5921 
5922   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5923   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5924   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5925 
5926   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5927 }
5928 
5929 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5930                                 bool IsGdsHardcoded) {
5931   OptionalImmIndexMap OptionalIdx;
5932 
5933   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5934     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5935 
5936     // Add the register arguments
5937     if (Op.isReg()) {
5938       Op.addRegOperands(Inst, 1);
5939       continue;
5940     }
5941 
5942     if (Op.isToken() && Op.getToken() == "gds") {
5943       IsGdsHardcoded = true;
5944       continue;
5945     }
5946 
5947     // Handle optional arguments
5948     OptionalIdx[Op.getImmTy()] = i;
5949   }
5950 
5951   AMDGPUOperand::ImmTy OffsetType =
5952     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5953      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5954      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5955                                                       AMDGPUOperand::ImmTyOffset;
5956 
5957   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5958 
5959   if (!IsGdsHardcoded) {
5960     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5961   }
5962   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5963 }
5964 
5965 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5966   OptionalImmIndexMap OptionalIdx;
5967 
5968   unsigned OperandIdx[4];
5969   unsigned EnMask = 0;
5970   int SrcIdx = 0;
5971 
5972   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5973     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5974 
5975     // Add the register arguments
5976     if (Op.isReg()) {
5977       assert(SrcIdx < 4);
5978       OperandIdx[SrcIdx] = Inst.size();
5979       Op.addRegOperands(Inst, 1);
5980       ++SrcIdx;
5981       continue;
5982     }
5983 
5984     if (Op.isOff()) {
5985       assert(SrcIdx < 4);
5986       OperandIdx[SrcIdx] = Inst.size();
5987       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5988       ++SrcIdx;
5989       continue;
5990     }
5991 
5992     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5993       Op.addImmOperands(Inst, 1);
5994       continue;
5995     }
5996 
5997     if (Op.isToken() && Op.getToken() == "done")
5998       continue;
5999 
6000     // Handle optional arguments
6001     OptionalIdx[Op.getImmTy()] = i;
6002   }
6003 
6004   assert(SrcIdx == 4);
6005 
6006   bool Compr = false;
6007   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6008     Compr = true;
6009     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6010     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6011     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6012   }
6013 
6014   for (auto i = 0; i < SrcIdx; ++i) {
6015     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6016       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6017     }
6018   }
6019 
6020   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6021   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6022 
6023   Inst.addOperand(MCOperand::createImm(EnMask));
6024 }
6025 
6026 //===----------------------------------------------------------------------===//
6027 // s_waitcnt
6028 //===----------------------------------------------------------------------===//
6029 
6030 static bool
6031 encodeCnt(
6032   const AMDGPU::IsaVersion ISA,
6033   int64_t &IntVal,
6034   int64_t CntVal,
6035   bool Saturate,
6036   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6037   unsigned (*decode)(const IsaVersion &Version, unsigned))
6038 {
6039   bool Failed = false;
6040 
6041   IntVal = encode(ISA, IntVal, CntVal);
6042   if (CntVal != decode(ISA, IntVal)) {
6043     if (Saturate) {
6044       IntVal = encode(ISA, IntVal, -1);
6045     } else {
6046       Failed = true;
6047     }
6048   }
6049   return Failed;
6050 }
6051 
6052 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6053 
6054   SMLoc CntLoc = getLoc();
6055   StringRef CntName = getTokenStr();
6056 
6057   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6058       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6059     return false;
6060 
6061   int64_t CntVal;
6062   SMLoc ValLoc = getLoc();
6063   if (!parseExpr(CntVal))
6064     return false;
6065 
6066   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6067 
6068   bool Failed = true;
6069   bool Sat = CntName.endswith("_sat");
6070 
6071   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6072     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6073   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6074     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6075   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6076     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6077   } else {
6078     Error(CntLoc, "invalid counter name " + CntName);
6079     return false;
6080   }
6081 
6082   if (Failed) {
6083     Error(ValLoc, "too large value for " + CntName);
6084     return false;
6085   }
6086 
6087   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6088     return false;
6089 
6090   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6091     if (isToken(AsmToken::EndOfStatement)) {
6092       Error(getLoc(), "expected a counter name");
6093       return false;
6094     }
6095   }
6096 
6097   return true;
6098 }
6099 
6100 OperandMatchResultTy
6101 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6102   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6103   int64_t Waitcnt = getWaitcntBitMask(ISA);
6104   SMLoc S = getLoc();
6105 
6106   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6107     while (!isToken(AsmToken::EndOfStatement)) {
6108       if (!parseCnt(Waitcnt))
6109         return MatchOperand_ParseFail;
6110     }
6111   } else {
6112     if (!parseExpr(Waitcnt))
6113       return MatchOperand_ParseFail;
6114   }
6115 
6116   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6117   return MatchOperand_Success;
6118 }
6119 
6120 bool
6121 AMDGPUOperand::isSWaitCnt() const {
6122   return isImm();
6123 }
6124 
6125 //===----------------------------------------------------------------------===//
6126 // hwreg
6127 //===----------------------------------------------------------------------===//
6128 
6129 bool
6130 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6131                                 OperandInfoTy &Offset,
6132                                 OperandInfoTy &Width) {
6133   using namespace llvm::AMDGPU::Hwreg;
6134 
6135   // The register may be specified by name or using a numeric code
6136   HwReg.Loc = getLoc();
6137   if (isToken(AsmToken::Identifier) &&
6138       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6139     HwReg.IsSymbolic = true;
6140     lex(); // skip register name
6141   } else if (!parseExpr(HwReg.Id, "a register name")) {
6142     return false;
6143   }
6144 
6145   if (trySkipToken(AsmToken::RParen))
6146     return true;
6147 
6148   // parse optional params
6149   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6150     return false;
6151 
6152   Offset.Loc = getLoc();
6153   if (!parseExpr(Offset.Id))
6154     return false;
6155 
6156   if (!skipToken(AsmToken::Comma, "expected a comma"))
6157     return false;
6158 
6159   Width.Loc = getLoc();
6160   return parseExpr(Width.Id) &&
6161          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6162 }
6163 
6164 bool
6165 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6166                                const OperandInfoTy &Offset,
6167                                const OperandInfoTy &Width) {
6168 
6169   using namespace llvm::AMDGPU::Hwreg;
6170 
6171   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6172     Error(HwReg.Loc,
6173           "specified hardware register is not supported on this GPU");
6174     return false;
6175   }
6176   if (!isValidHwreg(HwReg.Id)) {
6177     Error(HwReg.Loc,
6178           "invalid code of hardware register: only 6-bit values are legal");
6179     return false;
6180   }
6181   if (!isValidHwregOffset(Offset.Id)) {
6182     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6183     return false;
6184   }
6185   if (!isValidHwregWidth(Width.Id)) {
6186     Error(Width.Loc,
6187           "invalid bitfield width: only values from 1 to 32 are legal");
6188     return false;
6189   }
6190   return true;
6191 }
6192 
6193 OperandMatchResultTy
6194 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6195   using namespace llvm::AMDGPU::Hwreg;
6196 
6197   int64_t ImmVal = 0;
6198   SMLoc Loc = getLoc();
6199 
6200   if (trySkipId("hwreg", AsmToken::LParen)) {
6201     OperandInfoTy HwReg(ID_UNKNOWN_);
6202     OperandInfoTy Offset(OFFSET_DEFAULT_);
6203     OperandInfoTy Width(WIDTH_DEFAULT_);
6204     if (parseHwregBody(HwReg, Offset, Width) &&
6205         validateHwreg(HwReg, Offset, Width)) {
6206       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6207     } else {
6208       return MatchOperand_ParseFail;
6209     }
6210   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6211     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6212       Error(Loc, "invalid immediate: only 16-bit values are legal");
6213       return MatchOperand_ParseFail;
6214     }
6215   } else {
6216     return MatchOperand_ParseFail;
6217   }
6218 
6219   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6220   return MatchOperand_Success;
6221 }
6222 
6223 bool AMDGPUOperand::isHwreg() const {
6224   return isImmTy(ImmTyHwreg);
6225 }
6226 
6227 //===----------------------------------------------------------------------===//
6228 // sendmsg
6229 //===----------------------------------------------------------------------===//
6230 
6231 bool
6232 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6233                                   OperandInfoTy &Op,
6234                                   OperandInfoTy &Stream) {
6235   using namespace llvm::AMDGPU::SendMsg;
6236 
6237   Msg.Loc = getLoc();
6238   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6239     Msg.IsSymbolic = true;
6240     lex(); // skip message name
6241   } else if (!parseExpr(Msg.Id, "a message name")) {
6242     return false;
6243   }
6244 
6245   if (trySkipToken(AsmToken::Comma)) {
6246     Op.IsDefined = true;
6247     Op.Loc = getLoc();
6248     if (isToken(AsmToken::Identifier) &&
6249         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6250       lex(); // skip operation name
6251     } else if (!parseExpr(Op.Id, "an operation name")) {
6252       return false;
6253     }
6254 
6255     if (trySkipToken(AsmToken::Comma)) {
6256       Stream.IsDefined = true;
6257       Stream.Loc = getLoc();
6258       if (!parseExpr(Stream.Id))
6259         return false;
6260     }
6261   }
6262 
6263   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6264 }
6265 
6266 bool
6267 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6268                                  const OperandInfoTy &Op,
6269                                  const OperandInfoTy &Stream) {
6270   using namespace llvm::AMDGPU::SendMsg;
6271 
6272   // Validation strictness depends on whether message is specified
6273   // in a symbolc or in a numeric form. In the latter case
6274   // only encoding possibility is checked.
6275   bool Strict = Msg.IsSymbolic;
6276 
6277   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6278     Error(Msg.Loc, "invalid message id");
6279     return false;
6280   }
6281   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6282     if (Op.IsDefined) {
6283       Error(Op.Loc, "message does not support operations");
6284     } else {
6285       Error(Msg.Loc, "missing message operation");
6286     }
6287     return false;
6288   }
6289   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6290     Error(Op.Loc, "invalid operation id");
6291     return false;
6292   }
6293   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6294     Error(Stream.Loc, "message operation does not support streams");
6295     return false;
6296   }
6297   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6298     Error(Stream.Loc, "invalid message stream id");
6299     return false;
6300   }
6301   return true;
6302 }
6303 
6304 OperandMatchResultTy
6305 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6306   using namespace llvm::AMDGPU::SendMsg;
6307 
6308   int64_t ImmVal = 0;
6309   SMLoc Loc = getLoc();
6310 
6311   if (trySkipId("sendmsg", AsmToken::LParen)) {
6312     OperandInfoTy Msg(ID_UNKNOWN_);
6313     OperandInfoTy Op(OP_NONE_);
6314     OperandInfoTy Stream(STREAM_ID_NONE_);
6315     if (parseSendMsgBody(Msg, Op, Stream) &&
6316         validateSendMsg(Msg, Op, Stream)) {
6317       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6318     } else {
6319       return MatchOperand_ParseFail;
6320     }
6321   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6322     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6323       Error(Loc, "invalid immediate: only 16-bit values are legal");
6324       return MatchOperand_ParseFail;
6325     }
6326   } else {
6327     return MatchOperand_ParseFail;
6328   }
6329 
6330   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6331   return MatchOperand_Success;
6332 }
6333 
6334 bool AMDGPUOperand::isSendMsg() const {
6335   return isImmTy(ImmTySendMsg);
6336 }
6337 
6338 //===----------------------------------------------------------------------===//
6339 // v_interp
6340 //===----------------------------------------------------------------------===//
6341 
6342 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6343   StringRef Str;
6344   SMLoc S = getLoc();
6345 
6346   if (!parseId(Str))
6347     return MatchOperand_NoMatch;
6348 
6349   int Slot = StringSwitch<int>(Str)
6350     .Case("p10", 0)
6351     .Case("p20", 1)
6352     .Case("p0", 2)
6353     .Default(-1);
6354 
6355   if (Slot == -1) {
6356     Error(S, "invalid interpolation slot");
6357     return MatchOperand_ParseFail;
6358   }
6359 
6360   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6361                                               AMDGPUOperand::ImmTyInterpSlot));
6362   return MatchOperand_Success;
6363 }
6364 
6365 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6366   StringRef Str;
6367   SMLoc S = getLoc();
6368 
6369   if (!parseId(Str))
6370     return MatchOperand_NoMatch;
6371 
6372   if (!Str.startswith("attr")) {
6373     Error(S, "invalid interpolation attribute");
6374     return MatchOperand_ParseFail;
6375   }
6376 
6377   StringRef Chan = Str.take_back(2);
6378   int AttrChan = StringSwitch<int>(Chan)
6379     .Case(".x", 0)
6380     .Case(".y", 1)
6381     .Case(".z", 2)
6382     .Case(".w", 3)
6383     .Default(-1);
6384   if (AttrChan == -1) {
6385     Error(S, "invalid or missing interpolation attribute channel");
6386     return MatchOperand_ParseFail;
6387   }
6388 
6389   Str = Str.drop_back(2).drop_front(4);
6390 
6391   uint8_t Attr;
6392   if (Str.getAsInteger(10, Attr)) {
6393     Error(S, "invalid or missing interpolation attribute number");
6394     return MatchOperand_ParseFail;
6395   }
6396 
6397   if (Attr > 63) {
6398     Error(S, "out of bounds interpolation attribute number");
6399     return MatchOperand_ParseFail;
6400   }
6401 
6402   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6403 
6404   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6405                                               AMDGPUOperand::ImmTyInterpAttr));
6406   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6407                                               AMDGPUOperand::ImmTyAttrChan));
6408   return MatchOperand_Success;
6409 }
6410 
6411 //===----------------------------------------------------------------------===//
6412 // exp
6413 //===----------------------------------------------------------------------===//
6414 
6415 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6416   using namespace llvm::AMDGPU::Exp;
6417 
6418   StringRef Str;
6419   SMLoc S = getLoc();
6420 
6421   if (!parseId(Str))
6422     return MatchOperand_NoMatch;
6423 
6424   unsigned Id = getTgtId(Str);
6425   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6426     Error(S, (Id == ET_INVALID) ?
6427                 "invalid exp target" :
6428                 "exp target is not supported on this GPU");
6429     return MatchOperand_ParseFail;
6430   }
6431 
6432   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6433                                               AMDGPUOperand::ImmTyExpTgt));
6434   return MatchOperand_Success;
6435 }
6436 
6437 //===----------------------------------------------------------------------===//
6438 // parser helpers
6439 //===----------------------------------------------------------------------===//
6440 
6441 bool
6442 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6443   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6444 }
6445 
6446 bool
6447 AMDGPUAsmParser::isId(const StringRef Id) const {
6448   return isId(getToken(), Id);
6449 }
6450 
6451 bool
6452 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6453   return getTokenKind() == Kind;
6454 }
6455 
6456 bool
6457 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6458   if (isId(Id)) {
6459     lex();
6460     return true;
6461   }
6462   return false;
6463 }
6464 
6465 bool
6466 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6467   if (isToken(AsmToken::Identifier)) {
6468     StringRef Tok = getTokenStr();
6469     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6470       lex();
6471       return true;
6472     }
6473   }
6474   return false;
6475 }
6476 
6477 bool
6478 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6479   if (isId(Id) && peekToken().is(Kind)) {
6480     lex();
6481     lex();
6482     return true;
6483   }
6484   return false;
6485 }
6486 
6487 bool
6488 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6489   if (isToken(Kind)) {
6490     lex();
6491     return true;
6492   }
6493   return false;
6494 }
6495 
6496 bool
6497 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6498                            const StringRef ErrMsg) {
6499   if (!trySkipToken(Kind)) {
6500     Error(getLoc(), ErrMsg);
6501     return false;
6502   }
6503   return true;
6504 }
6505 
6506 bool
6507 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6508   SMLoc S = getLoc();
6509 
6510   const MCExpr *Expr;
6511   if (Parser.parseExpression(Expr))
6512     return false;
6513 
6514   if (Expr->evaluateAsAbsolute(Imm))
6515     return true;
6516 
6517   if (Expected.empty()) {
6518     Error(S, "expected absolute expression");
6519   } else {
6520     Error(S, Twine("expected ", Expected) +
6521              Twine(" or an absolute expression"));
6522   }
6523   return false;
6524 }
6525 
6526 bool
6527 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6528   SMLoc S = getLoc();
6529 
6530   const MCExpr *Expr;
6531   if (Parser.parseExpression(Expr))
6532     return false;
6533 
6534   int64_t IntVal;
6535   if (Expr->evaluateAsAbsolute(IntVal)) {
6536     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6537   } else {
6538     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6539   }
6540   return true;
6541 }
6542 
6543 bool
6544 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6545   if (isToken(AsmToken::String)) {
6546     Val = getToken().getStringContents();
6547     lex();
6548     return true;
6549   } else {
6550     Error(getLoc(), ErrMsg);
6551     return false;
6552   }
6553 }
6554 
6555 bool
6556 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6557   if (isToken(AsmToken::Identifier)) {
6558     Val = getTokenStr();
6559     lex();
6560     return true;
6561   } else {
6562     if (!ErrMsg.empty())
6563       Error(getLoc(), ErrMsg);
6564     return false;
6565   }
6566 }
6567 
6568 AsmToken
6569 AMDGPUAsmParser::getToken() const {
6570   return Parser.getTok();
6571 }
6572 
6573 AsmToken
6574 AMDGPUAsmParser::peekToken() {
6575   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6576 }
6577 
6578 void
6579 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6580   auto TokCount = getLexer().peekTokens(Tokens);
6581 
6582   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6583     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6584 }
6585 
6586 AsmToken::TokenKind
6587 AMDGPUAsmParser::getTokenKind() const {
6588   return getLexer().getKind();
6589 }
6590 
6591 SMLoc
6592 AMDGPUAsmParser::getLoc() const {
6593   return getToken().getLoc();
6594 }
6595 
6596 StringRef
6597 AMDGPUAsmParser::getTokenStr() const {
6598   return getToken().getString();
6599 }
6600 
6601 void
6602 AMDGPUAsmParser::lex() {
6603   Parser.Lex();
6604 }
6605 
6606 SMLoc
6607 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6608                                const OperandVector &Operands) const {
6609   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6610     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6611     if (Test(Op))
6612       return Op.getStartLoc();
6613   }
6614   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6615 }
6616 
6617 SMLoc
6618 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6619                            const OperandVector &Operands) const {
6620   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6621   return getOperandLoc(Test, Operands);
6622 }
6623 
6624 SMLoc
6625 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6626                            const OperandVector &Operands) const {
6627   auto Test = [=](const AMDGPUOperand& Op) {
6628     return Op.isRegKind() && Op.getReg() == Reg;
6629   };
6630   return getOperandLoc(Test, Operands);
6631 }
6632 
6633 SMLoc
6634 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6635   auto Test = [](const AMDGPUOperand& Op) {
6636     return Op.IsImmKindLiteral() || Op.isExpr();
6637   };
6638   return getOperandLoc(Test, Operands);
6639 }
6640 
6641 SMLoc
6642 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6643   auto Test = [](const AMDGPUOperand& Op) {
6644     return Op.isImmKindConst();
6645   };
6646   return getOperandLoc(Test, Operands);
6647 }
6648 
6649 //===----------------------------------------------------------------------===//
6650 // swizzle
6651 //===----------------------------------------------------------------------===//
6652 
6653 LLVM_READNONE
6654 static unsigned
6655 encodeBitmaskPerm(const unsigned AndMask,
6656                   const unsigned OrMask,
6657                   const unsigned XorMask) {
6658   using namespace llvm::AMDGPU::Swizzle;
6659 
6660   return BITMASK_PERM_ENC |
6661          (AndMask << BITMASK_AND_SHIFT) |
6662          (OrMask  << BITMASK_OR_SHIFT)  |
6663          (XorMask << BITMASK_XOR_SHIFT);
6664 }
6665 
6666 bool
6667 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6668                                      const unsigned MinVal,
6669                                      const unsigned MaxVal,
6670                                      const StringRef ErrMsg,
6671                                      SMLoc &Loc) {
6672   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6673     return false;
6674   }
6675   Loc = getLoc();
6676   if (!parseExpr(Op)) {
6677     return false;
6678   }
6679   if (Op < MinVal || Op > MaxVal) {
6680     Error(Loc, ErrMsg);
6681     return false;
6682   }
6683 
6684   return true;
6685 }
6686 
6687 bool
6688 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6689                                       const unsigned MinVal,
6690                                       const unsigned MaxVal,
6691                                       const StringRef ErrMsg) {
6692   SMLoc Loc;
6693   for (unsigned i = 0; i < OpNum; ++i) {
6694     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6695       return false;
6696   }
6697 
6698   return true;
6699 }
6700 
6701 bool
6702 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6703   using namespace llvm::AMDGPU::Swizzle;
6704 
6705   int64_t Lane[LANE_NUM];
6706   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6707                            "expected a 2-bit lane id")) {
6708     Imm = QUAD_PERM_ENC;
6709     for (unsigned I = 0; I < LANE_NUM; ++I) {
6710       Imm |= Lane[I] << (LANE_SHIFT * I);
6711     }
6712     return true;
6713   }
6714   return false;
6715 }
6716 
6717 bool
6718 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6719   using namespace llvm::AMDGPU::Swizzle;
6720 
6721   SMLoc Loc;
6722   int64_t GroupSize;
6723   int64_t LaneIdx;
6724 
6725   if (!parseSwizzleOperand(GroupSize,
6726                            2, 32,
6727                            "group size must be in the interval [2,32]",
6728                            Loc)) {
6729     return false;
6730   }
6731   if (!isPowerOf2_64(GroupSize)) {
6732     Error(Loc, "group size must be a power of two");
6733     return false;
6734   }
6735   if (parseSwizzleOperand(LaneIdx,
6736                           0, GroupSize - 1,
6737                           "lane id must be in the interval [0,group size - 1]",
6738                           Loc)) {
6739     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6740     return true;
6741   }
6742   return false;
6743 }
6744 
6745 bool
6746 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6747   using namespace llvm::AMDGPU::Swizzle;
6748 
6749   SMLoc Loc;
6750   int64_t GroupSize;
6751 
6752   if (!parseSwizzleOperand(GroupSize,
6753                            2, 32,
6754                            "group size must be in the interval [2,32]",
6755                            Loc)) {
6756     return false;
6757   }
6758   if (!isPowerOf2_64(GroupSize)) {
6759     Error(Loc, "group size must be a power of two");
6760     return false;
6761   }
6762 
6763   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6764   return true;
6765 }
6766 
6767 bool
6768 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6769   using namespace llvm::AMDGPU::Swizzle;
6770 
6771   SMLoc Loc;
6772   int64_t GroupSize;
6773 
6774   if (!parseSwizzleOperand(GroupSize,
6775                            1, 16,
6776                            "group size must be in the interval [1,16]",
6777                            Loc)) {
6778     return false;
6779   }
6780   if (!isPowerOf2_64(GroupSize)) {
6781     Error(Loc, "group size must be a power of two");
6782     return false;
6783   }
6784 
6785   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6786   return true;
6787 }
6788 
6789 bool
6790 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6791   using namespace llvm::AMDGPU::Swizzle;
6792 
6793   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6794     return false;
6795   }
6796 
6797   StringRef Ctl;
6798   SMLoc StrLoc = getLoc();
6799   if (!parseString(Ctl)) {
6800     return false;
6801   }
6802   if (Ctl.size() != BITMASK_WIDTH) {
6803     Error(StrLoc, "expected a 5-character mask");
6804     return false;
6805   }
6806 
6807   unsigned AndMask = 0;
6808   unsigned OrMask = 0;
6809   unsigned XorMask = 0;
6810 
6811   for (size_t i = 0; i < Ctl.size(); ++i) {
6812     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6813     switch(Ctl[i]) {
6814     default:
6815       Error(StrLoc, "invalid mask");
6816       return false;
6817     case '0':
6818       break;
6819     case '1':
6820       OrMask |= Mask;
6821       break;
6822     case 'p':
6823       AndMask |= Mask;
6824       break;
6825     case 'i':
6826       AndMask |= Mask;
6827       XorMask |= Mask;
6828       break;
6829     }
6830   }
6831 
6832   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6833   return true;
6834 }
6835 
6836 bool
6837 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6838 
6839   SMLoc OffsetLoc = getLoc();
6840 
6841   if (!parseExpr(Imm, "a swizzle macro")) {
6842     return false;
6843   }
6844   if (!isUInt<16>(Imm)) {
6845     Error(OffsetLoc, "expected a 16-bit offset");
6846     return false;
6847   }
6848   return true;
6849 }
6850 
6851 bool
6852 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6853   using namespace llvm::AMDGPU::Swizzle;
6854 
6855   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6856 
6857     SMLoc ModeLoc = getLoc();
6858     bool Ok = false;
6859 
6860     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6861       Ok = parseSwizzleQuadPerm(Imm);
6862     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6863       Ok = parseSwizzleBitmaskPerm(Imm);
6864     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6865       Ok = parseSwizzleBroadcast(Imm);
6866     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6867       Ok = parseSwizzleSwap(Imm);
6868     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6869       Ok = parseSwizzleReverse(Imm);
6870     } else {
6871       Error(ModeLoc, "expected a swizzle mode");
6872     }
6873 
6874     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
6875   }
6876 
6877   return false;
6878 }
6879 
6880 OperandMatchResultTy
6881 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6882   SMLoc S = getLoc();
6883   int64_t Imm = 0;
6884 
6885   if (trySkipId("offset")) {
6886 
6887     bool Ok = false;
6888     if (skipToken(AsmToken::Colon, "expected a colon")) {
6889       if (trySkipId("swizzle")) {
6890         Ok = parseSwizzleMacro(Imm);
6891       } else {
6892         Ok = parseSwizzleOffset(Imm);
6893       }
6894     }
6895 
6896     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6897 
6898     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6899   } else {
6900     // Swizzle "offset" operand is optional.
6901     // If it is omitted, try parsing other optional operands.
6902     return parseOptionalOpr(Operands);
6903   }
6904 }
6905 
6906 bool
6907 AMDGPUOperand::isSwizzle() const {
6908   return isImmTy(ImmTySwizzle);
6909 }
6910 
6911 //===----------------------------------------------------------------------===//
6912 // VGPR Index Mode
6913 //===----------------------------------------------------------------------===//
6914 
6915 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6916 
6917   using namespace llvm::AMDGPU::VGPRIndexMode;
6918 
6919   if (trySkipToken(AsmToken::RParen)) {
6920     return OFF;
6921   }
6922 
6923   int64_t Imm = 0;
6924 
6925   while (true) {
6926     unsigned Mode = 0;
6927     SMLoc S = getLoc();
6928 
6929     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6930       if (trySkipId(IdSymbolic[ModeId])) {
6931         Mode = 1 << ModeId;
6932         break;
6933       }
6934     }
6935 
6936     if (Mode == 0) {
6937       Error(S, (Imm == 0)?
6938                "expected a VGPR index mode or a closing parenthesis" :
6939                "expected a VGPR index mode");
6940       return UNDEF;
6941     }
6942 
6943     if (Imm & Mode) {
6944       Error(S, "duplicate VGPR index mode");
6945       return UNDEF;
6946     }
6947     Imm |= Mode;
6948 
6949     if (trySkipToken(AsmToken::RParen))
6950       break;
6951     if (!skipToken(AsmToken::Comma,
6952                    "expected a comma or a closing parenthesis"))
6953       return UNDEF;
6954   }
6955 
6956   return Imm;
6957 }
6958 
6959 OperandMatchResultTy
6960 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6961 
6962   using namespace llvm::AMDGPU::VGPRIndexMode;
6963 
6964   int64_t Imm = 0;
6965   SMLoc S = getLoc();
6966 
6967   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6968     Imm = parseGPRIdxMacro();
6969     if (Imm == UNDEF)
6970       return MatchOperand_ParseFail;
6971   } else {
6972     if (getParser().parseAbsoluteExpression(Imm))
6973       return MatchOperand_ParseFail;
6974     if (Imm < 0 || !isUInt<4>(Imm)) {
6975       Error(S, "invalid immediate: only 4-bit values are legal");
6976       return MatchOperand_ParseFail;
6977     }
6978   }
6979 
6980   Operands.push_back(
6981       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6982   return MatchOperand_Success;
6983 }
6984 
6985 bool AMDGPUOperand::isGPRIdxMode() const {
6986   return isImmTy(ImmTyGprIdxMode);
6987 }
6988 
6989 //===----------------------------------------------------------------------===//
6990 // sopp branch targets
6991 //===----------------------------------------------------------------------===//
6992 
6993 OperandMatchResultTy
6994 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6995 
6996   // Make sure we are not parsing something
6997   // that looks like a label or an expression but is not.
6998   // This will improve error messages.
6999   if (isRegister() || isModifier())
7000     return MatchOperand_NoMatch;
7001 
7002   if (!parseExpr(Operands))
7003     return MatchOperand_ParseFail;
7004 
7005   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7006   assert(Opr.isImm() || Opr.isExpr());
7007   SMLoc Loc = Opr.getStartLoc();
7008 
7009   // Currently we do not support arbitrary expressions as branch targets.
7010   // Only labels and absolute expressions are accepted.
7011   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7012     Error(Loc, "expected an absolute expression or a label");
7013   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7014     Error(Loc, "expected a 16-bit signed jump offset");
7015   }
7016 
7017   return MatchOperand_Success;
7018 }
7019 
7020 //===----------------------------------------------------------------------===//
7021 // Boolean holding registers
7022 //===----------------------------------------------------------------------===//
7023 
7024 OperandMatchResultTy
7025 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7026   return parseReg(Operands);
7027 }
7028 
7029 //===----------------------------------------------------------------------===//
7030 // mubuf
7031 //===----------------------------------------------------------------------===//
7032 
7033 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7034   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7035 }
7036 
7037 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7038                                    const OperandVector &Operands,
7039                                    bool IsAtomic,
7040                                    bool IsLds) {
7041   bool IsLdsOpcode = IsLds;
7042   bool HasLdsModifier = false;
7043   OptionalImmIndexMap OptionalIdx;
7044   unsigned FirstOperandIdx = 1;
7045   bool IsAtomicReturn = false;
7046 
7047   if (IsAtomic) {
7048     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7049       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7050       if (!Op.isCPol())
7051         continue;
7052       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7053       break;
7054     }
7055 
7056     if (!IsAtomicReturn) {
7057       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7058       if (NewOpc != -1)
7059         Inst.setOpcode(NewOpc);
7060     }
7061 
7062     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7063                       SIInstrFlags::IsAtomicRet;
7064   }
7065 
7066   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7067     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7068 
7069     // Add the register arguments
7070     if (Op.isReg()) {
7071       Op.addRegOperands(Inst, 1);
7072       // Insert a tied src for atomic return dst.
7073       // This cannot be postponed as subsequent calls to
7074       // addImmOperands rely on correct number of MC operands.
7075       if (IsAtomicReturn && i == FirstOperandIdx)
7076         Op.addRegOperands(Inst, 1);
7077       continue;
7078     }
7079 
7080     // Handle the case where soffset is an immediate
7081     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7082       Op.addImmOperands(Inst, 1);
7083       continue;
7084     }
7085 
7086     HasLdsModifier |= Op.isLDS();
7087 
7088     // Handle tokens like 'offen' which are sometimes hard-coded into the
7089     // asm string.  There are no MCInst operands for these.
7090     if (Op.isToken()) {
7091       continue;
7092     }
7093     assert(Op.isImm());
7094 
7095     // Handle optional arguments
7096     OptionalIdx[Op.getImmTy()] = i;
7097   }
7098 
7099   // This is a workaround for an llvm quirk which may result in an
7100   // incorrect instruction selection. Lds and non-lds versions of
7101   // MUBUF instructions are identical except that lds versions
7102   // have mandatory 'lds' modifier. However this modifier follows
7103   // optional modifiers and llvm asm matcher regards this 'lds'
7104   // modifier as an optional one. As a result, an lds version
7105   // of opcode may be selected even if it has no 'lds' modifier.
7106   if (IsLdsOpcode && !HasLdsModifier) {
7107     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7108     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7109       Inst.setOpcode(NoLdsOpcode);
7110       IsLdsOpcode = false;
7111     }
7112   }
7113 
7114   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7115   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7116 
7117   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7118     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7119   }
7120   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7121 }
7122 
7123 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7124   OptionalImmIndexMap OptionalIdx;
7125 
7126   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7127     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7128 
7129     // Add the register arguments
7130     if (Op.isReg()) {
7131       Op.addRegOperands(Inst, 1);
7132       continue;
7133     }
7134 
7135     // Handle the case where soffset is an immediate
7136     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7137       Op.addImmOperands(Inst, 1);
7138       continue;
7139     }
7140 
7141     // Handle tokens like 'offen' which are sometimes hard-coded into the
7142     // asm string.  There are no MCInst operands for these.
7143     if (Op.isToken()) {
7144       continue;
7145     }
7146     assert(Op.isImm());
7147 
7148     // Handle optional arguments
7149     OptionalIdx[Op.getImmTy()] = i;
7150   }
7151 
7152   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7153                         AMDGPUOperand::ImmTyOffset);
7154   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7155   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7156   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7157   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7158 }
7159 
7160 //===----------------------------------------------------------------------===//
7161 // mimg
7162 //===----------------------------------------------------------------------===//
7163 
7164 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7165                               bool IsAtomic) {
7166   unsigned I = 1;
7167   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7168   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7169     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7170   }
7171 
7172   if (IsAtomic) {
7173     // Add src, same as dst
7174     assert(Desc.getNumDefs() == 1);
7175     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7176   }
7177 
7178   OptionalImmIndexMap OptionalIdx;
7179 
7180   for (unsigned E = Operands.size(); I != E; ++I) {
7181     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7182 
7183     // Add the register arguments
7184     if (Op.isReg()) {
7185       Op.addRegOperands(Inst, 1);
7186     } else if (Op.isImmModifier()) {
7187       OptionalIdx[Op.getImmTy()] = I;
7188     } else if (!Op.isToken()) {
7189       llvm_unreachable("unexpected operand type");
7190     }
7191   }
7192 
7193   bool IsGFX10Plus = isGFX10Plus();
7194 
7195   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7196   if (IsGFX10Plus)
7197     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7198   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7199   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7200   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7201   if (IsGFX10Plus)
7202     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7203   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7204     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7205   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7206   if (!IsGFX10Plus)
7207     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7208   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7209 }
7210 
7211 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7212   cvtMIMG(Inst, Operands, true);
7213 }
7214 
7215 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7216   OptionalImmIndexMap OptionalIdx;
7217   bool IsAtomicReturn = false;
7218 
7219   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7220     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7221     if (!Op.isCPol())
7222       continue;
7223     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7224     break;
7225   }
7226 
7227   if (!IsAtomicReturn) {
7228     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7229     if (NewOpc != -1)
7230       Inst.setOpcode(NewOpc);
7231   }
7232 
7233   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7234                     SIInstrFlags::IsAtomicRet;
7235 
7236   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7237     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7238 
7239     // Add the register arguments
7240     if (Op.isReg()) {
7241       Op.addRegOperands(Inst, 1);
7242       if (IsAtomicReturn && i == 1)
7243         Op.addRegOperands(Inst, 1);
7244       continue;
7245     }
7246 
7247     // Handle the case where soffset is an immediate
7248     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7249       Op.addImmOperands(Inst, 1);
7250       continue;
7251     }
7252 
7253     // Handle tokens like 'offen' which are sometimes hard-coded into the
7254     // asm string.  There are no MCInst operands for these.
7255     if (Op.isToken()) {
7256       continue;
7257     }
7258     assert(Op.isImm());
7259 
7260     // Handle optional arguments
7261     OptionalIdx[Op.getImmTy()] = i;
7262   }
7263 
7264   if ((int)Inst.getNumOperands() <=
7265       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7266     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7267   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7268 }
7269 
7270 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7271                                       const OperandVector &Operands) {
7272   for (unsigned I = 1; I < Operands.size(); ++I) {
7273     auto &Operand = (AMDGPUOperand &)*Operands[I];
7274     if (Operand.isReg())
7275       Operand.addRegOperands(Inst, 1);
7276   }
7277 
7278   Inst.addOperand(MCOperand::createImm(1)); // a16
7279 }
7280 
7281 //===----------------------------------------------------------------------===//
7282 // smrd
7283 //===----------------------------------------------------------------------===//
7284 
7285 bool AMDGPUOperand::isSMRDOffset8() const {
7286   return isImm() && isUInt<8>(getImm());
7287 }
7288 
7289 bool AMDGPUOperand::isSMEMOffset() const {
7290   return isImm(); // Offset range is checked later by validator.
7291 }
7292 
7293 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7294   // 32-bit literals are only supported on CI and we only want to use them
7295   // when the offset is > 8-bits.
7296   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7297 }
7298 
7299 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7300   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7301 }
7302 
7303 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7304   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7305 }
7306 
7307 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7308   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7309 }
7310 
7311 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7312   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7313 }
7314 
7315 //===----------------------------------------------------------------------===//
7316 // vop3
7317 //===----------------------------------------------------------------------===//
7318 
7319 static bool ConvertOmodMul(int64_t &Mul) {
7320   if (Mul != 1 && Mul != 2 && Mul != 4)
7321     return false;
7322 
7323   Mul >>= 1;
7324   return true;
7325 }
7326 
7327 static bool ConvertOmodDiv(int64_t &Div) {
7328   if (Div == 1) {
7329     Div = 0;
7330     return true;
7331   }
7332 
7333   if (Div == 2) {
7334     Div = 3;
7335     return true;
7336   }
7337 
7338   return false;
7339 }
7340 
7341 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7342 // This is intentional and ensures compatibility with sp3.
7343 // See bug 35397 for details.
7344 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7345   if (BoundCtrl == 0 || BoundCtrl == 1) {
7346     BoundCtrl = 1;
7347     return true;
7348   }
7349   return false;
7350 }
7351 
7352 // Note: the order in this table matches the order of operands in AsmString.
7353 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7354   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7355   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7356   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7357   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7358   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7359   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7360   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7361   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7362   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7363   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7364   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7365   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7366   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7367   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7368   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7369   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7370   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7371   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7372   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7373   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7374   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7375   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7376   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7377   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7378   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7379   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7380   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7381   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7382   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7383   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7384   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7385   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7386   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7387   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7388   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7389   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7390   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7391   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7392   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7393   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7394   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7395 };
7396 
7397 void AMDGPUAsmParser::onBeginOfFile() {
7398   if (!getParser().getStreamer().getTargetStreamer() ||
7399       getSTI().getTargetTriple().getArch() == Triple::r600)
7400     return;
7401 
7402   if (!getTargetStreamer().getTargetID())
7403     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7404 
7405   if (isHsaAbiVersion3Or4(&getSTI()))
7406     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7407 }
7408 
7409 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7410 
7411   OperandMatchResultTy res = parseOptionalOpr(Operands);
7412 
7413   // This is a hack to enable hardcoded mandatory operands which follow
7414   // optional operands.
7415   //
7416   // Current design assumes that all operands after the first optional operand
7417   // are also optional. However implementation of some instructions violates
7418   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
7419   //
7420   // To alleviate this problem, we have to (implicitly) parse extra operands
7421   // to make sure autogenerated parser of custom operands never hit hardcoded
7422   // mandatory operands.
7423 
7424   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7425     if (res != MatchOperand_Success ||
7426         isToken(AsmToken::EndOfStatement))
7427       break;
7428 
7429     trySkipToken(AsmToken::Comma);
7430     res = parseOptionalOpr(Operands);
7431   }
7432 
7433   return res;
7434 }
7435 
7436 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7437   OperandMatchResultTy res;
7438   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7439     // try to parse any optional operand here
7440     if (Op.IsBit) {
7441       res = parseNamedBit(Op.Name, Operands, Op.Type);
7442     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7443       res = parseOModOperand(Operands);
7444     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7445                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7446                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7447       res = parseSDWASel(Operands, Op.Name, Op.Type);
7448     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7449       res = parseSDWADstUnused(Operands);
7450     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7451                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7452                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7453                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7454       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7455                                         Op.ConvertResult);
7456     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7457       res = parseDim(Operands);
7458     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7459       res = parseCPol(Operands);
7460     } else {
7461       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7462     }
7463     if (res != MatchOperand_NoMatch) {
7464       return res;
7465     }
7466   }
7467   return MatchOperand_NoMatch;
7468 }
7469 
7470 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7471   StringRef Name = getTokenStr();
7472   if (Name == "mul") {
7473     return parseIntWithPrefix("mul", Operands,
7474                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7475   }
7476 
7477   if (Name == "div") {
7478     return parseIntWithPrefix("div", Operands,
7479                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7480   }
7481 
7482   return MatchOperand_NoMatch;
7483 }
7484 
7485 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7486   cvtVOP3P(Inst, Operands);
7487 
7488   int Opc = Inst.getOpcode();
7489 
7490   int SrcNum;
7491   const int Ops[] = { AMDGPU::OpName::src0,
7492                       AMDGPU::OpName::src1,
7493                       AMDGPU::OpName::src2 };
7494   for (SrcNum = 0;
7495        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7496        ++SrcNum);
7497   assert(SrcNum > 0);
7498 
7499   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7500   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7501 
7502   if ((OpSel & (1 << SrcNum)) != 0) {
7503     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7504     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7505     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7506   }
7507 }
7508 
7509 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7510       // 1. This operand is input modifiers
7511   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7512       // 2. This is not last operand
7513       && Desc.NumOperands > (OpNum + 1)
7514       // 3. Next operand is register class
7515       && Desc.OpInfo[OpNum + 1].RegClass != -1
7516       // 4. Next register is not tied to any other operand
7517       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7518 }
7519 
7520 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7521 {
7522   OptionalImmIndexMap OptionalIdx;
7523   unsigned Opc = Inst.getOpcode();
7524 
7525   unsigned I = 1;
7526   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7527   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7528     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7529   }
7530 
7531   for (unsigned E = Operands.size(); I != E; ++I) {
7532     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7533     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7534       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7535     } else if (Op.isInterpSlot() ||
7536                Op.isInterpAttr() ||
7537                Op.isAttrChan()) {
7538       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7539     } else if (Op.isImmModifier()) {
7540       OptionalIdx[Op.getImmTy()] = I;
7541     } else {
7542       llvm_unreachable("unhandled operand type");
7543     }
7544   }
7545 
7546   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7547     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7548   }
7549 
7550   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7551     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7552   }
7553 
7554   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7555     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7556   }
7557 }
7558 
7559 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7560                               OptionalImmIndexMap &OptionalIdx) {
7561   unsigned Opc = Inst.getOpcode();
7562 
7563   unsigned I = 1;
7564   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7565   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7566     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7567   }
7568 
7569   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7570     // This instruction has src modifiers
7571     for (unsigned E = Operands.size(); I != E; ++I) {
7572       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7573       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7574         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7575       } else if (Op.isImmModifier()) {
7576         OptionalIdx[Op.getImmTy()] = I;
7577       } else if (Op.isRegOrImm()) {
7578         Op.addRegOrImmOperands(Inst, 1);
7579       } else {
7580         llvm_unreachable("unhandled operand type");
7581       }
7582     }
7583   } else {
7584     // No src modifiers
7585     for (unsigned E = Operands.size(); I != E; ++I) {
7586       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7587       if (Op.isMod()) {
7588         OptionalIdx[Op.getImmTy()] = I;
7589       } else {
7590         Op.addRegOrImmOperands(Inst, 1);
7591       }
7592     }
7593   }
7594 
7595   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7596     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7597   }
7598 
7599   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7600     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7601   }
7602 
7603   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7604   // it has src2 register operand that is tied to dst operand
7605   // we don't allow modifiers for this operand in assembler so src2_modifiers
7606   // should be 0.
7607   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7608       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7609       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7610       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7611       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7612       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7613       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7614       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7615       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7616       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7617       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7618     auto it = Inst.begin();
7619     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7620     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7621     ++it;
7622     // Copy the operand to ensure it's not invalidated when Inst grows.
7623     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7624   }
7625 }
7626 
7627 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7628   OptionalImmIndexMap OptionalIdx;
7629   cvtVOP3(Inst, Operands, OptionalIdx);
7630 }
7631 
7632 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7633                                OptionalImmIndexMap &OptIdx) {
7634   const int Opc = Inst.getOpcode();
7635   const MCInstrDesc &Desc = MII.get(Opc);
7636 
7637   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7638 
7639   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7640     assert(!IsPacked);
7641     Inst.addOperand(Inst.getOperand(0));
7642   }
7643 
7644   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
7645   // instruction, and then figure out where to actually put the modifiers
7646 
7647   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7648   if (OpSelIdx != -1) {
7649     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7650   }
7651 
7652   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7653   if (OpSelHiIdx != -1) {
7654     int DefaultVal = IsPacked ? -1 : 0;
7655     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7656                           DefaultVal);
7657   }
7658 
7659   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7660   if (NegLoIdx != -1) {
7661     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7662     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7663   }
7664 
7665   const int Ops[] = { AMDGPU::OpName::src0,
7666                       AMDGPU::OpName::src1,
7667                       AMDGPU::OpName::src2 };
7668   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7669                          AMDGPU::OpName::src1_modifiers,
7670                          AMDGPU::OpName::src2_modifiers };
7671 
7672   unsigned OpSel = 0;
7673   unsigned OpSelHi = 0;
7674   unsigned NegLo = 0;
7675   unsigned NegHi = 0;
7676 
7677   if (OpSelIdx != -1)
7678     OpSel = Inst.getOperand(OpSelIdx).getImm();
7679 
7680   if (OpSelHiIdx != -1)
7681     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7682 
7683   if (NegLoIdx != -1) {
7684     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7685     NegLo = Inst.getOperand(NegLoIdx).getImm();
7686     NegHi = Inst.getOperand(NegHiIdx).getImm();
7687   }
7688 
7689   for (int J = 0; J < 3; ++J) {
7690     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7691     if (OpIdx == -1)
7692       break;
7693 
7694     uint32_t ModVal = 0;
7695 
7696     if ((OpSel & (1 << J)) != 0)
7697       ModVal |= SISrcMods::OP_SEL_0;
7698 
7699     if ((OpSelHi & (1 << J)) != 0)
7700       ModVal |= SISrcMods::OP_SEL_1;
7701 
7702     if ((NegLo & (1 << J)) != 0)
7703       ModVal |= SISrcMods::NEG;
7704 
7705     if ((NegHi & (1 << J)) != 0)
7706       ModVal |= SISrcMods::NEG_HI;
7707 
7708     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7709 
7710     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7711   }
7712 }
7713 
7714 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7715   OptionalImmIndexMap OptIdx;
7716   cvtVOP3(Inst, Operands, OptIdx);
7717   cvtVOP3P(Inst, Operands, OptIdx);
7718 }
7719 
7720 //===----------------------------------------------------------------------===//
7721 // dpp
7722 //===----------------------------------------------------------------------===//
7723 
7724 bool AMDGPUOperand::isDPP8() const {
7725   return isImmTy(ImmTyDPP8);
7726 }
7727 
7728 bool AMDGPUOperand::isDPPCtrl() const {
7729   using namespace AMDGPU::DPP;
7730 
7731   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7732   if (result) {
7733     int64_t Imm = getImm();
7734     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7735            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7736            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7737            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7738            (Imm == DppCtrl::WAVE_SHL1) ||
7739            (Imm == DppCtrl::WAVE_ROL1) ||
7740            (Imm == DppCtrl::WAVE_SHR1) ||
7741            (Imm == DppCtrl::WAVE_ROR1) ||
7742            (Imm == DppCtrl::ROW_MIRROR) ||
7743            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7744            (Imm == DppCtrl::BCAST15) ||
7745            (Imm == DppCtrl::BCAST31) ||
7746            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7747            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7748   }
7749   return false;
7750 }
7751 
7752 //===----------------------------------------------------------------------===//
7753 // mAI
7754 //===----------------------------------------------------------------------===//
7755 
7756 bool AMDGPUOperand::isBLGP() const {
7757   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7758 }
7759 
7760 bool AMDGPUOperand::isCBSZ() const {
7761   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7762 }
7763 
7764 bool AMDGPUOperand::isABID() const {
7765   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7766 }
7767 
7768 bool AMDGPUOperand::isS16Imm() const {
7769   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7770 }
7771 
7772 bool AMDGPUOperand::isU16Imm() const {
7773   return isImm() && isUInt<16>(getImm());
7774 }
7775 
7776 //===----------------------------------------------------------------------===//
7777 // dim
7778 //===----------------------------------------------------------------------===//
7779 
7780 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7781   // We want to allow "dim:1D" etc.,
7782   // but the initial 1 is tokenized as an integer.
7783   std::string Token;
7784   if (isToken(AsmToken::Integer)) {
7785     SMLoc Loc = getToken().getEndLoc();
7786     Token = std::string(getTokenStr());
7787     lex();
7788     if (getLoc() != Loc)
7789       return false;
7790   }
7791 
7792   StringRef Suffix;
7793   if (!parseId(Suffix))
7794     return false;
7795   Token += Suffix;
7796 
7797   StringRef DimId = Token;
7798   if (DimId.startswith("SQ_RSRC_IMG_"))
7799     DimId = DimId.drop_front(12);
7800 
7801   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7802   if (!DimInfo)
7803     return false;
7804 
7805   Encoding = DimInfo->Encoding;
7806   return true;
7807 }
7808 
7809 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7810   if (!isGFX10Plus())
7811     return MatchOperand_NoMatch;
7812 
7813   SMLoc S = getLoc();
7814 
7815   if (!trySkipId("dim", AsmToken::Colon))
7816     return MatchOperand_NoMatch;
7817 
7818   unsigned Encoding;
7819   SMLoc Loc = getLoc();
7820   if (!parseDimId(Encoding)) {
7821     Error(Loc, "invalid dim value");
7822     return MatchOperand_ParseFail;
7823   }
7824 
7825   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7826                                               AMDGPUOperand::ImmTyDim));
7827   return MatchOperand_Success;
7828 }
7829 
7830 //===----------------------------------------------------------------------===//
7831 // dpp
7832 //===----------------------------------------------------------------------===//
7833 
7834 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7835   SMLoc S = getLoc();
7836 
7837   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7838     return MatchOperand_NoMatch;
7839 
7840   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7841 
7842   int64_t Sels[8];
7843 
7844   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7845     return MatchOperand_ParseFail;
7846 
7847   for (size_t i = 0; i < 8; ++i) {
7848     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7849       return MatchOperand_ParseFail;
7850 
7851     SMLoc Loc = getLoc();
7852     if (getParser().parseAbsoluteExpression(Sels[i]))
7853       return MatchOperand_ParseFail;
7854     if (0 > Sels[i] || 7 < Sels[i]) {
7855       Error(Loc, "expected a 3-bit value");
7856       return MatchOperand_ParseFail;
7857     }
7858   }
7859 
7860   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7861     return MatchOperand_ParseFail;
7862 
7863   unsigned DPP8 = 0;
7864   for (size_t i = 0; i < 8; ++i)
7865     DPP8 |= (Sels[i] << (i * 3));
7866 
7867   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7868   return MatchOperand_Success;
7869 }
7870 
7871 bool
7872 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7873                                     const OperandVector &Operands) {
7874   if (Ctrl == "row_newbcast")
7875     return isGFX90A();
7876 
7877   if (Ctrl == "row_share" ||
7878       Ctrl == "row_xmask")
7879     return isGFX10Plus();
7880 
7881   if (Ctrl == "wave_shl" ||
7882       Ctrl == "wave_shr" ||
7883       Ctrl == "wave_rol" ||
7884       Ctrl == "wave_ror" ||
7885       Ctrl == "row_bcast")
7886     return isVI() || isGFX9();
7887 
7888   return Ctrl == "row_mirror" ||
7889          Ctrl == "row_half_mirror" ||
7890          Ctrl == "quad_perm" ||
7891          Ctrl == "row_shl" ||
7892          Ctrl == "row_shr" ||
7893          Ctrl == "row_ror";
7894 }
7895 
7896 int64_t
7897 AMDGPUAsmParser::parseDPPCtrlPerm() {
7898   // quad_perm:[%d,%d,%d,%d]
7899 
7900   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7901     return -1;
7902 
7903   int64_t Val = 0;
7904   for (int i = 0; i < 4; ++i) {
7905     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7906       return -1;
7907 
7908     int64_t Temp;
7909     SMLoc Loc = getLoc();
7910     if (getParser().parseAbsoluteExpression(Temp))
7911       return -1;
7912     if (Temp < 0 || Temp > 3) {
7913       Error(Loc, "expected a 2-bit value");
7914       return -1;
7915     }
7916 
7917     Val += (Temp << i * 2);
7918   }
7919 
7920   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7921     return -1;
7922 
7923   return Val;
7924 }
7925 
7926 int64_t
7927 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7928   using namespace AMDGPU::DPP;
7929 
7930   // sel:%d
7931 
7932   int64_t Val;
7933   SMLoc Loc = getLoc();
7934 
7935   if (getParser().parseAbsoluteExpression(Val))
7936     return -1;
7937 
7938   struct DppCtrlCheck {
7939     int64_t Ctrl;
7940     int Lo;
7941     int Hi;
7942   };
7943 
7944   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7945     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7946     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7947     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7948     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7949     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7950     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7951     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7952     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7953     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7954     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7955     .Default({-1, 0, 0});
7956 
7957   bool Valid;
7958   if (Check.Ctrl == -1) {
7959     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7960     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7961   } else {
7962     Valid = Check.Lo <= Val && Val <= Check.Hi;
7963     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7964   }
7965 
7966   if (!Valid) {
7967     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7968     return -1;
7969   }
7970 
7971   return Val;
7972 }
7973 
7974 OperandMatchResultTy
7975 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7976   using namespace AMDGPU::DPP;
7977 
7978   if (!isToken(AsmToken::Identifier) ||
7979       !isSupportedDPPCtrl(getTokenStr(), Operands))
7980     return MatchOperand_NoMatch;
7981 
7982   SMLoc S = getLoc();
7983   int64_t Val = -1;
7984   StringRef Ctrl;
7985 
7986   parseId(Ctrl);
7987 
7988   if (Ctrl == "row_mirror") {
7989     Val = DppCtrl::ROW_MIRROR;
7990   } else if (Ctrl == "row_half_mirror") {
7991     Val = DppCtrl::ROW_HALF_MIRROR;
7992   } else {
7993     if (skipToken(AsmToken::Colon, "expected a colon")) {
7994       if (Ctrl == "quad_perm") {
7995         Val = parseDPPCtrlPerm();
7996       } else {
7997         Val = parseDPPCtrlSel(Ctrl);
7998       }
7999     }
8000   }
8001 
8002   if (Val == -1)
8003     return MatchOperand_ParseFail;
8004 
8005   Operands.push_back(
8006     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8007   return MatchOperand_Success;
8008 }
8009 
8010 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8011   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8012 }
8013 
8014 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8015   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8016 }
8017 
8018 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8019   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8020 }
8021 
8022 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8023   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8024 }
8025 
8026 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8027   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8028 }
8029 
8030 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8031   OptionalImmIndexMap OptionalIdx;
8032 
8033   unsigned Opc = Inst.getOpcode();
8034   bool HasModifiers =
8035       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8036   unsigned I = 1;
8037   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8038   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8039     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8040   }
8041 
8042   int Fi = 0;
8043   for (unsigned E = Operands.size(); I != E; ++I) {
8044     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8045                                             MCOI::TIED_TO);
8046     if (TiedTo != -1) {
8047       assert((unsigned)TiedTo < Inst.getNumOperands());
8048       // handle tied old or src2 for MAC instructions
8049       Inst.addOperand(Inst.getOperand(TiedTo));
8050     }
8051     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8052     // Add the register arguments
8053     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8054       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
8055       // Skip it.
8056       continue;
8057     }
8058 
8059     if (IsDPP8) {
8060       if (Op.isDPP8()) {
8061         Op.addImmOperands(Inst, 1);
8062       } else if (HasModifiers &&
8063                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8064         Op.addRegWithFPInputModsOperands(Inst, 2);
8065       } else if (Op.isFI()) {
8066         Fi = Op.getImm();
8067       } else if (Op.isReg()) {
8068         Op.addRegOperands(Inst, 1);
8069       } else {
8070         llvm_unreachable("Invalid operand type");
8071       }
8072     } else {
8073       if (HasModifiers &&
8074           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8075         Op.addRegWithFPInputModsOperands(Inst, 2);
8076       } else if (Op.isReg()) {
8077         Op.addRegOperands(Inst, 1);
8078       } else if (Op.isDPPCtrl()) {
8079         Op.addImmOperands(Inst, 1);
8080       } else if (Op.isImm()) {
8081         // Handle optional arguments
8082         OptionalIdx[Op.getImmTy()] = I;
8083       } else {
8084         llvm_unreachable("Invalid operand type");
8085       }
8086     }
8087   }
8088 
8089   if (IsDPP8) {
8090     using namespace llvm::AMDGPU::DPP;
8091     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8092   } else {
8093     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8094     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8095     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8096     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8097       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8098     }
8099   }
8100 }
8101 
8102 //===----------------------------------------------------------------------===//
8103 // sdwa
8104 //===----------------------------------------------------------------------===//
8105 
8106 OperandMatchResultTy
8107 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8108                               AMDGPUOperand::ImmTy Type) {
8109   using namespace llvm::AMDGPU::SDWA;
8110 
8111   SMLoc S = getLoc();
8112   StringRef Value;
8113   OperandMatchResultTy res;
8114 
8115   SMLoc StringLoc;
8116   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8117   if (res != MatchOperand_Success) {
8118     return res;
8119   }
8120 
8121   int64_t Int;
8122   Int = StringSwitch<int64_t>(Value)
8123         .Case("BYTE_0", SdwaSel::BYTE_0)
8124         .Case("BYTE_1", SdwaSel::BYTE_1)
8125         .Case("BYTE_2", SdwaSel::BYTE_2)
8126         .Case("BYTE_3", SdwaSel::BYTE_3)
8127         .Case("WORD_0", SdwaSel::WORD_0)
8128         .Case("WORD_1", SdwaSel::WORD_1)
8129         .Case("DWORD", SdwaSel::DWORD)
8130         .Default(0xffffffff);
8131 
8132   if (Int == 0xffffffff) {
8133     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8134     return MatchOperand_ParseFail;
8135   }
8136 
8137   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8138   return MatchOperand_Success;
8139 }
8140 
8141 OperandMatchResultTy
8142 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8143   using namespace llvm::AMDGPU::SDWA;
8144 
8145   SMLoc S = getLoc();
8146   StringRef Value;
8147   OperandMatchResultTy res;
8148 
8149   SMLoc StringLoc;
8150   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8151   if (res != MatchOperand_Success) {
8152     return res;
8153   }
8154 
8155   int64_t Int;
8156   Int = StringSwitch<int64_t>(Value)
8157         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8158         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8159         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8160         .Default(0xffffffff);
8161 
8162   if (Int == 0xffffffff) {
8163     Error(StringLoc, "invalid dst_unused value");
8164     return MatchOperand_ParseFail;
8165   }
8166 
8167   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8168   return MatchOperand_Success;
8169 }
8170 
8171 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8172   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8173 }
8174 
8175 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8176   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8177 }
8178 
8179 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8180   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8181 }
8182 
8183 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8184   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8185 }
8186 
8187 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8188   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8189 }
8190 
8191 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8192                               uint64_t BasicInstType,
8193                               bool SkipDstVcc,
8194                               bool SkipSrcVcc) {
8195   using namespace llvm::AMDGPU::SDWA;
8196 
8197   OptionalImmIndexMap OptionalIdx;
8198   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8199   bool SkippedVcc = false;
8200 
8201   unsigned I = 1;
8202   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8203   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8204     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8205   }
8206 
8207   for (unsigned E = Operands.size(); I != E; ++I) {
8208     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8209     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8210         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8211       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8212       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8213       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8214       // Skip VCC only if we didn't skip it on previous iteration.
8215       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8216       if (BasicInstType == SIInstrFlags::VOP2 &&
8217           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8218            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8219         SkippedVcc = true;
8220         continue;
8221       } else if (BasicInstType == SIInstrFlags::VOPC &&
8222                  Inst.getNumOperands() == 0) {
8223         SkippedVcc = true;
8224         continue;
8225       }
8226     }
8227     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8228       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8229     } else if (Op.isImm()) {
8230       // Handle optional arguments
8231       OptionalIdx[Op.getImmTy()] = I;
8232     } else {
8233       llvm_unreachable("Invalid operand type");
8234     }
8235     SkippedVcc = false;
8236   }
8237 
8238   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8239       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8240       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8241     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
8242     switch (BasicInstType) {
8243     case SIInstrFlags::VOP1:
8244       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8245       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8246         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8247       }
8248       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8249       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8250       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8251       break;
8252 
8253     case SIInstrFlags::VOP2:
8254       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8255       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8256         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8257       }
8258       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8259       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8260       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8261       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8262       break;
8263 
8264     case SIInstrFlags::VOPC:
8265       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8266         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8267       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8268       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8269       break;
8270 
8271     default:
8272       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8273     }
8274   }
8275 
8276   // special case v_mac_{f16, f32}:
8277   // it has src2 register operand that is tied to dst operand
8278   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8279       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8280     auto it = Inst.begin();
8281     std::advance(
8282       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8283     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8284   }
8285 }
8286 
8287 //===----------------------------------------------------------------------===//
8288 // mAI
8289 //===----------------------------------------------------------------------===//
8290 
8291 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8292   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8293 }
8294 
8295 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8296   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8297 }
8298 
8299 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8300   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8301 }
8302 
8303 /// Force static initialization.
8304 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8305   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8306   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8307 }
8308 
8309 #define GET_REGISTER_MATCHER
8310 #define GET_MATCHER_IMPLEMENTATION
8311 #define GET_MNEMONIC_SPELL_CHECKER
8312 #define GET_MNEMONIC_CHECKER
8313 #include "AMDGPUGenAsmMatcher.inc"
8314 
8315 // This fuction should be defined after auto-generated include so that we have
8316 // MatchClassKind enum defined
8317 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8318                                                      unsigned Kind) {
8319   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8320   // But MatchInstructionImpl() expects to meet token and fails to validate
8321   // operand. This method checks if we are given immediate operand but expect to
8322   // get corresponding token.
8323   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8324   switch (Kind) {
8325   case MCK_addr64:
8326     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8327   case MCK_gds:
8328     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8329   case MCK_lds:
8330     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8331   case MCK_idxen:
8332     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8333   case MCK_offen:
8334     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8335   case MCK_SSrcB32:
8336     // When operands have expression values, they will return true for isToken,
8337     // because it is not possible to distinguish between a token and an
8338     // expression at parse time. MatchInstructionImpl() will always try to
8339     // match an operand as a token, when isToken returns true, and when the
8340     // name of the expression is not a valid token, the match will fail,
8341     // so we need to handle it here.
8342     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8343   case MCK_SSrcF32:
8344     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8345   case MCK_SoppBrTarget:
8346     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8347   case MCK_VReg32OrOff:
8348     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8349   case MCK_InterpSlot:
8350     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8351   case MCK_Attr:
8352     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8353   case MCK_AttrChan:
8354     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8355   case MCK_ImmSMEMOffset:
8356     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8357   case MCK_SReg_64:
8358   case MCK_SReg_64_XEXEC:
8359     // Null is defined as a 32-bit register but
8360     // it should also be enabled with 64-bit operands.
8361     // The following code enables it for SReg_64 operands
8362     // used as source and destination. Remaining source
8363     // operands are handled in isInlinableImm.
8364     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8365   default:
8366     return Match_InvalidOperand;
8367   }
8368 }
8369 
8370 //===----------------------------------------------------------------------===//
8371 // endpgm
8372 //===----------------------------------------------------------------------===//
8373 
8374 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8375   SMLoc S = getLoc();
8376   int64_t Imm = 0;
8377 
8378   if (!parseExpr(Imm)) {
8379     // The operand is optional, if not present default to 0
8380     Imm = 0;
8381   }
8382 
8383   if (!isUInt<16>(Imm)) {
8384     Error(S, "expected a 16-bit value");
8385     return MatchOperand_ParseFail;
8386   }
8387 
8388   Operands.push_back(
8389       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8390   return MatchOperand_Success;
8391 }
8392 
8393 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8394