1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/Support/AMDGPUMetadata.h"
32 #include "llvm/Support/AMDHSAKernelDescriptor.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/MachineValueType.h"
35 #include "llvm/Support/TargetParser.h"
36 #include "llvm/Support/TargetRegistry.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53   enum KindTy {
54     Token,
55     Immediate,
56     Register,
57     Expression
58   } Kind;
59 
60   SMLoc StartLoc, EndLoc;
61   const AMDGPUAsmParser *AsmParser;
62 
63 public:
64   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
66 
67   using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
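  // Source operand modifiers: Abs/Neg are the floating-point modifiers and
  // Sext is the integer sign-extension modifier. getModifiersOperand() folds
  // them into the corresponding SISrcMods bits.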
69   struct Modifiers {
70     bool Abs = false;
71     bool Neg = false;
72     bool Sext = false;
73 
74     bool hasFPModifiers() const { return Abs || Neg; }
75     bool hasIntModifiers() const { return Sext; }
76     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78     int64_t getFPModifiersOperand() const {
79       int64_t Operand = 0;
80       Operand |= Abs ? SISrcMods::ABS : 0u;
81       Operand |= Neg ? SISrcMods::NEG : 0u;
82       return Operand;
83     }
84 
85     int64_t getIntModifiersOperand() const {
86       int64_t Operand = 0;
87       Operand |= Sext ? SISrcMods::SEXT : 0u;
88       return Operand;
89     }
90 
91     int64_t getModifiersOperand() const {
92       assert(!(hasFPModifiers() && hasIntModifiers())
93            && "fp and int modifiers should not be used simultaneously");
94       if (hasFPModifiers()) {
95         return getFPModifiersOperand();
96       } else if (hasIntModifiers()) {
97         return getIntModifiersOperand();
98       } else {
99         return 0;
100       }
101     }
102 
103     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104   };
105 
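  // Identifies the named and optional immediate operands the parser
  // recognizes (offsets, clamp/omod, DPP and SDWA controls, MIMG flags, etc.).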
106   enum ImmTy {
107     ImmTyNone,
108     ImmTyGDS,
109     ImmTyLDS,
110     ImmTyOffen,
111     ImmTyIdxen,
112     ImmTyAddr64,
113     ImmTyOffset,
114     ImmTyInstOffset,
115     ImmTyOffset0,
116     ImmTyOffset1,
117     ImmTyCPol,
118     ImmTySWZ,
119     ImmTyTFE,
120     ImmTyD16,
121     ImmTyClampSI,
122     ImmTyOModSI,
123     ImmTyDPP8,
124     ImmTyDppCtrl,
125     ImmTyDppRowMask,
126     ImmTyDppBankMask,
127     ImmTyDppBoundCtrl,
128     ImmTyDppFi,
129     ImmTySdwaDstSel,
130     ImmTySdwaSrc0Sel,
131     ImmTySdwaSrc1Sel,
132     ImmTySdwaDstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTySwizzle,
155     ImmTyGprIdxMode,
156     ImmTyHigh,
157     ImmTyBLGP,
158     ImmTyCBSZ,
159     ImmTyABID,
160     ImmTyEndpgm,
161   };
162 
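  // Classifies how an immediate operand is encoded: not yet known, as a
  // literal constant, or as an inline constant.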
163   enum ImmKindTy {
164     ImmKindTyNone,
165     ImmKindTyLiteral,
166     ImmKindTyConst,
167   };
168 
169 private:
170   struct TokOp {
171     const char *Data;
172     unsigned Length;
173   };
174 
175   struct ImmOp {
176     int64_t Val;
177     ImmTy Type;
178     bool IsFPImm;
179     mutable ImmKindTy Kind;
180     Modifiers Mods;
181   };
182 
183   struct RegOp {
184     unsigned RegNo;
185     Modifiers Mods;
186   };
187 
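  // Operand payload; the active member is selected by Kind.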
188   union {
189     TokOp Tok;
190     ImmOp Imm;
191     RegOp Reg;
192     const MCExpr *Expr;
193   };
194 
195 public:
196   bool isToken() const override {
197     if (Kind == Token)
198       return true;
199 
200     // When parsing operands, we can't always tell if something was meant to be
201     // a token, like 'gds', or an expression that references a global variable.
202     // In this case, we assume the string is an expression, and if we need to
203     // interpret it as a token, then we treat the symbol name as the token.
204     return isSymbolRefExpr();
205   }
206 
207   bool isSymbolRefExpr() const {
208     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209   }
210 
211   bool isImm() const override {
212     return Kind == Immediate;
213   }
214 
215   void setImmKindNone() const {
216     assert(isImm());
217     Imm.Kind = ImmKindTyNone;
218   }
219 
220   void setImmKindLiteral() const {
221     assert(isImm());
222     Imm.Kind = ImmKindTyLiteral;
223   }
224 
225   void setImmKindConst() const {
226     assert(isImm());
227     Imm.Kind = ImmKindTyConst;
228   }
229 
230   bool IsImmKindLiteral() const {
231     return isImm() && Imm.Kind == ImmKindTyLiteral;
232   }
233 
234   bool isImmKindConst() const {
235     return isImm() && Imm.Kind == ImmKindTyConst;
236   }
237 
238   bool isInlinableImm(MVT type) const;
239   bool isLiteralImm(MVT type) const;
240 
241   bool isRegKind() const {
242     return Kind == Register;
243   }
244 
245   bool isReg() const override {
246     return isRegKind() && !hasModifiers();
247   }
248 
249   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
250     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
251   }
252 
253   bool isRegOrImmWithInt16InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
255   }
256 
257   bool isRegOrImmWithInt32InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259   }
260 
261   bool isRegOrImmWithInt64InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
263   }
264 
265   bool isRegOrImmWithFP16InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
267   }
268 
269   bool isRegOrImmWithFP32InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
271   }
272 
273   bool isRegOrImmWithFP64InputMods() const {
274     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
275   }
276 
277   bool isVReg() const {
278     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
279            isRegClass(AMDGPU::VReg_64RegClassID) ||
280            isRegClass(AMDGPU::VReg_96RegClassID) ||
281            isRegClass(AMDGPU::VReg_128RegClassID) ||
282            isRegClass(AMDGPU::VReg_160RegClassID) ||
283            isRegClass(AMDGPU::VReg_192RegClassID) ||
284            isRegClass(AMDGPU::VReg_256RegClassID) ||
285            isRegClass(AMDGPU::VReg_512RegClassID) ||
286            isRegClass(AMDGPU::VReg_1024RegClassID);
287   }
288 
289   bool isVReg32() const {
290     return isRegClass(AMDGPU::VGPR_32RegClassID);
291   }
292 
293   bool isVReg32OrOff() const {
294     return isOff() || isVReg32();
295   }
296 
297   bool isNull() const {
298     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
299   }
300 
301   bool isVRegWithInputMods() const;
302 
303   bool isSDWAOperand(MVT type) const;
304   bool isSDWAFP16Operand() const;
305   bool isSDWAFP32Operand() const;
306   bool isSDWAInt16Operand() const;
307   bool isSDWAInt32Operand() const;
308 
309   bool isImmTy(ImmTy ImmT) const {
310     return isImm() && Imm.Type == ImmT;
311   }
312 
313   bool isImmModifier() const {
314     return isImm() && Imm.Type != ImmTyNone;
315   }
316 
317   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
318   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
319   bool isDMask() const { return isImmTy(ImmTyDMask); }
320   bool isDim() const { return isImmTy(ImmTyDim); }
321   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
322   bool isDA() const { return isImmTy(ImmTyDA); }
323   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
324   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
325   bool isLWE() const { return isImmTy(ImmTyLWE); }
326   bool isOff() const { return isImmTy(ImmTyOff); }
327   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
328   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
329   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
330   bool isOffen() const { return isImmTy(ImmTyOffen); }
331   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
332   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
333   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
334   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
335   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
336 
337   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
338   bool isGDS() const { return isImmTy(ImmTyGDS); }
339   bool isLDS() const { return isImmTy(ImmTyLDS); }
340   bool isCPol() const { return isImmTy(ImmTyCPol); }
341   bool isSWZ() const { return isImmTy(ImmTySWZ); }
342   bool isTFE() const { return isImmTy(ImmTyTFE); }
343   bool isD16() const { return isImmTy(ImmTyD16); }
344   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
345   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
346   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
347   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
348   bool isFI() const { return isImmTy(ImmTyDppFi); }
349   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
350   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
351   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
352   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
353   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
354   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
355   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
356   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
357   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
358   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
359   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
360   bool isHigh() const { return isImmTy(ImmTyHigh); }
361 
362   bool isMod() const {
363     return isClampSI() || isOModSI();
364   }
365 
366   bool isRegOrImm() const {
367     return isReg() || isImm();
368   }
369 
370   bool isRegClass(unsigned RCID) const;
371 
372   bool isInlineValue() const;
373 
374   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
375     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
376   }
377 
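  // Source operand predicates used by the generated instruction matcher. As
  // the definitions below show, SCSrc*/VCSrc* accept a register or an inline
  // constant, SSrc*/VSrc* additionally accept a literal, and VISrc*/AISrc*
  // restrict the register to VGPRs/AGPRs respectively.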
378   bool isSCSrcB16() const {
379     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
380   }
381 
382   bool isSCSrcV2B16() const {
383     return isSCSrcB16();
384   }
385 
386   bool isSCSrcB32() const {
387     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
388   }
389 
390   bool isSCSrcB64() const {
391     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
392   }
393 
394   bool isBoolReg() const;
395 
396   bool isSCSrcF16() const {
397     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
398   }
399 
400   bool isSCSrcV2F16() const {
401     return isSCSrcF16();
402   }
403 
404   bool isSCSrcF32() const {
405     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
406   }
407 
408   bool isSCSrcF64() const {
409     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
410   }
411 
412   bool isSSrcB32() const {
413     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
414   }
415 
416   bool isSSrcB16() const {
417     return isSCSrcB16() || isLiteralImm(MVT::i16);
418   }
419 
420   bool isSSrcV2B16() const {
421     llvm_unreachable("cannot happen");
422     return isSSrcB16();
423   }
424 
425   bool isSSrcB64() const {
426     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
427     // See isVSrc64().
428     return isSCSrcB64() || isLiteralImm(MVT::i64);
429   }
430 
431   bool isSSrcF32() const {
432     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
433   }
434 
435   bool isSSrcF64() const {
436     return isSCSrcB64() || isLiteralImm(MVT::f64);
437   }
438 
439   bool isSSrcF16() const {
440     return isSCSrcB16() || isLiteralImm(MVT::f16);
441   }
442 
443   bool isSSrcV2F16() const {
444     llvm_unreachable("cannot happen");
445     return isSSrcF16();
446   }
447 
448   bool isSSrcV2FP32() const {
449     llvm_unreachable("cannot happen");
450     return isSSrcF32();
451   }
452 
453   bool isSCSrcV2FP32() const {
454     llvm_unreachable("cannot happen");
455     return isSCSrcF32();
456   }
457 
458   bool isSSrcV2INT32() const {
459     llvm_unreachable("cannot happen");
460     return isSSrcB32();
461   }
462 
463   bool isSCSrcV2INT32() const {
464     llvm_unreachable("cannot happen");
465     return isSCSrcB32();
466   }
467 
468   bool isSSrcOrLdsB32() const {
469     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
470            isLiteralImm(MVT::i32) || isExpr();
471   }
472 
473   bool isVCSrcB32() const {
474     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
475   }
476 
477   bool isVCSrcB64() const {
478     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
479   }
480 
481   bool isVCSrcB16() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
483   }
484 
485   bool isVCSrcV2B16() const {
486     return isVCSrcB16();
487   }
488 
489   bool isVCSrcF32() const {
490     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
491   }
492 
493   bool isVCSrcF64() const {
494     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
495   }
496 
497   bool isVCSrcF16() const {
498     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
499   }
500 
501   bool isVCSrcV2F16() const {
502     return isVCSrcF16();
503   }
504 
505   bool isVSrcB32() const {
506     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
507   }
508 
509   bool isVSrcB64() const {
510     return isVCSrcF64() || isLiteralImm(MVT::i64);
511   }
512 
513   bool isVSrcB16() const {
514     return isVCSrcB16() || isLiteralImm(MVT::i16);
515   }
516 
517   bool isVSrcV2B16() const {
518     return isVSrcB16() || isLiteralImm(MVT::v2i16);
519   }
520 
521   bool isVCSrcV2FP32() const {
522     return isVCSrcF64();
523   }
524 
525   bool isVSrcV2FP32() const {
526     return isVSrcF64() || isLiteralImm(MVT::v2f32);
527   }
528 
529   bool isVCSrcV2INT32() const {
530     return isVCSrcB64();
531   }
532 
533   bool isVSrcV2INT32() const {
534     return isVSrcB64() || isLiteralImm(MVT::v2i32);
535   }
536 
537   bool isVSrcF32() const {
538     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
539   }
540 
541   bool isVSrcF64() const {
542     return isVCSrcF64() || isLiteralImm(MVT::f64);
543   }
544 
545   bool isVSrcF16() const {
546     return isVCSrcF16() || isLiteralImm(MVT::f16);
547   }
548 
549   bool isVSrcV2F16() const {
550     return isVSrcF16() || isLiteralImm(MVT::v2f16);
551   }
552 
553   bool isVISrcB32() const {
554     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
555   }
556 
557   bool isVISrcB16() const {
558     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
559   }
560 
561   bool isVISrcV2B16() const {
562     return isVISrcB16();
563   }
564 
565   bool isVISrcF32() const {
566     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
567   }
568 
569   bool isVISrcF16() const {
570     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
571   }
572 
573   bool isVISrcV2F16() const {
574     return isVISrcF16() || isVISrcB32();
575   }
576 
577   bool isVISrc_64B64() const {
578     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
579   }
580 
581   bool isVISrc_64F64() const {
582     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
583   }
584 
585   bool isVISrc_64V2FP32() const {
586     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
587   }
588 
589   bool isVISrc_64V2INT32() const {
590     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
591   }
592 
593   bool isVISrc_256B64() const {
594     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
595   }
596 
597   bool isVISrc_256F64() const {
598     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
599   }
600 
601   bool isVISrc_128B16() const {
602     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
603   }
604 
605   bool isVISrc_128V2B16() const {
606     return isVISrc_128B16();
607   }
608 
609   bool isVISrc_128B32() const {
610     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
611   }
612 
613   bool isVISrc_128F32() const {
614     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
615   }
616 
617   bool isVISrc_256V2FP32() const {
618     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
619   }
620 
621   bool isVISrc_256V2INT32() const {
622     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
623   }
624 
625   bool isVISrc_512B32() const {
626     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
627   }
628 
629   bool isVISrc_512B16() const {
630     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
631   }
632 
633   bool isVISrc_512V2B16() const {
634     return isVISrc_512B16();
635   }
636 
637   bool isVISrc_512F32() const {
638     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
639   }
640 
641   bool isVISrc_512F16() const {
642     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
643   }
644 
645   bool isVISrc_512V2F16() const {
646     return isVISrc_512F16() || isVISrc_512B32();
647   }
648 
649   bool isVISrc_1024B32() const {
650     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
651   }
652 
653   bool isVISrc_1024B16() const {
654     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
655   }
656 
657   bool isVISrc_1024V2B16() const {
658     return isVISrc_1024B16();
659   }
660 
661   bool isVISrc_1024F32() const {
662     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
663   }
664 
665   bool isVISrc_1024F16() const {
666     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
667   }
668 
669   bool isVISrc_1024V2F16() const {
670     return isVISrc_1024F16() || isVISrc_1024B32();
671   }
672 
673   bool isAISrcB32() const {
674     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
675   }
676 
677   bool isAISrcB16() const {
678     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
679   }
680 
681   bool isAISrcV2B16() const {
682     return isAISrcB16();
683   }
684 
685   bool isAISrcF32() const {
686     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
687   }
688 
689   bool isAISrcF16() const {
690     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
691   }
692 
693   bool isAISrcV2F16() const {
694     return isAISrcF16() || isAISrcB32();
695   }
696 
697   bool isAISrc_64B64() const {
698     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
699   }
700 
701   bool isAISrc_64F64() const {
702     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
703   }
704 
705   bool isAISrc_128B32() const {
706     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
707   }
708 
709   bool isAISrc_128B16() const {
710     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
711   }
712 
713   bool isAISrc_128V2B16() const {
714     return isAISrc_128B16();
715   }
716 
717   bool isAISrc_128F32() const {
718     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
719   }
720 
721   bool isAISrc_128F16() const {
722     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
723   }
724 
725   bool isAISrc_128V2F16() const {
726     return isAISrc_128F16() || isAISrc_128B32();
727   }
728 
729   bool isVISrc_128F16() const {
730     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
731   }
732 
733   bool isVISrc_128V2F16() const {
734     return isVISrc_128F16() || isVISrc_128B32();
735   }
736 
737   bool isAISrc_256B64() const {
738     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
739   }
740 
741   bool isAISrc_256F64() const {
742     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
743   }
744 
745   bool isAISrc_512B32() const {
746     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
747   }
748 
749   bool isAISrc_512B16() const {
750     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
751   }
752 
753   bool isAISrc_512V2B16() const {
754     return isAISrc_512B16();
755   }
756 
757   bool isAISrc_512F32() const {
758     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
759   }
760 
761   bool isAISrc_512F16() const {
762     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
763   }
764 
765   bool isAISrc_512V2F16() const {
766     return isAISrc_512F16() || isAISrc_512B32();
767   }
768 
769   bool isAISrc_1024B32() const {
770     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
771   }
772 
773   bool isAISrc_1024B16() const {
774     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
775   }
776 
777   bool isAISrc_1024V2B16() const {
778     return isAISrc_1024B16();
779   }
780 
781   bool isAISrc_1024F32() const {
782     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
783   }
784 
785   bool isAISrc_1024F16() const {
786     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
787   }
788 
789   bool isAISrc_1024V2F16() const {
790     return isAISrc_1024F16() || isAISrc_1024B32();
791   }
792 
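  // KImm operands are FP literals carried in a dedicated immediate field of
  // the encoding; unlike the *Src* operands above they have no
  // inline-constant form.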
793   bool isKImmFP32() const {
794     return isLiteralImm(MVT::f32);
795   }
796 
797   bool isKImmFP16() const {
798     return isLiteralImm(MVT::f16);
799   }
800 
801   bool isMem() const override {
802     return false;
803   }
804 
805   bool isExpr() const {
806     return Kind == Expression;
807   }
808 
809   bool isSoppBrTarget() const {
810     return isExpr() || isImm();
811   }
812 
813   bool isSWaitCnt() const;
814   bool isHwreg() const;
815   bool isSendMsg() const;
816   bool isSwizzle() const;
817   bool isSMRDOffset8() const;
818   bool isSMEMOffset() const;
819   bool isSMRDLiteralOffset() const;
820   bool isDPP8() const;
821   bool isDPPCtrl() const;
822   bool isBLGP() const;
823   bool isCBSZ() const;
824   bool isABID() const;
825   bool isGPRIdxMode() const;
826   bool isS16Imm() const;
827   bool isU16Imm() const;
828   bool isEndpgm() const;
829 
830   StringRef getExpressionAsToken() const {
831     assert(isExpr());
832     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
833     return S->getSymbol().getName();
834   }
835 
836   StringRef getToken() const {
837     assert(isToken());
838 
839     if (Kind == Expression)
840       return getExpressionAsToken();
841 
842     return StringRef(Tok.Data, Tok.Length);
843   }
844 
845   int64_t getImm() const {
846     assert(isImm());
847     return Imm.Val;
848   }
849 
850   void setImm(int64_t Val) {
851     assert(isImm());
852     Imm.Val = Val;
853   }
854 
855   ImmTy getImmTy() const {
856     assert(isImm());
857     return Imm.Type;
858   }
859 
860   unsigned getReg() const override {
861     assert(isRegKind());
862     return Reg.RegNo;
863   }
864 
865   SMLoc getStartLoc() const override {
866     return StartLoc;
867   }
868 
869   SMLoc getEndLoc() const override {
870     return EndLoc;
871   }
872 
873   SMRange getLocRange() const {
874     return SMRange(StartLoc, EndLoc);
875   }
876 
877   Modifiers getModifiers() const {
878     assert(isRegKind() || isImmTy(ImmTyNone));
879     return isRegKind() ? Reg.Mods : Imm.Mods;
880   }
881 
882   void setModifiers(Modifiers Mods) {
883     assert(isRegKind() || isImmTy(ImmTyNone));
884     if (isRegKind())
885       Reg.Mods = Mods;
886     else
887       Imm.Mods = Mods;
888   }
889 
890   bool hasModifiers() const {
891     return getModifiers().hasModifiers();
892   }
893 
894   bool hasFPModifiers() const {
895     return getModifiers().hasFPModifiers();
896   }
897 
898   bool hasIntModifiers() const {
899     return getModifiers().hasIntModifiers();
900   }
901 
902   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
903 
904   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
905 
906   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
907 
908   template <unsigned Bitwidth>
909   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
910 
911   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
912     addKImmFPOperands<16>(Inst, N);
913   }
914 
915   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
916     addKImmFPOperands<32>(Inst, N);
917   }
918 
919   void addRegOperands(MCInst &Inst, unsigned N) const;
920 
921   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
922     addRegOperands(Inst, N);
923   }
924 
925   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
926     if (isRegKind())
927       addRegOperands(Inst, N);
928     else if (isExpr())
929       Inst.addOperand(MCOperand::createExpr(Expr));
930     else
931       addImmOperands(Inst, N);
932   }
933 
934   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
935     Modifiers Mods = getModifiers();
936     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
937     if (isRegKind()) {
938       addRegOperands(Inst, N);
939     } else {
940       addImmOperands(Inst, N, false);
941     }
942   }
943 
944   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
945     assert(!hasIntModifiers());
946     addRegOrImmWithInputModsOperands(Inst, N);
947   }
948 
949   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
950     assert(!hasFPModifiers());
951     addRegOrImmWithInputModsOperands(Inst, N);
952   }
953 
954   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
955     Modifiers Mods = getModifiers();
956     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
957     assert(isRegKind());
958     addRegOperands(Inst, N);
959   }
960 
961   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
962     assert(!hasIntModifiers());
963     addRegWithInputModsOperands(Inst, N);
964   }
965 
966   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
967     assert(!hasFPModifiers());
968     addRegWithInputModsOperands(Inst, N);
969   }
970 
971   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
972     if (isImm())
973       addImmOperands(Inst, N);
974     else {
975       assert(isExpr());
976       Inst.addOperand(MCOperand::createExpr(Expr));
977     }
978   }
979 
980   static void printImmTy(raw_ostream& OS, ImmTy Type) {
981     switch (Type) {
982     case ImmTyNone: OS << "None"; break;
983     case ImmTyGDS: OS << "GDS"; break;
984     case ImmTyLDS: OS << "LDS"; break;
985     case ImmTyOffen: OS << "Offen"; break;
986     case ImmTyIdxen: OS << "Idxen"; break;
987     case ImmTyAddr64: OS << "Addr64"; break;
988     case ImmTyOffset: OS << "Offset"; break;
989     case ImmTyInstOffset: OS << "InstOffset"; break;
990     case ImmTyOffset0: OS << "Offset0"; break;
991     case ImmTyOffset1: OS << "Offset1"; break;
992     case ImmTyCPol: OS << "CPol"; break;
993     case ImmTySWZ: OS << "SWZ"; break;
994     case ImmTyTFE: OS << "TFE"; break;
995     case ImmTyD16: OS << "D16"; break;
996     case ImmTyFORMAT: OS << "FORMAT"; break;
997     case ImmTyClampSI: OS << "ClampSI"; break;
998     case ImmTyOModSI: OS << "OModSI"; break;
999     case ImmTyDPP8: OS << "DPP8"; break;
1000     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1001     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1002     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1003     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1004     case ImmTyDppFi: OS << "FI"; break;
1005     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1006     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1007     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1008     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1009     case ImmTyDMask: OS << "DMask"; break;
1010     case ImmTyDim: OS << "Dim"; break;
1011     case ImmTyUNorm: OS << "UNorm"; break;
1012     case ImmTyDA: OS << "DA"; break;
1013     case ImmTyR128A16: OS << "R128A16"; break;
1014     case ImmTyA16: OS << "A16"; break;
1015     case ImmTyLWE: OS << "LWE"; break;
1016     case ImmTyOff: OS << "Off"; break;
1017     case ImmTyExpTgt: OS << "ExpTgt"; break;
1018     case ImmTyExpCompr: OS << "ExpCompr"; break;
1019     case ImmTyExpVM: OS << "ExpVM"; break;
1020     case ImmTyHwreg: OS << "Hwreg"; break;
1021     case ImmTySendMsg: OS << "SendMsg"; break;
1022     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1023     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1024     case ImmTyAttrChan: OS << "AttrChan"; break;
1025     case ImmTyOpSel: OS << "OpSel"; break;
1026     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1027     case ImmTyNegLo: OS << "NegLo"; break;
1028     case ImmTyNegHi: OS << "NegHi"; break;
1029     case ImmTySwizzle: OS << "Swizzle"; break;
1030     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1031     case ImmTyHigh: OS << "High"; break;
1032     case ImmTyBLGP: OS << "BLGP"; break;
1033     case ImmTyCBSZ: OS << "CBSZ"; break;
1034     case ImmTyABID: OS << "ABID"; break;
1035     case ImmTyEndpgm: OS << "Endpgm"; break;
1036     }
1037   }
1038 
1039   void print(raw_ostream &OS) const override {
1040     switch (Kind) {
1041     case Register:
1042       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1043       break;
1044     case Immediate:
1045       OS << '<' << getImm();
1046       if (getImmTy() != ImmTyNone) {
1047         OS << " type: "; printImmTy(OS, getImmTy());
1048       }
1049       OS << " mods: " << Imm.Mods << '>';
1050       break;
1051     case Token:
1052       OS << '\'' << getToken() << '\'';
1053       break;
1054     case Expression:
1055       OS << "<expr " << *Expr << '>';
1056       break;
1057     }
1058   }
1059 
1060   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1061                                       int64_t Val, SMLoc Loc,
1062                                       ImmTy Type = ImmTyNone,
1063                                       bool IsFPImm = false) {
1064     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1065     Op->Imm.Val = Val;
1066     Op->Imm.IsFPImm = IsFPImm;
1067     Op->Imm.Kind = ImmKindTyNone;
1068     Op->Imm.Type = Type;
1069     Op->Imm.Mods = Modifiers();
1070     Op->StartLoc = Loc;
1071     Op->EndLoc = Loc;
1072     return Op;
1073   }
1074 
1075   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1076                                         StringRef Str, SMLoc Loc,
1077                                         bool HasExplicitEncodingSize = true) {
1078     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1079     Res->Tok.Data = Str.data();
1080     Res->Tok.Length = Str.size();
1081     Res->StartLoc = Loc;
1082     Res->EndLoc = Loc;
1083     return Res;
1084   }
1085 
1086   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1087                                       unsigned RegNo, SMLoc S,
1088                                       SMLoc E) {
1089     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1090     Op->Reg.RegNo = RegNo;
1091     Op->Reg.Mods = Modifiers();
1092     Op->StartLoc = S;
1093     Op->EndLoc = E;
1094     return Op;
1095   }
1096 
1097   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1098                                        const class MCExpr *Expr, SMLoc S) {
1099     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1100     Op->Expr = Expr;
1101     Op->StartLoc = S;
1102     Op->EndLoc = S;
1103     return Op;
1104   }
1105 };
1106 
1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1109   return OS;
1110 }
1111 
1112 //===----------------------------------------------------------------------===//
1113 // AsmParser
1114 //===----------------------------------------------------------------------===//
1115 
1116 // Holds info related to the current kernel, e.g. count of SGPRs used.
1117 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
1118 // next .amdgpu_hsa_kernel directive or at EOF.
1119 class KernelScopeInfo {
1120   int SgprIndexUnusedMin = -1;
1121   int VgprIndexUnusedMin = -1;
1122   MCContext *Ctx = nullptr;
1123 
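  // Track the highest SGPR/VGPR index seen and publish the running count via
  // the .kernel.sgpr_count / .kernel.vgpr_count symbols.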
1124   void usesSgprAt(int i) {
1125     if (i >= SgprIndexUnusedMin) {
1126       SgprIndexUnusedMin = ++i;
1127       if (Ctx) {
1128         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130       }
1131     }
1132   }
1133 
1134   void usesVgprAt(int i) {
1135     if (i >= VgprIndexUnusedMin) {
1136       VgprIndexUnusedMin = ++i;
1137       if (Ctx) {
1138         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140       }
1141     }
1142   }
1143 
1144 public:
1145   KernelScopeInfo() = default;
1146 
1147   void initialize(MCContext &Context) {
1148     Ctx = &Context;
1149     usesSgprAt(SgprIndexUnusedMin = -1);
1150     usesVgprAt(VgprIndexUnusedMin = -1);
1151   }
1152 
1153   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154     switch (RegKind) {
1155       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156       case IS_AGPR: // fall through
1157       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158       default: break;
1159     }
1160   }
1161 };
1162 
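// Target assembly parser for AMDGPU: parses instructions, registers, named
// operand modifiers, and the AMDGPU/HSA assembler directives.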
1163 class AMDGPUAsmParser : public MCTargetAsmParser {
1164   MCAsmParser &Parser;
1165 
1166   // Number of extra operands parsed after the first optional operand.
1167   // This may be necessary to skip hardcoded mandatory operands.
1168   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169 
1170   unsigned ForcedEncodingSize = 0;
1171   bool ForcedDPP = false;
1172   bool ForcedSDWA = false;
1173   KernelScopeInfo KernelScope;
1174   unsigned CPolSeen;
1175 
1176   /// @name Auto-generated Match Functions
1177   /// {
1178 
1179 #define GET_ASSEMBLER_HEADER
1180 #include "AMDGPUGenAsmMatcher.inc"
1181 
1182   /// }
1183 
1184 private:
1185   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186   bool OutOfRangeError(SMRange Range);
1187   /// Calculate the VGPR/SGPR blocks required for the given target, reserved
1188   /// registers, and user-specified NextFreeXGPR values.
1189   ///
1190   /// \param Features [in] Target features, used for bug corrections.
1191   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195   /// descriptor field, if valid.
1196   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200   /// \param VGPRBlocks [out] Result VGPR block count.
1201   /// \param SGPRBlocks [out] Result SGPR block count.
1202   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203                           bool FlatScrUsed, bool XNACKUsed,
1204                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205                           SMRange VGPRRange, unsigned NextFreeSGPR,
1206                           SMRange SGPRRange, unsigned &VGPRBlocks,
1207                           unsigned &SGPRBlocks);
1208   bool ParseDirectiveAMDGCNTarget();
1209   bool ParseDirectiveAMDHSAKernel();
1210   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211   bool ParseDirectiveHSACodeObjectVersion();
1212   bool ParseDirectiveHSACodeObjectISA();
1213   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214   bool ParseDirectiveAMDKernelCodeT();
1215   // TODO: Possibly make subtargetHasRegister const.
1216   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1217   bool ParseDirectiveAMDGPUHsaKernel();
1218 
1219   bool ParseDirectiveISAVersion();
1220   bool ParseDirectiveHSAMetadata();
1221   bool ParseDirectivePALMetadataBegin();
1222   bool ParseDirectivePALMetadata();
1223   bool ParseDirectiveAMDGPULDS();
1224 
1225   /// Common code to parse out a block of text (typically YAML) between start and
1226   /// end directives.
1227   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1228                            const char *AssemblerDirectiveEnd,
1229                            std::string &CollectString);
1230 
1231   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1232                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1233   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1234                            unsigned &RegNum, unsigned &RegWidth,
1235                            bool RestoreOnFailure = false);
1236   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1237                            unsigned &RegNum, unsigned &RegWidth,
1238                            SmallVectorImpl<AsmToken> &Tokens);
1239   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1240                            unsigned &RegWidth,
1241                            SmallVectorImpl<AsmToken> &Tokens);
1242   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1243                            unsigned &RegWidth,
1244                            SmallVectorImpl<AsmToken> &Tokens);
1245   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1246                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1247   bool ParseRegRange(unsigned& Num, unsigned& Width);
1248   unsigned getRegularReg(RegisterKind RegKind,
1249                          unsigned RegNum,
1250                          unsigned RegWidth,
1251                          SMLoc Loc);
1252 
1253   bool isRegister();
1254   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1255   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1256   void initializeGprCountSymbol(RegisterKind RegKind);
1257   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1258                              unsigned RegWidth);
1259   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1260                     bool IsAtomic, bool IsLds = false);
1261   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1262                  bool IsGdsHardcoded);
1263 
1264 public:
1265   enum AMDGPUMatchResultTy {
1266     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1267   };
1268   enum OperandMode {
1269     OperandMode_Default,
1270     OperandMode_NSA,
1271   };
1272 
1273   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1274 
1275   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1276                const MCInstrInfo &MII,
1277                const MCTargetOptions &Options)
1278       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1279     MCAsmParserExtension::Initialize(Parser);
1280 
1281     if (getFeatureBits().none()) {
1282       // Set default features.
1283       copySTI().ToggleFeature("southern-islands");
1284     }
1285 
1286     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1287 
1288     {
1289       // TODO: make these pre-defined variables read-only.
1290       // Currently there is no suitable machinery in the core llvm-mc for this.
1291       // MCSymbol::isRedefinable is intended for another purpose, and
1292       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1293       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1294       MCContext &Ctx = getContext();
1295       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1296         MCSymbol *Sym =
1297             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1298         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1299         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1300         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1301         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1302         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1303       } else {
1304         MCSymbol *Sym =
1305             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1306         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1308         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311       }
1312       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1313         initializeGprCountSymbol(IS_VGPR);
1314         initializeGprCountSymbol(IS_SGPR);
1315       } else
1316         KernelScope.initialize(getContext());
1317     }
1318   }
1319 
1320   bool hasMIMG_R128() const {
1321     return AMDGPU::hasMIMG_R128(getSTI());
1322   }
1323 
1324   bool hasPackedD16() const {
1325     return AMDGPU::hasPackedD16(getSTI());
1326   }
1327 
1328   bool hasGFX10A16() const {
1329     return AMDGPU::hasGFX10A16(getSTI());
1330   }
1331 
1332   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1333 
1334   bool isSI() const {
1335     return AMDGPU::isSI(getSTI());
1336   }
1337 
1338   bool isCI() const {
1339     return AMDGPU::isCI(getSTI());
1340   }
1341 
1342   bool isVI() const {
1343     return AMDGPU::isVI(getSTI());
1344   }
1345 
1346   bool isGFX9() const {
1347     return AMDGPU::isGFX9(getSTI());
1348   }
1349 
1350   bool isGFX90A() const {
1351     return AMDGPU::isGFX90A(getSTI());
1352   }
1353 
1354   bool isGFX9Plus() const {
1355     return AMDGPU::isGFX9Plus(getSTI());
1356   }
1357 
1358   bool isGFX10() const {
1359     return AMDGPU::isGFX10(getSTI());
1360   }
1361 
1362   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1363 
1364   bool isGFX10_BEncoding() const {
1365     return AMDGPU::isGFX10_BEncoding(getSTI());
1366   }
1367 
1368   bool hasInv2PiInlineImm() const {
1369     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1370   }
1371 
1372   bool hasFlatOffsets() const {
1373     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1374   }
1375 
1376   bool hasArchitectedFlatScratch() const {
1377     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1378   }
1379 
1380   bool hasSGPR102_SGPR103() const {
1381     return !isVI() && !isGFX9();
1382   }
1383 
1384   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1385 
1386   bool hasIntClamp() const {
1387     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1388   }
1389 
1390   AMDGPUTargetStreamer &getTargetStreamer() {
1391     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1392     return static_cast<AMDGPUTargetStreamer &>(TS);
1393   }
1394 
1395   const MCRegisterInfo *getMRI() const {
1396     // We need this const_cast because for some reason getContext() is not const
1397     // in MCAsmParser.
1398     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1399   }
1400 
1401   const MCInstrInfo *getMII() const {
1402     return &MII;
1403   }
1404 
1405   const FeatureBitset &getFeatureBits() const {
1406     return getSTI().getFeatureBits();
1407   }
1408 
1409   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1410   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1411   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1412 
1413   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1414   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1415   bool isForcedDPP() const { return ForcedDPP; }
1416   bool isForcedSDWA() const { return ForcedSDWA; }
1417   ArrayRef<unsigned> getMatchedVariants() const;
1418   StringRef getMatchedVariantName() const;
1419 
1420   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1421   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1422                      bool RestoreOnFailure);
1423   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1424   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1425                                         SMLoc &EndLoc) override;
1426   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1427   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1428                                       unsigned Kind) override;
1429   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1430                                OperandVector &Operands, MCStreamer &Out,
1431                                uint64_t &ErrorInfo,
1432                                bool MatchingInlineAsm) override;
1433   bool ParseDirective(AsmToken DirectiveID) override;
1434   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1435                                     OperandMode Mode = OperandMode_Default);
1436   StringRef parseMnemonicSuffix(StringRef Name);
1437   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1438                         SMLoc NameLoc, OperandVector &Operands) override;
1439   //bool ProcessInstruction(MCInst &Inst);
1440 
1441   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1442 
1443   OperandMatchResultTy
1444   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1445                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1446                      bool (*ConvertResult)(int64_t &) = nullptr);
1447 
1448   OperandMatchResultTy
1449   parseOperandArrayWithPrefix(const char *Prefix,
1450                               OperandVector &Operands,
1451                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1452                               bool (*ConvertResult)(int64_t&) = nullptr);
1453 
1454   OperandMatchResultTy
1455   parseNamedBit(StringRef Name, OperandVector &Operands,
1456                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1457   OperandMatchResultTy parseCPol(OperandVector &Operands);
1458   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1459                                              StringRef &Value,
1460                                              SMLoc &StringLoc);
1461 
1462   bool isModifier();
1463   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1464   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1465   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1466   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1467   bool parseSP3NegModifier();
1468   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1469   OperandMatchResultTy parseReg(OperandVector &Operands);
1470   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1471   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1472   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1473   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1474   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1475   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1476   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1477   OperandMatchResultTy parseUfmt(int64_t &Format);
1478   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1479   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1480   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1481   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1482   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1483   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1484   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1485 
1486   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1487   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1488   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1489   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1490 
1491   bool parseCnt(int64_t &IntVal);
1492   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1493   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1494 
1495 private:
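  // Describes one field of a structured operand such as hwreg(...) or
  // sendmsg(...): its location, value, whether it was given symbolically, and
  // whether it was specified at all.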
1496   struct OperandInfoTy {
1497     SMLoc Loc;
1498     int64_t Id;
1499     bool IsSymbolic = false;
1500     bool IsDefined = false;
1501 
1502     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1503   };
1504 
1505   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1506   bool validateSendMsg(const OperandInfoTy &Msg,
1507                        const OperandInfoTy &Op,
1508                        const OperandInfoTy &Stream);
1509 
1510   bool parseHwregBody(OperandInfoTy &HwReg,
1511                       OperandInfoTy &Offset,
1512                       OperandInfoTy &Width);
1513   bool validateHwreg(const OperandInfoTy &HwReg,
1514                      const OperandInfoTy &Offset,
1515                      const OperandInfoTy &Width);
1516 
1517   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1518   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1519 
1520   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1521                       const OperandVector &Operands) const;
1522   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1523   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1524   SMLoc getLitLoc(const OperandVector &Operands) const;
1525   SMLoc getConstLoc(const OperandVector &Operands) const;
1526 
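  // Post-match semantic checks for constant-bus limits, MIMG operands,
  // flat/SMEM offsets, literal usage, and other target-specific restrictions.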
1527   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1528   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1529   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1530   bool validateSOPLiteral(const MCInst &Inst) const;
1531   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1532   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1533   bool validateIntClampSupported(const MCInst &Inst);
1534   bool validateMIMGAtomicDMask(const MCInst &Inst);
1535   bool validateMIMGGatherDMask(const MCInst &Inst);
1536   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1537   bool validateMIMGDataSize(const MCInst &Inst);
1538   bool validateMIMGAddrSize(const MCInst &Inst);
1539   bool validateMIMGD16(const MCInst &Inst);
1540   bool validateMIMGDim(const MCInst &Inst);
1541   bool validateMIMGMSAA(const MCInst &Inst);
1542   bool validateOpSel(const MCInst &Inst);
1543   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1544   bool validateVccOperand(unsigned Reg) const;
1545   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1546   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1547   bool validateAGPRLdSt(const MCInst &Inst) const;
1548   bool validateVGPRAlign(const MCInst &Inst) const;
1549   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1550   bool validateDivScale(const MCInst &Inst);
1551   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1552                              const SMLoc &IDLoc);
1553   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1554   unsigned getConstantBusLimit(unsigned Opcode) const;
1555   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1556   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1557   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1558 
1559   bool isSupportedMnemo(StringRef Mnemo,
1560                         const FeatureBitset &FBS);
1561   bool isSupportedMnemo(StringRef Mnemo,
1562                         const FeatureBitset &FBS,
1563                         ArrayRef<unsigned> Variants);
1564   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1565 
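  // Low-level helpers over the lexer token stream used by the operand and
  // directive parsers.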
1566   bool isId(const StringRef Id) const;
1567   bool isId(const AsmToken &Token, const StringRef Id) const;
1568   bool isToken(const AsmToken::TokenKind Kind) const;
1569   bool trySkipId(const StringRef Id);
1570   bool trySkipId(const StringRef Pref, const StringRef Id);
1571   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1572   bool trySkipToken(const AsmToken::TokenKind Kind);
1573   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1574   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1575   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1576 
1577   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1578   AsmToken::TokenKind getTokenKind() const;
1579   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1580   bool parseExpr(OperandVector &Operands);
1581   StringRef getTokenStr() const;
1582   AsmToken peekToken();
1583   AsmToken getToken() const;
1584   SMLoc getLoc() const;
1585   void lex();
1586 
1587 public:
1588   void onBeginOfFile() override;
1589 
1590   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1591   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1592 
1593   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1594   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1595   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1596   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1597   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1598   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1599 
1600   bool parseSwizzleOperand(int64_t &Op,
1601                            const unsigned MinVal,
1602                            const unsigned MaxVal,
1603                            const StringRef ErrMsg,
1604                            SMLoc &Loc);
1605   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1606                             const unsigned MinVal,
1607                             const unsigned MaxVal,
1608                             const StringRef ErrMsg);
1609   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1610   bool parseSwizzleOffset(int64_t &Imm);
1611   bool parseSwizzleMacro(int64_t &Imm);
1612   bool parseSwizzleQuadPerm(int64_t &Imm);
1613   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1614   bool parseSwizzleBroadcast(int64_t &Imm);
1615   bool parseSwizzleSwap(int64_t &Imm);
1616   bool parseSwizzleReverse(int64_t &Imm);
1617 
1618   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1619   int64_t parseGPRIdxMacro();
1620 
1621   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1622   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1623   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1624   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1625 
1626   AMDGPUOperand::Ptr defaultCPol() const;
1627 
1628   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1629   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1630   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1631   AMDGPUOperand::Ptr defaultFlatOffset() const;
1632 
1633   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1634 
1635   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1636                OptionalImmIndexMap &OptionalIdx);
1637   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1638   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1639   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1640   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1641                 OptionalImmIndexMap &OptionalIdx);
1642 
1643   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1644 
1645   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1646                bool IsAtomic = false);
1647   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1648   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1649 
1650   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1651 
1652   bool parseDimId(unsigned &Encoding);
1653   OperandMatchResultTy parseDim(OperandVector &Operands);
1654   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1655   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1656   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1657   int64_t parseDPPCtrlSel(StringRef Ctrl);
1658   int64_t parseDPPCtrlPerm();
1659   AMDGPUOperand::Ptr defaultRowMask() const;
1660   AMDGPUOperand::Ptr defaultBankMask() const;
1661   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1662   AMDGPUOperand::Ptr defaultFI() const;
1663   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1664   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1665 
1666   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1667                                     AMDGPUOperand::ImmTy Type);
1668   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1669   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1670   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1671   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1672   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1673   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1674   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1675                uint64_t BasicInstType,
1676                bool SkipDstVcc = false,
1677                bool SkipSrcVcc = false);
1678 
1679   AMDGPUOperand::Ptr defaultBLGP() const;
1680   AMDGPUOperand::Ptr defaultCBSZ() const;
1681   AMDGPUOperand::Ptr defaultABID() const;
1682 
1683   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1684   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1685 };
1686 
1687 struct OptionalOperand {
1688   const char *Name;
1689   AMDGPUOperand::ImmTy Type;
1690   bool IsBit;
1691   bool (*ConvertResult)(int64_t&);
1692 };
1693 
1694 } // end anonymous namespace
1695 
1696 // May be called with an integer type of equivalent bitwidth.
1697 static const fltSemantics *getFltSemantics(unsigned Size) {
1698   switch (Size) {
1699   case 4:
1700     return &APFloat::IEEEsingle();
1701   case 8:
1702     return &APFloat::IEEEdouble();
1703   case 2:
1704     return &APFloat::IEEEhalf();
1705   default:
1706     llvm_unreachable("unsupported fp type");
1707   }
1708 }
1709 
1710 static const fltSemantics *getFltSemantics(MVT VT) {
1711   return getFltSemantics(VT.getSizeInBits() / 8);
1712 }
1713 
1714 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1715   switch (OperandType) {
1716   case AMDGPU::OPERAND_REG_IMM_INT32:
1717   case AMDGPU::OPERAND_REG_IMM_FP32:
1718   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1719   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1720   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1721   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1722   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1723   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1724   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1725   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1726     return &APFloat::IEEEsingle();
1727   case AMDGPU::OPERAND_REG_IMM_INT64:
1728   case AMDGPU::OPERAND_REG_IMM_FP64:
1729   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1730   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1731   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1732     return &APFloat::IEEEdouble();
1733   case AMDGPU::OPERAND_REG_IMM_INT16:
1734   case AMDGPU::OPERAND_REG_IMM_FP16:
1735   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1736   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1737   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1738   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1739   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1740   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1741   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1742   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1743   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1744   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1745     return &APFloat::IEEEhalf();
1746   default:
1747     llvm_unreachable("unsupported fp type");
1748   }
1749 }
1750 
1751 //===----------------------------------------------------------------------===//
1752 // Operand
1753 //===----------------------------------------------------------------------===//
1754 
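// Check whether FPLiteral can be converted to the floating-point semantics
// of VT, allowing precision loss but not overflow or underflow. Note that
// FPLiteral is converted in place.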
1755 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1756   bool Lost;
1757 
1758   // Convert literal to the floating-point semantics of VT
1759   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1760                                                APFloat::rmNearestTiesToEven,
1761                                                &Lost);
1762   // We allow precision loss but not overflow or underflow
1763   if (Status != APFloat::opOK &&
1764       Lost &&
1765       ((Status & APFloat::opOverflow)  != 0 ||
1766        (Status & APFloat::opUnderflow) != 0)) {
1767     return false;
1768   }
1769 
1770   return true;
1771 }
1772 
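// Return true if Val can be truncated to Size bits without losing
// information, i.e. it fits in Size bits as either a signed or an unsigned
// value.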
1773 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1774   return isUIntN(Size, Val) || isIntN(Size, Val);
1775 }
1776 
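// Check whether a 16-bit value can be encoded as an inline constant for the
// given 16-bit (or packed 16-bit) operand type.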
1777 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1778   if (VT.getScalarType() == MVT::i16) {
1779     // FP inline immediates do not work correctly for i16 operands.
1780     return isInlinableIntLiteral(Val);
1781   }
1782 
1783   // f16/v2f16 operands work correctly for all values.
1784   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1785 }
1786 
1787 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1788 
1789   // This is a hack to enable named inline values like
1790   // shared_base with both 32-bit and 64-bit operands.
1791   // Note that these values are defined as
1792   // 32-bit operands only.
1793   if (isInlineValue()) {
1794     return true;
1795   }
1796 
1797   if (!isImmTy(ImmTyNone)) {
1798     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1799     return false;
1800   }
1801   // TODO: We should avoid using host float here. It would be better to
1802   // check the float bit values which is what a few other places do.
1803   // We've had bot failures before due to weird NaN support on mips hosts.
1804 
1805   APInt Literal(64, Imm.Val);
1806 
1807   if (Imm.IsFPImm) { // We got fp literal token
1808     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1809       return AMDGPU::isInlinableLiteral64(Imm.Val,
1810                                           AsmParser->hasInv2PiInlineImm());
1811     }
1812 
1813     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1814     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1815       return false;
1816 
1817     if (type.getScalarSizeInBits() == 16) {
1818       return isInlineableLiteralOp16(
1819         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1820         type, AsmParser->hasInv2PiInlineImm());
1821     }
1822 
1823     // Check if single precision literal is inlinable
1824     return AMDGPU::isInlinableLiteral32(
1825       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1826       AsmParser->hasInv2PiInlineImm());
1827   }
1828 
1829   // We got int literal token.
1830   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1831     return AMDGPU::isInlinableLiteral64(Imm.Val,
1832                                         AsmParser->hasInv2PiInlineImm());
1833   }
1834 
1835   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1836     return false;
1837   }
1838 
1839   if (type.getScalarSizeInBits() == 16) {
1840     return isInlineableLiteralOp16(
1841       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1842       type, AsmParser->hasInv2PiInlineImm());
1843   }
1844 
1845   return AMDGPU::isInlinableLiteral32(
1846     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1847     AsmParser->hasInv2PiInlineImm());
1848 }
1849 
1850 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1851   // Check that this immediate can be added as a literal
1852   if (!isImmTy(ImmTyNone)) {
1853     return false;
1854   }
1855 
1856   if (!Imm.IsFPImm) {
1857     // We got int literal token.
1858 
1859     if (type == MVT::f64 && hasFPModifiers()) {
1860       // FP modifiers cannot be applied to int literals while preserving the
1861       // same semantics for VOP1/2/C and VOP3 because of integer truncation.
1862       // To avoid ambiguity, these cases are disabled.
1863       return false;
1864     }
1865 
1866     unsigned Size = type.getSizeInBits();
1867     if (Size == 64)
1868       Size = 32;
1869 
1870     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1871     // types.
1872     return isSafeTruncation(Imm.Val, Size);
1873   }
1874 
1875   // We got fp literal token
1876   if (type == MVT::f64) { // Expected 64-bit fp operand
1877     // Low 32 bits of the literal will be zeroed when encoded; accept it anyway.
1878     return true;
1879   }
1880 
1881   if (type == MVT::i64) { // Expected 64-bit int operand
1882     // We don't allow fp literals in 64-bit integer instructions. It is
1883     // unclear how we should encode them.
1884     return false;
1885   }
1886 
1887   // We allow fp literals with f16x2 operands assuming that the specified
1888   // literal goes into the lower half and the upper half is zero. We also
1889   // require that the literal may be losslessly converted to f16.
1890   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1891                      (type == MVT::v2i16)? MVT::i16 :
1892                      (type == MVT::v2f32)? MVT::f32 : type;
1893 
1894   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1895   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1896 }
1897 
1898 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1899   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1900 }
1901 
1902 bool AMDGPUOperand::isVRegWithInputMods() const {
1903   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1904          // GFX90A allows DPP on 64-bit operands.
1905          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1906           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1907 }
1908 
1909 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1910   if (AsmParser->isVI())
1911     return isVReg32();
1912   else if (AsmParser->isGFX9Plus())
1913     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1914   else
1915     return false;
1916 }
1917 
1918 bool AMDGPUOperand::isSDWAFP16Operand() const {
1919   return isSDWAOperand(MVT::f16);
1920 }
1921 
1922 bool AMDGPUOperand::isSDWAFP32Operand() const {
1923   return isSDWAOperand(MVT::f32);
1924 }
1925 
1926 bool AMDGPUOperand::isSDWAInt16Operand() const {
1927   return isSDWAOperand(MVT::i16);
1928 }
1929 
1930 bool AMDGPUOperand::isSDWAInt32Operand() const {
1931   return isSDWAOperand(MVT::i32);
1932 }
1933 
1934 bool AMDGPUOperand::isBoolReg() const {
1935   auto FB = AsmParser->getFeatureBits();
1936   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1937                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1938 }
1939 
1940 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1941 {
1942   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1943   assert(Size == 2 || Size == 4 || Size == 8);
1944 
1945   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1946 
1947   if (Imm.Mods.Abs) {
1948     Val &= ~FpSignMask;
1949   }
1950   if (Imm.Mods.Neg) {
1951     Val ^= FpSignMask;
1952   }
1953 
1954   return Val;
1955 }
1956 
1957 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1958   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1959                              Inst.getNumOperands())) {
1960     addLiteralImmOperand(Inst, Imm.Val,
1961                          ApplyModifiers &&
1962                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1963   } else {
1964     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1965     Inst.addOperand(MCOperand::createImm(Imm.Val));
1966     setImmKindNone();
1967   }
1968 }
1969 
1970 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1971   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1972   auto OpNum = Inst.getNumOperands();
1973   // Check that this operand accepts literals
1974   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1975 
1976   if (ApplyModifiers) {
1977     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1978     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1979     Val = applyInputFPModifiers(Val, Size);
1980   }
1981 
1982   APInt Literal(64, Val);
1983   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1984 
1985   if (Imm.IsFPImm) { // We got fp literal token
1986     switch (OpTy) {
1987     case AMDGPU::OPERAND_REG_IMM_INT64:
1988     case AMDGPU::OPERAND_REG_IMM_FP64:
1989     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1990     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1991     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1992       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1993                                        AsmParser->hasInv2PiInlineImm())) {
1994         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1995         setImmKindConst();
1996         return;
1997       }
1998 
1999       // Non-inlineable
2000       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2001         // For fp operands we check that the low 32 bits are zero
2002         if (Literal.getLoBits(32) != 0) {
2003           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2004           "Can't encode literal as exact 64-bit floating-point operand. "
2005           "Low 32-bits will be set to zero");
2006         }
2007 
2008         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2009         setImmKindLiteral();
2010         return;
2011       }
2012 
2013       // We don't allow fp literals in 64-bit integer instructions. It is
2014       // unclear how we should encode them. This case should be checked earlier
2015       // in predicate methods (isLiteralImm())
2016       llvm_unreachable("fp literal in 64-bit integer instruction.");
2017 
2018     case AMDGPU::OPERAND_REG_IMM_INT32:
2019     case AMDGPU::OPERAND_REG_IMM_FP32:
2020     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2021     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2022     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2023     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2024     case AMDGPU::OPERAND_REG_IMM_INT16:
2025     case AMDGPU::OPERAND_REG_IMM_FP16:
2026     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2027     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2028     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2029     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2030     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2031     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2032     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2033     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2034     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2035     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2036     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2037     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2038     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2039     case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2040       bool lost;
2041       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2042       // Convert literal to the operand's floating-point format
2043       FPLiteral.convert(*getOpFltSemantics(OpTy),
2044                         APFloat::rmNearestTiesToEven, &lost);
2045       // We allow precision loss but not overflow or underflow. This should be
2046       // checked earlier in isLiteralImm()
2047 
2048       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2049       Inst.addOperand(MCOperand::createImm(ImmVal));
2050       setImmKindLiteral();
2051       return;
2052     }
2053     default:
2054       llvm_unreachable("invalid operand size");
2055     }
2056 
2057     return;
2058   }
2059 
2060   // We got int literal token.
2061   // Only sign extend inline immediates.
2062   switch (OpTy) {
2063   case AMDGPU::OPERAND_REG_IMM_INT32:
2064   case AMDGPU::OPERAND_REG_IMM_FP32:
2065   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2066   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2067   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2068   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2069   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2070   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2071   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2072   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2073   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2074   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2075     if (isSafeTruncation(Val, 32) &&
2076         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2077                                      AsmParser->hasInv2PiInlineImm())) {
2078       Inst.addOperand(MCOperand::createImm(Val));
2079       setImmKindConst();
2080       return;
2081     }
2082 
2083     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2084     setImmKindLiteral();
2085     return;
2086 
2087   case AMDGPU::OPERAND_REG_IMM_INT64:
2088   case AMDGPU::OPERAND_REG_IMM_FP64:
2089   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2090   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2091   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2092     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2093       Inst.addOperand(MCOperand::createImm(Val));
2094       setImmKindConst();
2095       return;
2096     }
2097 
2098     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2099     setImmKindLiteral();
2100     return;
2101 
2102   case AMDGPU::OPERAND_REG_IMM_INT16:
2103   case AMDGPU::OPERAND_REG_IMM_FP16:
2104   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2105   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2106   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2107   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2108     if (isSafeTruncation(Val, 16) &&
2109         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2110                                      AsmParser->hasInv2PiInlineImm())) {
2111       Inst.addOperand(MCOperand::createImm(Val));
2112       setImmKindConst();
2113       return;
2114     }
2115 
2116     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2117     setImmKindLiteral();
2118     return;
2119 
2120   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2121   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2122   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2123   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2124     assert(isSafeTruncation(Val, 16));
2125     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2126                                         AsmParser->hasInv2PiInlineImm()));
2127 
2128     Inst.addOperand(MCOperand::createImm(Val));
2129     return;
2130   }
2131   default:
2132     llvm_unreachable("invalid operand size");
2133   }
2134 }
2135 
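// Add a k-imm operand: a 16- or 32-bit literal encoded directly in the
// instruction. FP literal tokens are first converted to the target
// floating-point format.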
2136 template <unsigned Bitwidth>
2137 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2138   APInt Literal(64, Imm.Val);
2139   setImmKindNone();
2140 
2141   if (!Imm.IsFPImm) {
2142     // We got int literal token.
2143     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2144     return;
2145   }
2146 
2147   bool Lost;
2148   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2149   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2150                     APFloat::rmNearestTiesToEven, &Lost);
2151   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2152 }
2153 
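// Add the parsed register, translated to the subtarget-specific MC register.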
2154 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2155   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2156 }
2157 
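// Check whether Reg is one of the special registers that represent named
// inline values (e.g. src_shared_base, src_scc, null).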
2158 static bool isInlineValue(unsigned Reg) {
2159   switch (Reg) {
2160   case AMDGPU::SRC_SHARED_BASE:
2161   case AMDGPU::SRC_SHARED_LIMIT:
2162   case AMDGPU::SRC_PRIVATE_BASE:
2163   case AMDGPU::SRC_PRIVATE_LIMIT:
2164   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2165     return true;
2166   case AMDGPU::SRC_VCCZ:
2167   case AMDGPU::SRC_EXECZ:
2168   case AMDGPU::SRC_SCC:
2169     return true;
2170   case AMDGPU::SGPR_NULL:
2171     return true;
2172   default:
2173     return false;
2174   }
2175 }
2176 
2177 bool AMDGPUOperand::isInlineValue() const {
2178   return isRegKind() && ::isInlineValue(getReg());
2179 }
2180 
2181 //===----------------------------------------------------------------------===//
2182 // AsmParser
2183 //===----------------------------------------------------------------------===//
2184 
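// Map a register kind and width (in 32-bit registers) to the corresponding
// register class ID, or -1 if no such class exists.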
2185 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2186   if (Is == IS_VGPR) {
2187     switch (RegWidth) {
2188       default: return -1;
2189       case 1: return AMDGPU::VGPR_32RegClassID;
2190       case 2: return AMDGPU::VReg_64RegClassID;
2191       case 3: return AMDGPU::VReg_96RegClassID;
2192       case 4: return AMDGPU::VReg_128RegClassID;
2193       case 5: return AMDGPU::VReg_160RegClassID;
2194       case 6: return AMDGPU::VReg_192RegClassID;
2195       case 7: return AMDGPU::VReg_224RegClassID;
2196       case 8: return AMDGPU::VReg_256RegClassID;
2197       case 16: return AMDGPU::VReg_512RegClassID;
2198       case 32: return AMDGPU::VReg_1024RegClassID;
2199     }
2200   } else if (Is == IS_TTMP) {
2201     switch (RegWidth) {
2202       default: return -1;
2203       case 1: return AMDGPU::TTMP_32RegClassID;
2204       case 2: return AMDGPU::TTMP_64RegClassID;
2205       case 4: return AMDGPU::TTMP_128RegClassID;
2206       case 8: return AMDGPU::TTMP_256RegClassID;
2207       case 16: return AMDGPU::TTMP_512RegClassID;
2208     }
2209   } else if (Is == IS_SGPR) {
2210     switch (RegWidth) {
2211       default: return -1;
2212       case 1: return AMDGPU::SGPR_32RegClassID;
2213       case 2: return AMDGPU::SGPR_64RegClassID;
2214       case 3: return AMDGPU::SGPR_96RegClassID;
2215       case 4: return AMDGPU::SGPR_128RegClassID;
2216       case 5: return AMDGPU::SGPR_160RegClassID;
2217       case 6: return AMDGPU::SGPR_192RegClassID;
2218       case 7: return AMDGPU::SGPR_224RegClassID;
2219       case 8: return AMDGPU::SGPR_256RegClassID;
2220       case 16: return AMDGPU::SGPR_512RegClassID;
2221     }
2222   } else if (Is == IS_AGPR) {
2223     switch (RegWidth) {
2224       default: return -1;
2225       case 1: return AMDGPU::AGPR_32RegClassID;
2226       case 2: return AMDGPU::AReg_64RegClassID;
2227       case 3: return AMDGPU::AReg_96RegClassID;
2228       case 4: return AMDGPU::AReg_128RegClassID;
2229       case 5: return AMDGPU::AReg_160RegClassID;
2230       case 6: return AMDGPU::AReg_192RegClassID;
2231       case 7: return AMDGPU::AReg_224RegClassID;
2232       case 8: return AMDGPU::AReg_256RegClassID;
2233       case 16: return AMDGPU::AReg_512RegClassID;
2234       case 32: return AMDGPU::AReg_1024RegClassID;
2235     }
2236   }
2237   return -1;
2238 }
2239 
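// Translate a special register name (including 'src_'-prefixed aliases) to
// the corresponding MC register, or NoRegister if the name is not recognized.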
2240 static unsigned getSpecialRegForName(StringRef RegName) {
2241   return StringSwitch<unsigned>(RegName)
2242     .Case("exec", AMDGPU::EXEC)
2243     .Case("vcc", AMDGPU::VCC)
2244     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2245     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2246     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2247     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2248     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2249     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2250     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2251     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2252     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2253     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2254     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2255     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2256     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2257     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2258     .Case("m0", AMDGPU::M0)
2259     .Case("vccz", AMDGPU::SRC_VCCZ)
2260     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2261     .Case("execz", AMDGPU::SRC_EXECZ)
2262     .Case("src_execz", AMDGPU::SRC_EXECZ)
2263     .Case("scc", AMDGPU::SRC_SCC)
2264     .Case("src_scc", AMDGPU::SRC_SCC)
2265     .Case("tba", AMDGPU::TBA)
2266     .Case("tma", AMDGPU::TMA)
2267     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2268     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2269     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2270     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2271     .Case("vcc_lo", AMDGPU::VCC_LO)
2272     .Case("vcc_hi", AMDGPU::VCC_HI)
2273     .Case("exec_lo", AMDGPU::EXEC_LO)
2274     .Case("exec_hi", AMDGPU::EXEC_HI)
2275     .Case("tma_lo", AMDGPU::TMA_LO)
2276     .Case("tma_hi", AMDGPU::TMA_HI)
2277     .Case("tba_lo", AMDGPU::TBA_LO)
2278     .Case("tba_hi", AMDGPU::TBA_HI)
2279     .Case("pc", AMDGPU::PC_REG)
2280     .Case("null", AMDGPU::SGPR_NULL)
2281     .Default(AMDGPU::NoRegister);
2282 }
2283 
2284 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2285                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2286   auto R = parseRegister();
2287   if (!R) return true;
2288   assert(R->isReg());
2289   RegNo = R->getReg();
2290   StartLoc = R->getStartLoc();
2291   EndLoc = R->getEndLoc();
2292   return false;
2293 }
2294 
2295 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2296                                     SMLoc &EndLoc) {
2297   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2298 }
2299 
2300 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2301                                                        SMLoc &StartLoc,
2302                                                        SMLoc &EndLoc) {
2303   bool Result =
2304       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2305   bool PendingErrors = getParser().hasPendingError();
2306   getParser().clearPendingErrors();
2307   if (PendingErrors)
2308     return MatchOperand_ParseFail;
2309   if (Result)
2310     return MatchOperand_NoMatch;
2311   return MatchOperand_Success;
2312 }
2313 
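// Try to extend the register range described by Reg/RegWidth with Reg1.
// Special register halves (e.g. exec_lo, exec_hi) are combined into the full
// register; regular registers must have consecutive indices. Emits an error
// and returns false on failure.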
2314 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2315                                             RegisterKind RegKind, unsigned Reg1,
2316                                             SMLoc Loc) {
2317   switch (RegKind) {
2318   case IS_SPECIAL:
2319     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2320       Reg = AMDGPU::EXEC;
2321       RegWidth = 2;
2322       return true;
2323     }
2324     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2325       Reg = AMDGPU::FLAT_SCR;
2326       RegWidth = 2;
2327       return true;
2328     }
2329     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2330       Reg = AMDGPU::XNACK_MASK;
2331       RegWidth = 2;
2332       return true;
2333     }
2334     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2335       Reg = AMDGPU::VCC;
2336       RegWidth = 2;
2337       return true;
2338     }
2339     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2340       Reg = AMDGPU::TBA;
2341       RegWidth = 2;
2342       return true;
2343     }
2344     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2345       Reg = AMDGPU::TMA;
2346       RegWidth = 2;
2347       return true;
2348     }
2349     Error(Loc, "register does not fit in the list");
2350     return false;
2351   case IS_VGPR:
2352   case IS_SGPR:
2353   case IS_AGPR:
2354   case IS_TTMP:
2355     if (Reg1 != Reg + RegWidth) {
2356       Error(Loc, "registers in a list must have consecutive indices");
2357       return false;
2358     }
2359     RegWidth++;
2360     return true;
2361   default:
2362     llvm_unreachable("unexpected register kind");
2363   }
2364 }
2365 
2366 struct RegInfo {
2367   StringLiteral Name;
2368   RegisterKind Kind;
2369 };
2370 
2371 static constexpr RegInfo RegularRegisters[] = {
2372   {{"v"},    IS_VGPR},
2373   {{"s"},    IS_SGPR},
2374   {{"ttmp"}, IS_TTMP},
2375   {{"acc"},  IS_AGPR},
2376   {{"a"},    IS_AGPR},
2377 };
2378 
2379 static bool isRegularReg(RegisterKind Kind) {
2380   return Kind == IS_VGPR ||
2381          Kind == IS_SGPR ||
2382          Kind == IS_TTMP ||
2383          Kind == IS_AGPR;
2384 }
2385 
2386 static const RegInfo* getRegularRegInfo(StringRef Str) {
2387   for (const RegInfo &Reg : RegularRegisters)
2388     if (Str.startswith(Reg.Name))
2389       return &Reg;
2390   return nullptr;
2391 }
2392 
2393 static bool getRegNum(StringRef Str, unsigned& Num) {
2394   return !Str.getAsInteger(10, Num);
2395 }
2396 
2397 bool
2398 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2399                             const AsmToken &NextToken) const {
2400 
2401   // A list of consecutive registers: [s0,s1,s2,s3]
2402   if (Token.is(AsmToken::LBrac))
2403     return true;
2404 
2405   if (!Token.is(AsmToken::Identifier))
2406     return false;
2407 
2408   // A single register like s0 or a range of registers like s[0:1]
2409 
2410   StringRef Str = Token.getString();
2411   const RegInfo *Reg = getRegularRegInfo(Str);
2412   if (Reg) {
2413     StringRef RegName = Reg->Name;
2414     StringRef RegSuffix = Str.substr(RegName.size());
2415     if (!RegSuffix.empty()) {
2416       unsigned Num;
2417       // A single register with an index: rXX
2418       if (getRegNum(RegSuffix, Num))
2419         return true;
2420     } else {
2421       // A range of registers: r[XX:YY].
2422       if (NextToken.is(AsmToken::LBrac))
2423         return true;
2424     }
2425   }
2426 
2427   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2428 }
2429 
2430 bool
2431 AMDGPUAsmParser::isRegister()
2432 {
2433   return isRegister(getToken(), peekToken());
2434 }
2435 
2436 unsigned
2437 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2438                                unsigned RegNum,
2439                                unsigned RegWidth,
2440                                SMLoc Loc) {
2441 
2442   assert(isRegularReg(RegKind));
2443 
2444   unsigned AlignSize = 1;
2445   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2446     // SGPR and TTMP registers must be aligned.
2447     // Max required alignment is 4 dwords.
2448     AlignSize = std::min(RegWidth, 4u);
2449   }
2450 
2451   if (RegNum % AlignSize != 0) {
2452     Error(Loc, "invalid register alignment");
2453     return AMDGPU::NoRegister;
2454   }
2455 
2456   unsigned RegIdx = RegNum / AlignSize;
2457   int RCID = getRegClass(RegKind, RegWidth);
2458   if (RCID == -1) {
2459     Error(Loc, "invalid or unsupported register size");
2460     return AMDGPU::NoRegister;
2461   }
2462 
2463   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2464   const MCRegisterClass RC = TRI->getRegClass(RCID);
2465   if (RegIdx >= RC.getNumRegs()) {
2466     Error(Loc, "register index is out of range");
2467     return AMDGPU::NoRegister;
2468   }
2469 
2470   return RC.getRegister(RegIdx);
2471 }
2472 
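// Parse a register index range of the form "[lo]" or "[lo:hi]". On success,
// Num is set to the first index and Width to the number of registers in the
// range.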
2473 bool
2474 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2475   int64_t RegLo, RegHi;
2476   if (!skipToken(AsmToken::LBrac, "missing register index"))
2477     return false;
2478 
2479   SMLoc FirstIdxLoc = getLoc();
2480   SMLoc SecondIdxLoc;
2481 
2482   if (!parseExpr(RegLo))
2483     return false;
2484 
2485   if (trySkipToken(AsmToken::Colon)) {
2486     SecondIdxLoc = getLoc();
2487     if (!parseExpr(RegHi))
2488       return false;
2489   } else {
2490     RegHi = RegLo;
2491   }
2492 
2493   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2494     return false;
2495 
2496   if (!isUInt<32>(RegLo)) {
2497     Error(FirstIdxLoc, "invalid register index");
2498     return false;
2499   }
2500 
2501   if (!isUInt<32>(RegHi)) {
2502     Error(SecondIdxLoc, "invalid register index");
2503     return false;
2504   }
2505 
2506   if (RegLo > RegHi) {
2507     Error(FirstIdxLoc, "first register index should not exceed second index");
2508     return false;
2509   }
2510 
2511   Num = static_cast<unsigned>(RegLo);
2512   Width = (RegHi - RegLo) + 1;
2513   return true;
2514 }
2515 
2516 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2517                                           unsigned &RegNum, unsigned &RegWidth,
2518                                           SmallVectorImpl<AsmToken> &Tokens) {
2519   assert(isToken(AsmToken::Identifier));
2520   unsigned Reg = getSpecialRegForName(getTokenStr());
2521   if (Reg) {
2522     RegNum = 0;
2523     RegWidth = 1;
2524     RegKind = IS_SPECIAL;
2525     Tokens.push_back(getToken());
2526     lex(); // skip register name
2527   }
2528   return Reg;
2529 }
2530 
2531 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2532                                           unsigned &RegNum, unsigned &RegWidth,
2533                                           SmallVectorImpl<AsmToken> &Tokens) {
2534   assert(isToken(AsmToken::Identifier));
2535   StringRef RegName = getTokenStr();
2536   auto Loc = getLoc();
2537 
2538   const RegInfo *RI = getRegularRegInfo(RegName);
2539   if (!RI) {
2540     Error(Loc, "invalid register name");
2541     return AMDGPU::NoRegister;
2542   }
2543 
2544   Tokens.push_back(getToken());
2545   lex(); // skip register name
2546 
2547   RegKind = RI->Kind;
2548   StringRef RegSuffix = RegName.substr(RI->Name.size());
2549   if (!RegSuffix.empty()) {
2550     // Single 32-bit register: vXX.
2551     if (!getRegNum(RegSuffix, RegNum)) {
2552       Error(Loc, "invalid register index");
2553       return AMDGPU::NoRegister;
2554     }
2555     RegWidth = 1;
2556   } else {
2557     // Range of registers: v[XX:YY]. ":YY" is optional.
2558     if (!ParseRegRange(RegNum, RegWidth))
2559       return AMDGPU::NoRegister;
2560   }
2561 
2562   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2563 }
2564 
2565 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2566                                        unsigned &RegWidth,
2567                                        SmallVectorImpl<AsmToken> &Tokens) {
2568   unsigned Reg = AMDGPU::NoRegister;
2569   auto ListLoc = getLoc();
2570 
2571   if (!skipToken(AsmToken::LBrac,
2572                  "expected a register or a list of registers")) {
2573     return AMDGPU::NoRegister;
2574   }
2575 
2576   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2577 
2578   auto Loc = getLoc();
2579   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2580     return AMDGPU::NoRegister;
2581   if (RegWidth != 1) {
2582     Error(Loc, "expected a single 32-bit register");
2583     return AMDGPU::NoRegister;
2584   }
2585 
2586   for (; trySkipToken(AsmToken::Comma); ) {
2587     RegisterKind NextRegKind;
2588     unsigned NextReg, NextRegNum, NextRegWidth;
2589     Loc = getLoc();
2590 
2591     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2592                              NextRegNum, NextRegWidth,
2593                              Tokens)) {
2594       return AMDGPU::NoRegister;
2595     }
2596     if (NextRegWidth != 1) {
2597       Error(Loc, "expected a single 32-bit register");
2598       return AMDGPU::NoRegister;
2599     }
2600     if (NextRegKind != RegKind) {
2601       Error(Loc, "registers in a list must be of the same kind");
2602       return AMDGPU::NoRegister;
2603     }
2604     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2605       return AMDGPU::NoRegister;
2606   }
2607 
2608   if (!skipToken(AsmToken::RBrac,
2609                  "expected a comma or a closing square bracket")) {
2610     return AMDGPU::NoRegister;
2611   }
2612 
2613   if (isRegularReg(RegKind))
2614     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2615 
2616   return Reg;
2617 }
2618 
2619 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2620                                           unsigned &RegNum, unsigned &RegWidth,
2621                                           SmallVectorImpl<AsmToken> &Tokens) {
2622   auto Loc = getLoc();
2623   Reg = AMDGPU::NoRegister;
2624 
2625   if (isToken(AsmToken::Identifier)) {
2626     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2627     if (Reg == AMDGPU::NoRegister)
2628       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2629   } else {
2630     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2631   }
2632 
2633   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2634   if (Reg == AMDGPU::NoRegister) {
2635     assert(Parser.hasPendingError());
2636     return false;
2637   }
2638 
2639   if (!subtargetHasRegister(*TRI, Reg)) {
2640     if (Reg == AMDGPU::SGPR_NULL) {
2641       Error(Loc, "'null' operand is not supported on this GPU");
2642     } else {
2643       Error(Loc, "register not available on this GPU");
2644     }
2645     return false;
2646   }
2647 
2648   return true;
2649 }
2650 
2651 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2652                                           unsigned &RegNum, unsigned &RegWidth,
2653                                           bool RestoreOnFailure /*=false*/) {
2654   Reg = AMDGPU::NoRegister;
2655 
2656   SmallVector<AsmToken, 1> Tokens;
2657   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2658     if (RestoreOnFailure) {
2659       while (!Tokens.empty()) {
2660         getLexer().UnLex(Tokens.pop_back_val());
2661       }
2662     }
2663     return true;
2664   }
2665   return false;
2666 }
2667 
2668 Optional<StringRef>
2669 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2670   switch (RegKind) {
2671   case IS_VGPR:
2672     return StringRef(".amdgcn.next_free_vgpr");
2673   case IS_SGPR:
2674     return StringRef(".amdgcn.next_free_sgpr");
2675   default:
2676     return None;
2677   }
2678 }
2679 
2680 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2681   auto SymbolName = getGprCountSymbolName(RegKind);
2682   assert(SymbolName && "initializing invalid register kind");
2683   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2684   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2685 }
2686 
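// Raise the .amdgcn.next_free_{v,s}gpr symbol, if present, so that it stays
// above the highest register index used so far.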
2687 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2688                                             unsigned DwordRegIndex,
2689                                             unsigned RegWidth) {
2690   // Symbols are only defined for GCN targets
2691   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2692     return true;
2693 
2694   auto SymbolName = getGprCountSymbolName(RegKind);
2695   if (!SymbolName)
2696     return true;
2697   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2698 
2699   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2700   int64_t OldCount;
2701 
2702   if (!Sym->isVariable())
2703     return !Error(getLoc(),
2704                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2705   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2706     return !Error(
2707         getLoc(),
2708         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2709 
2710   if (OldCount <= NewMax)
2711     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2712 
2713   return true;
2714 }
2715 
2716 std::unique_ptr<AMDGPUOperand>
2717 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2718   const auto &Tok = getToken();
2719   SMLoc StartLoc = Tok.getLoc();
2720   SMLoc EndLoc = Tok.getEndLoc();
2721   RegisterKind RegKind;
2722   unsigned Reg, RegNum, RegWidth;
2723 
2724   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2725     return nullptr;
2726   }
2727   if (isHsaAbiVersion3Or4(&getSTI())) {
2728     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2729       return nullptr;
2730   } else
2731     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2732   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2733 }
2734 
2735 OperandMatchResultTy
2736 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2737   // TODO: add syntactic sugar for 1/(2*PI)
2738 
2739   assert(!isRegister());
2740   assert(!isModifier());
2741 
2742   const auto& Tok = getToken();
2743   const auto& NextTok = peekToken();
2744   bool IsReal = Tok.is(AsmToken::Real);
2745   SMLoc S = getLoc();
2746   bool Negate = false;
2747 
2748   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2749     lex();
2750     IsReal = true;
2751     Negate = true;
2752   }
2753 
2754   if (IsReal) {
2755     // Floating-point expressions are not supported.
2756     // Can only allow floating-point literals with an
2757     // optional sign.
2758 
2759     StringRef Num = getTokenStr();
2760     lex();
2761 
2762     APFloat RealVal(APFloat::IEEEdouble());
2763     auto roundMode = APFloat::rmNearestTiesToEven;
2764     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2765       return MatchOperand_ParseFail;
2766     }
2767     if (Negate)
2768       RealVal.changeSign();
2769 
2770     Operands.push_back(
2771       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2772                                AMDGPUOperand::ImmTyNone, true));
2773 
2774     return MatchOperand_Success;
2775 
2776   } else {
2777     int64_t IntVal;
2778     const MCExpr *Expr;
2779     SMLoc S = getLoc();
2780 
2781     if (HasSP3AbsModifier) {
2782       // This is a workaround for handling expressions
2783       // as arguments of SP3 'abs' modifier, for example:
2784       //     |1.0|
2785       //     |-1|
2786       //     |1+x|
2787       // This syntax is not compatible with syntax of standard
2788       // MC expressions (due to the trailing '|').
2789       SMLoc EndLoc;
2790       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2791         return MatchOperand_ParseFail;
2792     } else {
2793       if (Parser.parseExpression(Expr))
2794         return MatchOperand_ParseFail;
2795     }
2796 
2797     if (Expr->evaluateAsAbsolute(IntVal)) {
2798       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2799     } else {
2800       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2801     }
2802 
2803     return MatchOperand_Success;
2804   }
2805 
2806   return MatchOperand_NoMatch;
2807 }
2808 
2809 OperandMatchResultTy
2810 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2811   if (!isRegister())
2812     return MatchOperand_NoMatch;
2813 
2814   if (auto R = parseRegister()) {
2815     assert(R->isReg());
2816     Operands.push_back(std::move(R));
2817     return MatchOperand_Success;
2818   }
2819   return MatchOperand_ParseFail;
2820 }
2821 
2822 OperandMatchResultTy
2823 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2824   auto res = parseReg(Operands);
2825   if (res != MatchOperand_NoMatch) {
2826     return res;
2827   } else if (isModifier()) {
2828     return MatchOperand_NoMatch;
2829   } else {
2830     return parseImm(Operands, HasSP3AbsMod);
2831   }
2832 }
2833 
2834 bool
2835 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2836   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2837     const auto &str = Token.getString();
2838     return str == "abs" || str == "neg" || str == "sext";
2839   }
2840   return false;
2841 }
2842 
2843 bool
2844 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2845   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2846 }
2847 
2848 bool
2849 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2850   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2851 }
2852 
2853 bool
2854 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2855   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2856 }
2857 
2858 // Check if this is an operand modifier or an opcode modifier
2859 // which may look like an expression but is not. We should
2860 // avoid parsing these modifiers as expressions. Currently
2861 // recognized sequences are:
2862 //   |...|
2863 //   abs(...)
2864 //   neg(...)
2865 //   sext(...)
2866 //   -reg
2867 //   -|...|
2868 //   -abs(...)
2869 //   name:...
2870 // Note that simple opcode modifiers like 'gds' may be parsed as
2871 // expressions; this is a special case. See getExpressionAsToken.
2872 //
2873 bool
2874 AMDGPUAsmParser::isModifier() {
2875 
2876   AsmToken Tok = getToken();
2877   AsmToken NextToken[2];
2878   peekTokens(NextToken);
2879 
2880   return isOperandModifier(Tok, NextToken[0]) ||
2881          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2882          isOpcodeModifierWithVal(Tok, NextToken[0]);
2883 }
2884 
2885 // Check if the current token is an SP3 'neg' modifier.
2886 // Currently this modifier is allowed in the following context:
2887 //
2888 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2889 // 2. Before an 'abs' modifier: -abs(...)
2890 // 3. Before an SP3 'abs' modifier: -|...|
2891 //
2892 // In all other cases "-" is handled as a part
2893 // of an expression that follows the sign.
2894 //
2895 // Note: When "-" is followed by an integer literal,
2896 // this is interpreted as integer negation rather
2897 // than a floating-point NEG modifier applied to the literal.
2898 // Besides being counter-intuitive, such use of a floating-point
2899 // NEG modifier would result in different meanings for
2900 // integer literals used with VOP1/2/C and VOP3,
2901 // for example:
2902 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2903 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2904 // Negative fp literals with a preceding "-" are
2905 // handled likewise for uniformity.
2906 //
2907 bool
2908 AMDGPUAsmParser::parseSP3NegModifier() {
2909 
2910   AsmToken NextToken[2];
2911   peekTokens(NextToken);
2912 
2913   if (isToken(AsmToken::Minus) &&
2914       (isRegister(NextToken[0], NextToken[1]) ||
2915        NextToken[0].is(AsmToken::Pipe) ||
2916        isId(NextToken[0], "abs"))) {
2917     lex();
2918     return true;
2919   }
2920 
2921   return false;
2922 }
2923 
2924 OperandMatchResultTy
2925 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2926                                               bool AllowImm) {
2927   bool Neg, SP3Neg;
2928   bool Abs, SP3Abs;
2929   SMLoc Loc;
2930 
2931   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2932   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2933     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2934     return MatchOperand_ParseFail;
2935   }
2936 
2937   SP3Neg = parseSP3NegModifier();
2938 
2939   Loc = getLoc();
2940   Neg = trySkipId("neg");
2941   if (Neg && SP3Neg) {
2942     Error(Loc, "expected register or immediate");
2943     return MatchOperand_ParseFail;
2944   }
2945   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2946     return MatchOperand_ParseFail;
2947 
2948   Abs = trySkipId("abs");
2949   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2950     return MatchOperand_ParseFail;
2951 
2952   Loc = getLoc();
2953   SP3Abs = trySkipToken(AsmToken::Pipe);
2954   if (Abs && SP3Abs) {
2955     Error(Loc, "expected register or immediate");
2956     return MatchOperand_ParseFail;
2957   }
2958 
2959   OperandMatchResultTy Res;
2960   if (AllowImm) {
2961     Res = parseRegOrImm(Operands, SP3Abs);
2962   } else {
2963     Res = parseReg(Operands);
2964   }
2965   if (Res != MatchOperand_Success) {
2966     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2967   }
2968 
2969   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2970     return MatchOperand_ParseFail;
2971   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2972     return MatchOperand_ParseFail;
2973   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2974     return MatchOperand_ParseFail;
2975 
2976   AMDGPUOperand::Modifiers Mods;
2977   Mods.Abs = Abs || SP3Abs;
2978   Mods.Neg = Neg || SP3Neg;
2979 
2980   if (Mods.hasFPModifiers()) {
2981     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2982     if (Op.isExpr()) {
2983       Error(Op.getStartLoc(), "expected an absolute expression");
2984       return MatchOperand_ParseFail;
2985     }
2986     Op.setModifiers(Mods);
2987   }
2988   return MatchOperand_Success;
2989 }
2990 
2991 OperandMatchResultTy
2992 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2993                                                bool AllowImm) {
2994   bool Sext = trySkipId("sext");
2995   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2996     return MatchOperand_ParseFail;
2997 
2998   OperandMatchResultTy Res;
2999   if (AllowImm) {
3000     Res = parseRegOrImm(Operands);
3001   } else {
3002     Res = parseReg(Operands);
3003   }
3004   if (Res != MatchOperand_Success) {
3005     return Sext? MatchOperand_ParseFail : Res;
3006   }
3007 
3008   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3009     return MatchOperand_ParseFail;
3010 
3011   AMDGPUOperand::Modifiers Mods;
3012   Mods.Sext = Sext;
3013 
3014   if (Mods.hasIntModifiers()) {
3015     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3016     if (Op.isExpr()) {
3017       Error(Op.getStartLoc(), "expected an absolute expression");
3018       return MatchOperand_ParseFail;
3019     }
3020     Op.setModifiers(Mods);
3021   }
3022 
3023   return MatchOperand_Success;
3024 }
3025 
3026 OperandMatchResultTy
3027 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3028   return parseRegOrImmWithFPInputMods(Operands, false);
3029 }
3030 
3031 OperandMatchResultTy
3032 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3033   return parseRegOrImmWithIntInputMods(Operands, false);
3034 }
3035 
3036 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3037   auto Loc = getLoc();
3038   if (trySkipId("off")) {
3039     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3040                                                 AMDGPUOperand::ImmTyOff, false));
3041     return MatchOperand_Success;
3042   }
3043 
3044   if (!isRegister())
3045     return MatchOperand_NoMatch;
3046 
3047   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3048   if (Reg) {
3049     Operands.push_back(std::move(Reg));
3050     return MatchOperand_Success;
3051   }
3052 
3053   return MatchOperand_ParseFail;
3054 
3055 }
3056 
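// Reject matches that are inconsistent with the encoding forced by the
// mnemonic suffix (e32/e64/dpp/sdwa), and check a few encoding-specific
// operand restrictions.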
3057 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3058   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3059 
3060   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3061       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3062       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3063       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3064     return Match_InvalidOperand;
3065 
3066   if ((TSFlags & SIInstrFlags::VOP3) &&
3067       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3068       getForcedEncodingSize() != 64)
3069     return Match_PreferE32;
3070 
3071   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3072       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3073     // v_mac_f32/16 allow only dst_sel == DWORD;
3074     auto OpNum =
3075         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3076     const auto &Op = Inst.getOperand(OpNum);
3077     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3078       return Match_InvalidOperand;
3079     }
3080   }
3081 
3082   return Match_Success;
3083 }
3084 
3085 static ArrayRef<unsigned> getAllVariants() {
3086   static const unsigned Variants[] = {
3087     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3088     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3089   };
3090 
3091   return makeArrayRef(Variants);
3092 }
3093 
3094 // Which asm variants we should check
3095 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3096   if (getForcedEncodingSize() == 32) {
3097     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3098     return makeArrayRef(Variants);
3099   }
3100 
3101   if (isForcedVOP3()) {
3102     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3103     return makeArrayRef(Variants);
3104   }
3105 
3106   if (isForcedSDWA()) {
3107     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3108                                         AMDGPUAsmVariants::SDWA9};
3109     return makeArrayRef(Variants);
3110   }
3111 
3112   if (isForcedDPP()) {
3113     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3114     return makeArrayRef(Variants);
3115   }
3116 
3117   return getAllVariants();
3118 }
3119 
3120 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3121   if (getForcedEncodingSize() == 32)
3122     return "e32";
3123 
3124   if (isForcedVOP3())
3125     return "e64";
3126 
3127   if (isForcedSDWA())
3128     return "sdwa";
3129 
3130   if (isForcedDPP())
3131     return "dpp";
3132 
3133   return "";
3134 }
3135 
3136 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3137   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3138   const unsigned Num = Desc.getNumImplicitUses();
3139   for (unsigned i = 0; i < Num; ++i) {
3140     unsigned Reg = Desc.ImplicitUses[i];
3141     switch (Reg) {
3142     case AMDGPU::FLAT_SCR:
3143     case AMDGPU::VCC:
3144     case AMDGPU::VCC_LO:
3145     case AMDGPU::VCC_HI:
3146     case AMDGPU::M0:
3147       return Reg;
3148     default:
3149       break;
3150     }
3151   }
3152   return AMDGPU::NoRegister;
3153 }
3154 
3155 // NB: This code is correct only when used to check constant
3156 // bus limitations because GFX7 supports no f16 inline constants.
3157 // Note that there are no cases when a GFX7 opcode violates
3158 // constant bus limitations due to the use of an f16 constant.
3159 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3160                                        unsigned OpIdx) const {
3161   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3162 
3163   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3164     return false;
3165   }
3166 
3167   const MCOperand &MO = Inst.getOperand(OpIdx);
3168 
3169   int64_t Val = MO.getImm();
3170   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3171 
3172   switch (OpSize) { // expected operand size
3173   case 8:
3174     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3175   case 4:
3176     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3177   case 2: {
3178     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3179     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3180         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3181         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3182       return AMDGPU::isInlinableIntLiteral(Val);
3183 
3184     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3185         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3186         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3187       return AMDGPU::isInlinableIntLiteralV216(Val);
3188 
3189     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3190         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3191         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3192       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3193 
3194     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3195   }
3196   default:
3197     llvm_unreachable("invalid operand size");
3198   }
3199 }
3200 
3201 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3202   if (!isGFX10Plus())
3203     return 1;
3204 
3205   switch (Opcode) {
3206   // 64-bit shift instructions can use only one scalar value input
3207   case AMDGPU::V_LSHLREV_B64_e64:
3208   case AMDGPU::V_LSHLREV_B64_gfx10:
3209   case AMDGPU::V_LSHRREV_B64_e64:
3210   case AMDGPU::V_LSHRREV_B64_gfx10:
3211   case AMDGPU::V_ASHRREV_I64_e64:
3212   case AMDGPU::V_ASHRREV_I64_gfx10:
3213   case AMDGPU::V_LSHL_B64_e64:
3214   case AMDGPU::V_LSHR_B64_e64:
3215   case AMDGPU::V_ASHR_I64_e64:
3216     return 1;
3217   default:
3218     return 2;
3219   }
3220 }
3221 
3222 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3223   const MCOperand &MO = Inst.getOperand(OpIdx);
3224   if (MO.isImm()) {
3225     return !isInlineConstant(Inst, OpIdx);
3226   } else if (MO.isReg()) {
3227     auto Reg = MO.getReg();
3228     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3229     auto PReg = mc2PseudoReg(Reg);
3230     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3231   } else {
3232     return true;
3233   }
3234 }
3235 
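// Check the number of scalar (constant bus) reads an instruction performs.
// Illustrative example: on pre-GFX10 targets "v_add_f32 v0, s0, s1" needs
// two SGPR reads and is rejected, while GFX10+ generally allows two reads
// (see getConstantBusLimit above).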
3236 bool
3237 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3238                                                 const OperandVector &Operands) {
3239   const unsigned Opcode = Inst.getOpcode();
3240   const MCInstrDesc &Desc = MII.get(Opcode);
3241   unsigned LastSGPR = AMDGPU::NoRegister;
3242   unsigned ConstantBusUseCount = 0;
3243   unsigned NumLiterals = 0;
3244   unsigned LiteralSize;
3245 
3246   if (Desc.TSFlags &
3247       (SIInstrFlags::VOPC |
3248        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3249        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3250        SIInstrFlags::SDWA)) {
3251     // Check special imm operands (used by madmk, etc)
3252     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3253       ++ConstantBusUseCount;
3254     }
3255 
3256     SmallDenseSet<unsigned> SGPRsUsed;
3257     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3258     if (SGPRUsed != AMDGPU::NoRegister) {
3259       SGPRsUsed.insert(SGPRUsed);
3260       ++ConstantBusUseCount;
3261     }
3262 
3263     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3264     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3265     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3266 
3267     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3268 
3269     for (int OpIdx : OpIndices) {
3270       if (OpIdx == -1) break;
3271 
3272       const MCOperand &MO = Inst.getOperand(OpIdx);
3273       if (usesConstantBus(Inst, OpIdx)) {
3274         if (MO.isReg()) {
3275           LastSGPR = mc2PseudoReg(MO.getReg());
3276           // Pairs of registers with partial intersections like these
3277           //   s0, s[0:1]
3278           //   flat_scratch_lo, flat_scratch
3279           //   flat_scratch_lo, flat_scratch_hi
3280           // are theoretically valid but they are disabled anyway.
3281           // Note that this code mimics SIInstrInfo::verifyInstruction
3282           if (!SGPRsUsed.count(LastSGPR)) {
3283             SGPRsUsed.insert(LastSGPR);
3284             ++ConstantBusUseCount;
3285           }
3286         } else { // Expression or a literal
3287 
3288           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3289             continue; // special operand like VINTERP attr_chan
3290 
3291           // An instruction may use only one literal.
3292           // This has been validated in a previous step.
3293           // See validateVOP3Literal.
3294           // This literal may be used as more than one operand.
3295           // If all these operands are of the same size,
3296           // this literal counts as one scalar value.
3297           // Otherwise it counts as 2 scalar values.
3298           // See "GFX10 Shader Programming", section 3.6.2.3.
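          // For example (illustrative): a 32-bit literal shared by two
          // 32-bit operands counts once, but if the same bit pattern also
          // feeds a 64-bit operand it counts as two scalar values.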
3299 
3300           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3301           if (Size < 4) Size = 4;
3302 
3303           if (NumLiterals == 0) {
3304             NumLiterals = 1;
3305             LiteralSize = Size;
3306           } else if (LiteralSize != Size) {
3307             NumLiterals = 2;
3308           }
3309         }
3310       }
3311     }
3312   }
3313   ConstantBusUseCount += NumLiterals;
3314 
3315   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3316     return true;
3317 
3318   SMLoc LitLoc = getLitLoc(Operands);
3319   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3320   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3321   Error(Loc, "invalid operand (violates constant bus restrictions)");
3322   return false;
3323 }
3324 
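// Enforce the earlyclobber constraint on vdst: the destination register must
// not overlap any source register. Illustrative example (assuming an opcode
// with an earlyclobber vdst such as v_mqsad_u32_u8): writing v[0:3] while
// reading v[0:1] would be rejected.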
3325 bool
3326 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3327                                                  const OperandVector &Operands) {
3328   const unsigned Opcode = Inst.getOpcode();
3329   const MCInstrDesc &Desc = MII.get(Opcode);
3330 
3331   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3332   if (DstIdx == -1 ||
3333       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3334     return true;
3335   }
3336 
3337   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3338 
3339   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3340   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3341   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3342 
3343   assert(DstIdx != -1);
3344   const MCOperand &Dst = Inst.getOperand(DstIdx);
3345   assert(Dst.isReg());
3346   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3347 
3348   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3349 
3350   for (int SrcIdx : SrcIndices) {
3351     if (SrcIdx == -1) break;
3352     const MCOperand &Src = Inst.getOperand(SrcIdx);
3353     if (Src.isReg()) {
3354       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3355       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3356         Error(getRegLoc(SrcReg, Operands),
3357           "destination must be different than all sources");
3358         return false;
3359       }
3360     }
3361   }
3362 
3363   return true;
3364 }
3365 
3366 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3367 
3368   const unsigned Opc = Inst.getOpcode();
3369   const MCInstrDesc &Desc = MII.get(Opc);
3370 
3371   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3372     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3373     assert(ClampIdx != -1);
3374     return Inst.getOperand(ClampIdx).getImm() == 0;
3375   }
3376 
3377   return true;
3378 }
3379 
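// Check that the vdata register count matches the components selected by
// dmask, plus one extra register if tfe is set. Illustrative example:
// "image_load v[0:2], v[4:5], s[8:15] dmask:0x7" returns three components
// and needs three data VGPRs; with packed d16 the requirement is halved.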
3380 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3381 
3382   const unsigned Opc = Inst.getOpcode();
3383   const MCInstrDesc &Desc = MII.get(Opc);
3384 
3385   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3386     return true;
3387 
3388   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3389   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3390   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3391 
3392   assert(VDataIdx != -1);
3393 
3394   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3395     return true;
3396 
3397   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3398   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3399   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3400   if (DMask == 0)
3401     DMask = 1;
3402 
3403   unsigned DataSize =
3404     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3405   if (hasPackedD16()) {
3406     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3407     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3408       DataSize = (DataSize + 1) / 2;
3409   }
3410 
3411   return (VDataSize / 4) == DataSize + TFESize;
3412 }
3413 
3414 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3415   const unsigned Opc = Inst.getOpcode();
3416   const MCInstrDesc &Desc = MII.get(Opc);
3417 
3418   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3419     return true;
3420 
3421   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3422 
3423   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3424       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3425   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3426   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3427   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3428   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3429 
3430   assert(VAddr0Idx != -1);
3431   assert(SrsrcIdx != -1);
3432   assert(SrsrcIdx > VAddr0Idx);
3433 
3434   if (DimIdx == -1)
3435     return true; // intersect_ray
3436 
3437   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3438   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3439   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3440   unsigned ActualAddrSize =
3441       IsNSA ? SrsrcIdx - VAddr0Idx
3442             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3443   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3444 
3445   unsigned ExpectedAddrSize =
3446       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3447 
3448   if (!IsNSA) {
3449     if (ExpectedAddrSize > 8)
3450       ExpectedAddrSize = 16;
3451     else if (ExpectedAddrSize > 5)
3452       ExpectedAddrSize = 8;
3453 
3454     // Allow an oversized 8-VGPR vaddr when only 5 VGPRs are required.
3455     // This provides backward compatibility for assembly created
3456     // before 160b types were directly supported.
3457     if (ExpectedAddrSize == 5 && ActualAddrSize == 8)
3458       return true;
3459   }
3460 
3461   return ActualAddrSize == ExpectedAddrSize;
3462 }
3463 
3464 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3465 
3466   const unsigned Opc = Inst.getOpcode();
3467   const MCInstrDesc &Desc = MII.get(Opc);
3468 
3469   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3470     return true;
3471   if (!Desc.mayLoad() || !Desc.mayStore())
3472     return true; // Not atomic
3473 
3474   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3475   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3476 
3477   // This is an incomplete check because image_atomic_cmpswap
3478   // may only use 0x3 and 0xf while other atomic operations
3479   // may use 0x1 and 0x3. However these limitations are
3480   // verified when we check that dmask matches dst size.
3481   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3482 }
3483 
3484 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3485 
3486   const unsigned Opc = Inst.getOpcode();
3487   const MCInstrDesc &Desc = MII.get(Opc);
3488 
3489   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3490     return true;
3491 
3492   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3493   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3494 
3495   // GATHER4 instructions use dmask in a different fashion compared to
3496   // other MIMG instructions. The only useful DMASK values are
3497   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3498   // (red,red,red,red) etc.) The ISA document doesn't mention
3499   // this.
3500   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3501 }
3502 
3503 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3504   const unsigned Opc = Inst.getOpcode();
3505   const MCInstrDesc &Desc = MII.get(Opc);
3506 
3507   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3508     return true;
3509 
3510   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3511   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3512       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3513 
3514   if (!BaseOpcode->MSAA)
3515     return true;
3516 
3517   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3518   assert(DimIdx != -1);
3519 
3520   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3521   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3522 
3523   return DimInfo->MSAA;
3524 }
3525 
3526 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3527 {
3528   switch (Opcode) {
3529   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3530   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3531   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3532     return true;
3533   default:
3534     return false;
3535   }
3536 }
3537 
3538 // movrels* opcodes should only allow VGPRs as src0.
3539 // This is specified in .td description for vop1/vop3,
3540 // but sdwa is handled differently. See isSDWAOperand.
3541 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3542                                       const OperandVector &Operands) {
3543 
3544   const unsigned Opc = Inst.getOpcode();
3545   const MCInstrDesc &Desc = MII.get(Opc);
3546 
3547   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3548     return true;
3549 
3550   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3551   assert(Src0Idx != -1);
3552 
3553   SMLoc ErrLoc;
3554   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3555   if (Src0.isReg()) {
3556     auto Reg = mc2PseudoReg(Src0.getReg());
3557     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3558     if (!isSGPR(Reg, TRI))
3559       return true;
3560     ErrLoc = getRegLoc(Reg, Operands);
3561   } else {
3562     ErrLoc = getConstLoc(Operands);
3563   }
3564 
3565   Error(ErrLoc, "source operand must be a VGPR");
3566   return false;
3567 }
3568 
3569 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3570                                           const OperandVector &Operands) {
3571 
3572   const unsigned Opc = Inst.getOpcode();
3573 
3574   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3575     return true;
3576 
3577   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3578   assert(Src0Idx != -1);
3579 
3580   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3581   if (!Src0.isReg())
3582     return true;
3583 
3584   auto Reg = mc2PseudoReg(Src0.getReg());
3585   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3586   if (isSGPR(Reg, TRI)) {
3587     Error(getRegLoc(Reg, Operands),
3588           "source operand must be either a VGPR or an inline constant");
3589     return false;
3590   }
3591 
3592   return true;
3593 }
3594 
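// The v_div_scale_* (VOP3B) instructions do not accept the |abs| source
// modifier; reject any src*_modifiers operand with the ABS bit set.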
3595 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3596   switch (Inst.getOpcode()) {
3597   default:
3598     return true;
3599   case V_DIV_SCALE_F32_gfx6_gfx7:
3600   case V_DIV_SCALE_F32_vi:
3601   case V_DIV_SCALE_F32_gfx10:
3602   case V_DIV_SCALE_F64_gfx6_gfx7:
3603   case V_DIV_SCALE_F64_vi:
3604   case V_DIV_SCALE_F64_gfx10:
3605     break;
3606   }
3607 
3608   // TODO: Check that src0 = src1 or src2.
3609 
3610   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3611                     AMDGPU::OpName::src1_modifiers,
3612                     AMDGPU::OpName::src2_modifiers}) {
3613     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3614             .getImm() &
3615         SISrcMods::ABS) {
3616       return false;
3617     }
3618   }
3619 
3620   return true;
3621 }
3622 
3623 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3624 
3625   const unsigned Opc = Inst.getOpcode();
3626   const MCInstrDesc &Desc = MII.get(Opc);
3627 
3628   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3629     return true;
3630 
3631   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3632   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3633     if (isCI() || isSI())
3634       return false;
3635   }
3636 
3637   return true;
3638 }
3639 
3640 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3641   const unsigned Opc = Inst.getOpcode();
3642   const MCInstrDesc &Desc = MII.get(Opc);
3643 
3644   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3645     return true;
3646 
3647   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3648   if (DimIdx < 0)
3649     return true;
3650 
3651   long Imm = Inst.getOperand(DimIdx).getImm();
3652   if (Imm < 0 || Imm >= 8)
3653     return false;
3654 
3655   return true;
3656 }
3657 
3658 static bool IsRevOpcode(const unsigned Opcode)
3659 {
3660   switch (Opcode) {
3661   case AMDGPU::V_SUBREV_F32_e32:
3662   case AMDGPU::V_SUBREV_F32_e64:
3663   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3664   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3665   case AMDGPU::V_SUBREV_F32_e32_vi:
3666   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3667   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3668   case AMDGPU::V_SUBREV_F32_e64_vi:
3669 
3670   case AMDGPU::V_SUBREV_CO_U32_e32:
3671   case AMDGPU::V_SUBREV_CO_U32_e64:
3672   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3673   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3674 
3675   case AMDGPU::V_SUBBREV_U32_e32:
3676   case AMDGPU::V_SUBBREV_U32_e64:
3677   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3678   case AMDGPU::V_SUBBREV_U32_e32_vi:
3679   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3680   case AMDGPU::V_SUBBREV_U32_e64_vi:
3681 
3682   case AMDGPU::V_SUBREV_U32_e32:
3683   case AMDGPU::V_SUBREV_U32_e64:
3684   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3685   case AMDGPU::V_SUBREV_U32_e32_vi:
3686   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3687   case AMDGPU::V_SUBREV_U32_e64_vi:
3688 
3689   case AMDGPU::V_SUBREV_F16_e32:
3690   case AMDGPU::V_SUBREV_F16_e64:
3691   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3692   case AMDGPU::V_SUBREV_F16_e32_vi:
3693   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3694   case AMDGPU::V_SUBREV_F16_e64_vi:
3695 
3696   case AMDGPU::V_SUBREV_U16_e32:
3697   case AMDGPU::V_SUBREV_U16_e64:
3698   case AMDGPU::V_SUBREV_U16_e32_vi:
3699   case AMDGPU::V_SUBREV_U16_e64_vi:
3700 
3701   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3702   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3703   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3704 
3705   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3706   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3707 
3708   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3709   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3710 
3711   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3712   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3713 
3714   case AMDGPU::V_LSHRREV_B32_e32:
3715   case AMDGPU::V_LSHRREV_B32_e64:
3716   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3717   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3718   case AMDGPU::V_LSHRREV_B32_e32_vi:
3719   case AMDGPU::V_LSHRREV_B32_e64_vi:
3720   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3721   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3722 
3723   case AMDGPU::V_ASHRREV_I32_e32:
3724   case AMDGPU::V_ASHRREV_I32_e64:
3725   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3726   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3727   case AMDGPU::V_ASHRREV_I32_e32_vi:
3728   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3729   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3730   case AMDGPU::V_ASHRREV_I32_e64_vi:
3731 
3732   case AMDGPU::V_LSHLREV_B32_e32:
3733   case AMDGPU::V_LSHLREV_B32_e64:
3734   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3735   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3736   case AMDGPU::V_LSHLREV_B32_e32_vi:
3737   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3738   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3739   case AMDGPU::V_LSHLREV_B32_e64_vi:
3740 
3741   case AMDGPU::V_LSHLREV_B16_e32:
3742   case AMDGPU::V_LSHLREV_B16_e64:
3743   case AMDGPU::V_LSHLREV_B16_e32_vi:
3744   case AMDGPU::V_LSHLREV_B16_e64_vi:
3745   case AMDGPU::V_LSHLREV_B16_gfx10:
3746 
3747   case AMDGPU::V_LSHRREV_B16_e32:
3748   case AMDGPU::V_LSHRREV_B16_e64:
3749   case AMDGPU::V_LSHRREV_B16_e32_vi:
3750   case AMDGPU::V_LSHRREV_B16_e64_vi:
3751   case AMDGPU::V_LSHRREV_B16_gfx10:
3752 
3753   case AMDGPU::V_ASHRREV_I16_e32:
3754   case AMDGPU::V_ASHRREV_I16_e64:
3755   case AMDGPU::V_ASHRREV_I16_e32_vi:
3756   case AMDGPU::V_ASHRREV_I16_e64_vi:
3757   case AMDGPU::V_ASHRREV_I16_gfx10:
3758 
3759   case AMDGPU::V_LSHLREV_B64_e64:
3760   case AMDGPU::V_LSHLREV_B64_gfx10:
3761   case AMDGPU::V_LSHLREV_B64_vi:
3762 
3763   case AMDGPU::V_LSHRREV_B64_e64:
3764   case AMDGPU::V_LSHRREV_B64_gfx10:
3765   case AMDGPU::V_LSHRREV_B64_vi:
3766 
3767   case AMDGPU::V_ASHRREV_I64_e64:
3768   case AMDGPU::V_ASHRREV_I64_gfx10:
3769   case AMDGPU::V_ASHRREV_I64_vi:
3770 
3771   case AMDGPU::V_PK_LSHLREV_B16:
3772   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3773   case AMDGPU::V_PK_LSHLREV_B16_vi:
3774 
3775   case AMDGPU::V_PK_LSHRREV_B16:
3776   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3777   case AMDGPU::V_PK_LSHRREV_B16_vi:
3778   case AMDGPU::V_PK_ASHRREV_I16:
3779   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3780   case AMDGPU::V_PK_ASHRREV_I16_vi:
3781     return true;
3782   default:
3783     return false;
3784   }
3785 }
3786 
3787 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3788 
3789   using namespace SIInstrFlags;
3790   const unsigned Opcode = Inst.getOpcode();
3791   const MCInstrDesc &Desc = MII.get(Opcode);
3792 
3793   // lds_direct register is defined so that it can be used
3794   // with 9-bit operands only. Ignore encodings which do not accept these.
3795   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3796   if ((Desc.TSFlags & Enc) == 0)
3797     return None;
3798 
3799   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3800     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3801     if (SrcIdx == -1)
3802       break;
3803     const auto &Src = Inst.getOperand(SrcIdx);
3804     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3805 
3806       if (isGFX90A())
3807         return StringRef("lds_direct is not supported on this GPU");
3808 
3809       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3810         return StringRef("lds_direct cannot be used with this instruction");
3811 
3812       if (SrcName != OpName::src0)
3813         return StringRef("lds_direct may be used as src0 only");
3814     }
3815   }
3816 
3817   return None;
3818 }
3819 
3820 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3821   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3822     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3823     if (Op.isFlatOffset())
3824       return Op.getStartLoc();
3825   }
3826   return getLoc();
3827 }
3828 
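// Validate the "offset:" modifier of flat instructions, e.g. (illustrative)
// "global_load_dword v0, v[2:3], off offset:-16". GLOBAL and SCRATCH accept
// a signed offset, a plain FLAT offset must be unsigned, and the bit width
// depends on the target (see getNumFlatOffsetBits).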
3829 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3830                                          const OperandVector &Operands) {
3831   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3832   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3833     return true;
3834 
3835   auto Opcode = Inst.getOpcode();
3836   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3837   assert(OpNum != -1);
3838 
3839   const auto &Op = Inst.getOperand(OpNum);
3840   if (!hasFlatOffsets() && Op.getImm() != 0) {
3841     Error(getFlatOffsetLoc(Operands),
3842           "flat offset modifier is not supported on this GPU");
3843     return false;
3844   }
3845 
3846   // For a plain FLAT segment the offset must be positive (the MSB is
3847   // ignored and forced to zero); GLOBAL and SCRATCH accept a signed offset.
3848   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3849     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3850     if (!isIntN(OffsetSize, Op.getImm())) {
3851       Error(getFlatOffsetLoc(Operands),
3852             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3853       return false;
3854     }
3855   } else {
3856     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3857     if (!isUIntN(OffsetSize, Op.getImm())) {
3858       Error(getFlatOffsetLoc(Operands),
3859             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3860       return false;
3861     }
3862   }
3863 
3864   return true;
3865 }
3866 
3867 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3868   // Start with second operand because SMEM Offset cannot be dst or src0.
3869   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3870     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3871     if (Op.isSMEMOffset())
3872       return Op.getStartLoc();
3873   }
3874   return getLoc();
3875 }
3876 
3877 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3878                                          const OperandVector &Operands) {
3879   if (isCI() || isSI())
3880     return true;
3881 
3882   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3883   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3884     return true;
3885 
3886   auto Opcode = Inst.getOpcode();
3887   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3888   if (OpNum == -1)
3889     return true;
3890 
3891   const auto &Op = Inst.getOperand(OpNum);
3892   if (!Op.isImm())
3893     return true;
3894 
3895   uint64_t Offset = Op.getImm();
3896   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3897   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3898       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3899     return true;
3900 
3901   Error(getSMEMOffsetLoc(Operands),
3902         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3903                                "expected a 21-bit signed offset");
3904 
3905   return false;
3906 }
3907 
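// SOP2/SOPC instructions can encode at most one 32-bit literal, e.g.
// (illustrative) "s_add_u32 s0, 0x11111111, 0x22222222" is rejected, while
// repeating the same literal value in both sources is accepted.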
3908 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3909   unsigned Opcode = Inst.getOpcode();
3910   const MCInstrDesc &Desc = MII.get(Opcode);
3911   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3912     return true;
3913 
3914   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3915   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3916 
3917   const int OpIndices[] = { Src0Idx, Src1Idx };
3918 
3919   unsigned NumExprs = 0;
3920   unsigned NumLiterals = 0;
3921   uint32_t LiteralValue;
3922 
3923   for (int OpIdx : OpIndices) {
3924     if (OpIdx == -1) break;
3925 
3926     const MCOperand &MO = Inst.getOperand(OpIdx);
3927     // Exclude special imm operands (like those used by s_set_gpr_idx_on)
3928     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3929       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3930         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3931         if (NumLiterals == 0 || LiteralValue != Value) {
3932           LiteralValue = Value;
3933           ++NumLiterals;
3934         }
3935       } else if (MO.isExpr()) {
3936         ++NumExprs;
3937       }
3938     }
3939   }
3940 
3941   return NumLiterals + NumExprs <= 1;
3942 }
3943 
3944 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3945   const unsigned Opc = Inst.getOpcode();
3946   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3947       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3948     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3949     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3950 
3951     if (OpSel & ~3)
3952       return false;
3953   }
3954   return true;
3955 }
3956 
3957 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3958                                   const OperandVector &Operands) {
3959   const unsigned Opc = Inst.getOpcode();
3960   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3961   if (DppCtrlIdx < 0)
3962     return true;
3963   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3964 
3965   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
3966     // DPP64 is supported for row_newbcast only.
3967     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3968     if (Src0Idx >= 0 &&
3969         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3970       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3971       Error(S, "64 bit dpp only supports row_newbcast");
3972       return false;
3973     }
3974   }
3975 
3976   return true;
3977 }
3978 
3979 // Check if VCC register matches wavefront size
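// (wave64 code names the condition/carry register "vcc" while wave32 uses
// "vcc_lo").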
3980 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3981   auto FB = getFeatureBits();
3982   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3983     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3984 }
3985 
3986 // VOP3 literal is only allowed in GFX10+ and only one can be used
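// (illustrative: on GFX10 "v_add3_u32 v0, v1, v2, 0x1234" is accepted, but
// two distinct literals in one VOP3 instruction are not).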
3987 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3988                                           const OperandVector &Operands) {
3989   unsigned Opcode = Inst.getOpcode();
3990   const MCInstrDesc &Desc = MII.get(Opcode);
3991   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3992     return true;
3993 
3994   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3995   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3996   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3997 
3998   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3999 
4000   unsigned NumExprs = 0;
4001   unsigned NumLiterals = 0;
4002   uint32_t LiteralValue;
4003 
4004   for (int OpIdx : OpIndices) {
4005     if (OpIdx == -1) break;
4006 
4007     const MCOperand &MO = Inst.getOperand(OpIdx);
4008     if (!MO.isImm() && !MO.isExpr())
4009       continue;
4010     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4011       continue;
4012 
4013     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4014         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4015       Error(getConstLoc(Operands),
4016             "inline constants are not allowed for this operand");
4017       return false;
4018     }
4019 
4020     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4021       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4022       if (NumLiterals == 0 || LiteralValue != Value) {
4023         LiteralValue = Value;
4024         ++NumLiterals;
4025       }
4026     } else if (MO.isExpr()) {
4027       ++NumExprs;
4028     }
4029   }
4030   NumLiterals += NumExprs;
4031 
4032   if (!NumLiterals)
4033     return true;
4034 
4035   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4036     Error(getLitLoc(Operands), "literal operands are not supported");
4037     return false;
4038   }
4039 
4040   if (NumLiterals > 1) {
4041     Error(getLitLoc(Operands), "only one literal operand is allowed");
4042     return false;
4043   }
4044 
4045   return true;
4046 }
4047 
4048 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4049 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4050                          const MCRegisterInfo *MRI) {
4051   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4052   if (OpIdx < 0)
4053     return -1;
4054 
4055   const MCOperand &Op = Inst.getOperand(OpIdx);
4056   if (!Op.isReg())
4057     return -1;
4058 
4059   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4060   auto Reg = Sub ? Sub : Op.getReg();
4061   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4062   return AGPR32.contains(Reg) ? 1 : 0;
4063 }
4064 
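// For FLAT/MUBUF/MTBUF/MIMG/DS loads and stores: on gfx90a the data and
// destination operands must be uniformly VGPRs or uniformly AGPRs; on other
// targets AGPR data/dst operands are not accepted at all.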
4065 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4066   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4067   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4068                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4069                   SIInstrFlags::DS)) == 0)
4070     return true;
4071 
4072   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4073                                                       : AMDGPU::OpName::vdata;
4074 
4075   const MCRegisterInfo *MRI = getMRI();
4076   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4077   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4078 
4079   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4080     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4081     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4082       return false;
4083   }
4084 
4085   auto FB = getFeatureBits();
4086   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4087     if (DataAreg < 0 || DstAreg < 0)
4088       return true;
4089     return DstAreg == DataAreg;
4090   }
4091 
4092   return DstAreg < 1 && DataAreg < 1;
4093 }
4094 
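// gfx90a requires VGPR/AGPR tuples to start at an even register, e.g.
// (illustrative) v[2:3] is accepted while v[1:2] is rejected.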
4095 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4096   auto FB = getFeatureBits();
4097   if (!FB[AMDGPU::FeatureGFX90AInsts])
4098     return true;
4099 
4100   const MCRegisterInfo *MRI = getMRI();
4101   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4102   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4103   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4104     const MCOperand &Op = Inst.getOperand(I);
4105     if (!Op.isReg())
4106       continue;
4107 
4108     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4109     if (!Sub)
4110       continue;
4111 
4112     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4113       return false;
4114     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4115       return false;
4116   }
4117 
4118   return true;
4119 }
4120 
4121 // gfx90a has an undocumented limitation:
4122 // DS_GWS opcodes must use even aligned registers.
4123 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4124                                   const OperandVector &Operands) {
4125   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4126     return true;
4127 
4128   int Opc = Inst.getOpcode();
4129   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4130       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4131     return true;
4132 
4133   const MCRegisterInfo *MRI = getMRI();
4134   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4135   int Data0Pos =
4136       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4137   assert(Data0Pos != -1);
4138   auto Reg = Inst.getOperand(Data0Pos).getReg();
4139   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4140   if (RegIdx & 1) {
4141     SMLoc RegLoc = getRegLoc(Reg, Operands);
4142     Error(RegLoc, "vgpr must be even aligned");
4143     return false;
4144   }
4145 
4146   return true;
4147 }
4148 
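// Validate the cache policy (cpol) bits: SMRD accepts only glc/dlc, scc is
// rejected on gfx90a, non-MIMG atomics that return the pre-op value must use
// glc, and atomics that do not return a value must not use glc.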
4149 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4150                                             const OperandVector &Operands,
4151                                             const SMLoc &IDLoc) {
4152   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4153                                            AMDGPU::OpName::cpol);
4154   if (CPolPos == -1)
4155     return true;
4156 
4157   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4158 
4159   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4160   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4161       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4162     Error(IDLoc, "invalid cache policy for SMRD instruction");
4163     return false;
4164   }
4165 
4166   if (isGFX90A() && (CPol & CPol::SCC)) {
4167     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4168     StringRef CStr(S.getPointer());
4169     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4170     Error(S, "scc is not supported on this GPU");
4171     return false;
4172   }
4173 
4174   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4175     return true;
4176 
4177   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4178     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4179       Error(IDLoc, "instruction must use glc");
4180       return false;
4181     }
4182   } else {
4183     if (CPol & CPol::GLC) {
4184       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4185       StringRef CStr(S.getPointer());
4186       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4187       Error(S, "instruction must not use glc");
4188       return false;
4189     }
4190   }
4191 
4192   return true;
4193 }
4194 
4195 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4196                                           const SMLoc &IDLoc,
4197                                           const OperandVector &Operands) {
4198   if (auto ErrMsg = validateLdsDirect(Inst)) {
4199     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4200     return false;
4201   }
4202   if (!validateSOPLiteral(Inst)) {
4203     Error(getLitLoc(Operands),
4204       "only one literal operand is allowed");
4205     return false;
4206   }
4207   if (!validateVOP3Literal(Inst, Operands)) {
4208     return false;
4209   }
4210   if (!validateConstantBusLimitations(Inst, Operands)) {
4211     return false;
4212   }
4213   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4214     return false;
4215   }
4216   if (!validateIntClampSupported(Inst)) {
4217     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4218       "integer clamping is not supported on this GPU");
4219     return false;
4220   }
4221   if (!validateOpSel(Inst)) {
4222     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4223       "invalid op_sel operand");
4224     return false;
4225   }
4226   if (!validateDPP(Inst, Operands)) {
4227     return false;
4228   }
4229   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4230   if (!validateMIMGD16(Inst)) {
4231     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4232       "d16 modifier is not supported on this GPU");
4233     return false;
4234   }
4235   if (!validateMIMGDim(Inst)) {
4236     Error(IDLoc, "dim modifier is required on this GPU");
4237     return false;
4238   }
4239   if (!validateMIMGMSAA(Inst)) {
4240     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4241           "invalid dim; must be MSAA type");
4242     return false;
4243   }
4244   if (!validateMIMGDataSize(Inst)) {
4245     Error(IDLoc,
4246       "image data size does not match dmask and tfe");
4247     return false;
4248   }
4249   if (!validateMIMGAddrSize(Inst)) {
4250     Error(IDLoc,
4251       "image address size does not match dim and a16");
4252     return false;
4253   }
4254   if (!validateMIMGAtomicDMask(Inst)) {
4255     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4256       "invalid atomic image dmask");
4257     return false;
4258   }
4259   if (!validateMIMGGatherDMask(Inst)) {
4260     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4261       "invalid image_gather dmask: only one bit must be set");
4262     return false;
4263   }
4264   if (!validateMovrels(Inst, Operands)) {
4265     return false;
4266   }
4267   if (!validateFlatOffset(Inst, Operands)) {
4268     return false;
4269   }
4270   if (!validateSMEMOffset(Inst, Operands)) {
4271     return false;
4272   }
4273   if (!validateMAIAccWrite(Inst, Operands)) {
4274     return false;
4275   }
4276   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4277     return false;
4278   }
4279 
4280   if (!validateAGPRLdSt(Inst)) {
4281     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4282     ? "invalid register class: data and dst should be all VGPR or AGPR"
4283     : "invalid register class: agpr loads and stores not supported on this GPU"
4284     );
4285     return false;
4286   }
4287   if (!validateVGPRAlign(Inst)) {
4288     Error(IDLoc,
4289       "invalid register class: vgpr tuples must be 64 bit aligned");
4290     return false;
4291   }
4292   if (!validateGWS(Inst, Operands)) {
4293     return false;
4294   }
4295 
4296   if (!validateDivScale(Inst)) {
4297     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4298     return false;
4299   }
4303 
4304   return true;
4305 }
4306 
4307 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4308                                             const FeatureBitset &FBS,
4309                                             unsigned VariantID = 0);
4310 
4311 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4312                                 const FeatureBitset &AvailableFeatures,
4313                                 unsigned VariantID);
4314 
4315 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4316                                        const FeatureBitset &FBS) {
4317   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4318 }
4319 
4320 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4321                                        const FeatureBitset &FBS,
4322                                        ArrayRef<unsigned> Variants) {
4323   for (auto Variant : Variants) {
4324     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4325       return true;
4326   }
4327 
4328   return false;
4329 }
4330 
4331 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4332                                                   const SMLoc &IDLoc) {
4333   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4334 
4335   // Check if requested instruction variant is supported.
4336   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4337     return false;
4338 
4339   // This instruction is not supported.
4340   // Clear any other pending errors because they are no longer relevant.
4341   getParser().clearPendingErrors();
4342 
4343   // Requested instruction variant is not supported.
4344   // Check if any other variants are supported.
4345   StringRef VariantName = getMatchedVariantName();
4346   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4347     return Error(IDLoc,
4348                  Twine(VariantName,
4349                        " variant of this instruction is not supported"));
4350   }
4351 
4352   // Finally check if this instruction is supported on any other GPU.
4353   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4354     return Error(IDLoc, "instruction not supported on this GPU");
4355   }
4356 
4357   // Instruction not supported on any GPU. Probably a typo.
4358   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4359   return Error(IDLoc, "invalid instruction" + Suggestion);
4360 }
4361 
4362 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4363                                               OperandVector &Operands,
4364                                               MCStreamer &Out,
4365                                               uint64_t &ErrorInfo,
4366                                               bool MatchingInlineAsm) {
4367   MCInst Inst;
4368   unsigned Result = Match_Success;
4369   for (auto Variant : getMatchedVariants()) {
4370     uint64_t EI;
4371     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4372                                   Variant);
4373     // We order match statuses from least to most specific and keep the most
4374     // specific status as the result:
4375     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4376     if ((R == Match_Success) ||
4377         (R == Match_PreferE32) ||
4378         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4379         (R == Match_InvalidOperand && Result != Match_MissingFeature
4380                                    && Result != Match_PreferE32) ||
4381         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4382                                    && Result != Match_MissingFeature
4383                                    && Result != Match_PreferE32)) {
4384       Result = R;
4385       ErrorInfo = EI;
4386     }
4387     if (R == Match_Success)
4388       break;
4389   }
4390 
4391   if (Result == Match_Success) {
4392     if (!validateInstruction(Inst, IDLoc, Operands)) {
4393       return true;
4394     }
4395     Inst.setLoc(IDLoc);
4396     Out.emitInstruction(Inst, getSTI());
4397     return false;
4398   }
4399 
4400   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4401   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4402     return true;
4403   }
4404 
4405   switch (Result) {
4406   default: break;
4407   case Match_MissingFeature:
4408     // It has been verified that the specified instruction
4409     // mnemonic is valid. A match was found but it requires
4410     // features which are not supported on this GPU.
4411     return Error(IDLoc, "operands are not valid for this GPU or mode");
4412 
4413   case Match_InvalidOperand: {
4414     SMLoc ErrorLoc = IDLoc;
4415     if (ErrorInfo != ~0ULL) {
4416       if (ErrorInfo >= Operands.size()) {
4417         return Error(IDLoc, "too few operands for instruction");
4418       }
4419       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4420       if (ErrorLoc == SMLoc())
4421         ErrorLoc = IDLoc;
4422     }
4423     return Error(ErrorLoc, "invalid operand for instruction");
4424   }
4425 
4426   case Match_PreferE32:
4427     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4428                         "should be encoded as e32");
4429   case Match_MnemonicFail:
4430     llvm_unreachable("Invalid instructions should have been handled already");
4431   }
4432   llvm_unreachable("Implement any new match types added!");
4433 }
4434 
4435 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4436   int64_t Tmp = -1;
4437   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4438     return true;
4439   }
4440   if (getParser().parseAbsoluteExpression(Tmp)) {
4441     return true;
4442   }
4443   Ret = static_cast<uint32_t>(Tmp);
4444   return false;
4445 }
4446 
4447 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4448                                                uint32_t &Minor) {
4449   if (ParseAsAbsoluteExpression(Major))
4450     return TokError("invalid major version");
4451 
4452   if (!trySkipToken(AsmToken::Comma))
4453     return TokError("minor version number required, comma expected");
4454 
4455   if (ParseAsAbsoluteExpression(Minor))
4456     return TokError("invalid minor version");
4457 
4458   return false;
4459 }
4460 
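// Parse ".amdgcn_target" and check it against the configured target, e.g.
// (illustrative) .amdgcn_target "amdgcn-amd-amdhsa--gfx90a".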
4461 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4462   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4463     return TokError("directive only supported for amdgcn architecture");
4464 
4465   std::string TargetIDDirective;
4466   SMLoc TargetStart = getTok().getLoc();
4467   if (getParser().parseEscapedString(TargetIDDirective))
4468     return true;
4469 
4470   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4471   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4472     return getParser().Error(TargetRange.Start,
4473         (Twine(".amdgcn_target directive's target id ") +
4474          Twine(TargetIDDirective) +
4475          Twine(" does not match the specified target id ") +
4476          Twine(getTargetStreamer().getTargetID()->toString())).str());
4477 
4478   return false;
4479 }
4480 
4481 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4482   return Error(Range.Start, "value out of range", Range);
4483 }
4484 
4485 bool AMDGPUAsmParser::calculateGPRBlocks(
4486     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4487     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4488     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4489     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4490   // TODO(scott.linder): These calculations are duplicated from
4491   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4492   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4493 
4494   unsigned NumVGPRs = NextFreeVGPR;
4495   unsigned NumSGPRs = NextFreeSGPR;
4496 
4497   if (Version.Major >= 10)
4498     NumSGPRs = 0;
4499   else {
4500     unsigned MaxAddressableNumSGPRs =
4501         IsaInfo::getAddressableNumSGPRs(&getSTI());
4502 
4503     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4504         NumSGPRs > MaxAddressableNumSGPRs)
4505       return OutOfRangeError(SGPRRange);
4506 
4507     NumSGPRs +=
4508         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4509 
4510     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4511         NumSGPRs > MaxAddressableNumSGPRs)
4512       return OutOfRangeError(SGPRRange);
4513 
4514     if (Features.test(FeatureSGPRInitBug))
4515       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4516   }
4517 
4518   VGPRBlocks =
4519       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4520   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4521 
4522   return false;
4523 }
4524 
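// Parse an ".amdhsa_kernel <name>" block of the form (illustrative):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// Each .amdhsa_* directive may appear at most once; values are range-checked
// against the field widths of the generated kernel descriptor.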
4525 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4526   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4527     return TokError("directive only supported for amdgcn architecture");
4528 
4529   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4530     return TokError("directive only supported for amdhsa OS");
4531 
4532   StringRef KernelName;
4533   if (getParser().parseIdentifier(KernelName))
4534     return true;
4535 
4536   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4537 
4538   StringSet<> Seen;
4539 
4540   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4541 
4542   SMRange VGPRRange;
4543   uint64_t NextFreeVGPR = 0;
4544   uint64_t AccumOffset = 0;
4545   SMRange SGPRRange;
4546   uint64_t NextFreeSGPR = 0;
4547   unsigned UserSGPRCount = 0;
4548   bool ReserveVCC = true;
4549   bool ReserveFlatScr = true;
4550   Optional<bool> EnableWavefrontSize32;
4551 
4552   while (true) {
4553     while (trySkipToken(AsmToken::EndOfStatement));
4554 
4555     StringRef ID;
4556     SMRange IDRange = getTok().getLocRange();
4557     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4558       return true;
4559 
4560     if (ID == ".end_amdhsa_kernel")
4561       break;
4562 
4563     if (Seen.find(ID) != Seen.end())
4564       return TokError(".amdhsa_ directives cannot be repeated");
4565     Seen.insert(ID);
4566 
4567     SMLoc ValStart = getLoc();
4568     int64_t IVal;
4569     if (getParser().parseAbsoluteExpression(IVal))
4570       return true;
4571     SMLoc ValEnd = getLoc();
4572     SMRange ValRange = SMRange(ValStart, ValEnd);
4573 
4574     if (IVal < 0)
4575       return OutOfRangeError(ValRange);
4576 
4577     uint64_t Val = IVal;
4578 
4579 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4580   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4581     return OutOfRangeError(RANGE);                                             \
4582   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4583 
4584     if (ID == ".amdhsa_group_segment_fixed_size") {
4585       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4586         return OutOfRangeError(ValRange);
4587       KD.group_segment_fixed_size = Val;
4588     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4589       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4590         return OutOfRangeError(ValRange);
4591       KD.private_segment_fixed_size = Val;
4592     } else if (ID == ".amdhsa_kernarg_size") {
4593       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4594         return OutOfRangeError(ValRange);
4595       KD.kernarg_size = Val;
4596     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4597       if (hasArchitectedFlatScratch())
4598         return Error(IDRange.Start,
4599                      "directive is not supported with architected flat scratch",
4600                      IDRange);
4601       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4602                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4603                        Val, ValRange);
4604       if (Val)
4605         UserSGPRCount += 4;
4606     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4607       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4608                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4609                        ValRange);
4610       if (Val)
4611         UserSGPRCount += 2;
4612     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4613       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4614                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4615                        ValRange);
4616       if (Val)
4617         UserSGPRCount += 2;
4618     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4619       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4620                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4621                        Val, ValRange);
4622       if (Val)
4623         UserSGPRCount += 2;
4624     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4625       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4626                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4627                        ValRange);
4628       if (Val)
4629         UserSGPRCount += 2;
4630     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4631       if (hasArchitectedFlatScratch())
4632         return Error(IDRange.Start,
4633                      "directive is not supported with architected flat scratch",
4634                      IDRange);
4635       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4636                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4637                        ValRange);
4638       if (Val)
4639         UserSGPRCount += 2;
4640     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4641       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4642                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4643                        Val, ValRange);
4644       if (Val)
4645         UserSGPRCount += 1;
4646     } else if (ID == ".amdhsa_wavefront_size32") {
4647       if (IVersion.Major < 10)
4648         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4649       EnableWavefrontSize32 = Val;
4650       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4651                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4652                        Val, ValRange);
4653     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4654       if (hasArchitectedFlatScratch())
4655         return Error(IDRange.Start,
4656                      "directive is not supported with architected flat scratch",
4657                      IDRange);
4658       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4659                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4660     } else if (ID == ".amdhsa_enable_private_segment") {
4661       if (!hasArchitectedFlatScratch())
4662         return Error(
4663             IDRange.Start,
4664             "directive is not supported without architected flat scratch",
4665             IDRange);
4666       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4667                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4668     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4669       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4670                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4671                        ValRange);
4672     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4673       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4674                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4675                        ValRange);
4676     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4677       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4678                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4679                        ValRange);
4680     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4681       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4682                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4683                        ValRange);
4684     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4685       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4686                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4687                        ValRange);
4688     } else if (ID == ".amdhsa_next_free_vgpr") {
4689       VGPRRange = ValRange;
4690       NextFreeVGPR = Val;
4691     } else if (ID == ".amdhsa_next_free_sgpr") {
4692       SGPRRange = ValRange;
4693       NextFreeSGPR = Val;
4694     } else if (ID == ".amdhsa_accum_offset") {
4695       if (!isGFX90A())
4696         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4697       AccumOffset = Val;
4698     } else if (ID == ".amdhsa_reserve_vcc") {
4699       if (!isUInt<1>(Val))
4700         return OutOfRangeError(ValRange);
4701       ReserveVCC = Val;
4702     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4703       if (IVersion.Major < 7)
4704         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4705       if (hasArchitectedFlatScratch())
4706         return Error(IDRange.Start,
4707                      "directive is not supported with architected flat scratch",
4708                      IDRange);
4709       if (!isUInt<1>(Val))
4710         return OutOfRangeError(ValRange);
4711       ReserveFlatScr = Val;
4712     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4713       if (IVersion.Major < 8)
4714         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4715       if (!isUInt<1>(Val))
4716         return OutOfRangeError(ValRange);
4717       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4718         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4719                                  IDRange);
4720     } else if (ID == ".amdhsa_float_round_mode_32") {
4721       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4722                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4723     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4724       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4725                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4726     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4727       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4728                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4729     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4730       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4731                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4732                        ValRange);
4733     } else if (ID == ".amdhsa_dx10_clamp") {
4734       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4735                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4736     } else if (ID == ".amdhsa_ieee_mode") {
4737       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4738                        Val, ValRange);
4739     } else if (ID == ".amdhsa_fp16_overflow") {
4740       if (IVersion.Major < 9)
4741         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4742       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4743                        ValRange);
4744     } else if (ID == ".amdhsa_tg_split") {
4745       if (!isGFX90A())
4746         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4747       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4748                        ValRange);
4749     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4750       if (IVersion.Major < 10)
4751         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4752       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4753                        ValRange);
4754     } else if (ID == ".amdhsa_memory_ordered") {
4755       if (IVersion.Major < 10)
4756         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4757       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4758                        ValRange);
4759     } else if (ID == ".amdhsa_forward_progress") {
4760       if (IVersion.Major < 10)
4761         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4762       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4763                        ValRange);
4764     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4765       PARSE_BITS_ENTRY(
4766           KD.compute_pgm_rsrc2,
4767           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4768           ValRange);
4769     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4770       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4771                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4772                        Val, ValRange);
4773     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4774       PARSE_BITS_ENTRY(
4775           KD.compute_pgm_rsrc2,
4776           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4777           ValRange);
4778     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4779       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4780                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4781                        Val, ValRange);
4782     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4783       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4784                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4785                        Val, ValRange);
4786     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4787       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4788                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4789                        Val, ValRange);
4790     } else if (ID == ".amdhsa_exception_int_div_zero") {
4791       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4792                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4793                        Val, ValRange);
4794     } else {
4795       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4796     }
4797 
4798 #undef PARSE_BITS_ENTRY
4799   }
4800 
4801   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4802     return TokError(".amdhsa_next_free_vgpr directive is required");
4803 
4804   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4805     return TokError(".amdhsa_next_free_sgpr directive is required");
4806 
4807   unsigned VGPRBlocks;
4808   unsigned SGPRBlocks;
4809   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4810                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4811                          EnableWavefrontSize32, NextFreeVGPR,
4812                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4813                          SGPRBlocks))
4814     return true;
4815 
4816   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4817           VGPRBlocks))
4818     return OutOfRangeError(VGPRRange);
4819   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4820                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4821 
4822   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4823           SGPRBlocks))
4824     return OutOfRangeError(SGPRRange);
4825   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4826                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4827                   SGPRBlocks);
4828 
4829   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4830     return TokError("too many user SGPRs enabled");
4831   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4832                   UserSGPRCount);
4833 
4834   if (isGFX90A()) {
4835     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4836       return TokError(".amdhsa_accum_offset directive is required");
4837     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4838       return TokError("accum_offset should be in range [4..256] in "
4839                       "increments of 4");
4840     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4841       return TokError("accum_offset exceeds total VGPR allocation");
4842     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4843                     (AccumOffset / 4 - 1));
4844   }
4845 
4846   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4847       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4848       ReserveFlatScr);
4849   return false;
4850 }
4851 
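/// Parse the .hsa_code_object_version directive: read the major and minor
/// version numbers and emit them via the target streamer.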
4852 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4853   uint32_t Major;
4854   uint32_t Minor;
4855 
4856   if (ParseDirectiveMajorMinor(Major, Minor))
4857     return true;
4858 
4859   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4860   return false;
4861 }
4862 
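/// Parse the .hsa_code_object_isa directive. With no arguments the ISA
/// version of the targeted GPU is emitted; otherwise the explicit major,
/// minor, stepping, vendor and arch values are parsed and emitted.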
4863 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4864   uint32_t Major;
4865   uint32_t Minor;
4866   uint32_t Stepping;
4867   StringRef VendorName;
4868   StringRef ArchName;
4869 
4870   // If this directive has no arguments, then use the ISA version for the
4871   // targeted GPU.
4872   if (isToken(AsmToken::EndOfStatement)) {
4873     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4874     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4875                                                         ISA.Stepping,
4876                                                         "AMD", "AMDGPU");
4877     return false;
4878   }
4879 
4880   if (ParseDirectiveMajorMinor(Major, Minor))
4881     return true;
4882 
4883   if (!trySkipToken(AsmToken::Comma))
4884     return TokError("stepping version number required, comma expected");
4885 
4886   if (ParseAsAbsoluteExpression(Stepping))
4887     return TokError("invalid stepping version");
4888 
4889   if (!trySkipToken(AsmToken::Comma))
4890     return TokError("vendor name required, comma expected");
4891 
4892   if (!parseString(VendorName, "invalid vendor name"))
4893     return true;
4894 
4895   if (!trySkipToken(AsmToken::Comma))
4896     return TokError("arch name required, comma expected");
4897 
4898   if (!parseString(ArchName, "invalid arch name"))
4899     return true;
4900 
4901   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4902                                                       VendorName, ArchName);
4903   return false;
4904 }
4905 
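/// Parse the value of the amd_kernel_code_t field named \p ID and apply it
/// to \p Header, rejecting settings that are inconsistent with the current
/// subtarget features.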
4906 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4907                                                amd_kernel_code_t &Header) {
4908   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4909   // assembly for backwards compatibility.
4910   if (ID == "max_scratch_backing_memory_byte_size") {
4911     Parser.eatToEndOfStatement();
4912     return false;
4913   }
4914 
4915   SmallString<40> ErrStr;
4916   raw_svector_ostream Err(ErrStr);
4917   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4918     return TokError(Err.str());
4919   }
4920   Lex();
4921 
4922   if (ID == "enable_wavefront_size32") {
4923     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4924       if (!isGFX10Plus())
4925         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4926       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4927         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4928     } else {
4929       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4930         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4931     }
4932   }
4933 
4934   if (ID == "wavefront_size") {
4935     if (Header.wavefront_size == 5) {
4936       if (!isGFX10Plus())
4937         return TokError("wavefront_size=5 is only allowed on GFX10+");
4938       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4939         return TokError("wavefront_size=5 requires +WavefrontSize32");
4940     } else if (Header.wavefront_size == 6) {
4941       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4942         return TokError("wavefront_size=6 requires +WavefrontSize64");
4943     }
4944   }
4945 
4946   if (ID == "enable_wgp_mode") {
4947     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4948         !isGFX10Plus())
4949       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4950   }
4951 
4952   if (ID == "enable_mem_ordered") {
4953     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4954         !isGFX10Plus())
4955       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4956   }
4957 
4958   if (ID == "enable_fwd_progress") {
4959     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4960         !isGFX10Plus())
4961       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4962   }
4963 
4964   return false;
4965 }
4966 
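/// Parse an .amd_kernel_code_t block: initialize the header with subtarget
/// defaults, consume field assignments until .end_amd_kernel_code_t, and
/// emit the resulting header via the target streamer.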
4967 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4968   amd_kernel_code_t Header;
4969   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4970 
4971   while (true) {
4972     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4973     // will set the current token to EndOfStatement.
4974     while(trySkipToken(AsmToken::EndOfStatement));
4975 
4976     StringRef ID;
4977     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4978       return true;
4979 
4980     if (ID == ".end_amd_kernel_code_t")
4981       break;
4982 
4983     if (ParseAMDKernelCodeTValue(ID, Header))
4984       return true;
4985   }
4986 
4987   getTargetStreamer().EmitAMDKernelCodeT(Header);
4988 
4989   return false;
4990 }
4991 
4992 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4993   StringRef KernelName;
4994   if (!parseId(KernelName, "expected symbol name"))
4995     return true;
4996 
4997   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4998                                            ELF::STT_AMDGPU_HSA_KERNEL);
4999 
5000   KernelScope.initialize(getContext());
5001   return false;
5002 }
5003 
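/// Parse the .amd_amdgpu_isa directive and check that the specified target
/// id string matches the one selected by the assembler options.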
5004 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5005   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5006     return Error(getLoc(),
5007                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5008                  "architectures");
5009   }
5010 
5011   auto TargetIDDirective = getLexer().getTok().getStringContents();
5012   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5013     return Error(getParser().getTok().getLoc(), "target id must match options");
5014 
5015   getTargetStreamer().EmitISAVersion();
5016   Lex();
5017 
5018   return false;
5019 }
5020 
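/// Parse an HSA metadata block (the begin/end directive pair depends on the
/// HSA ABI version) and emit the collected metadata via the target streamer.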
5021 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5022   const char *AssemblerDirectiveBegin;
5023   const char *AssemblerDirectiveEnd;
5024   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5025       isHsaAbiVersion3Or4(&getSTI())
5026           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5027                             HSAMD::V3::AssemblerDirectiveEnd)
5028           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5029                             HSAMD::AssemblerDirectiveEnd);
5030 
5031   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5032     return Error(getLoc(),
5033                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5034                  "not available on non-amdhsa OSes")).str());
5035   }
5036 
5037   std::string HSAMetadataString;
5038   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5039                           HSAMetadataString))
5040     return true;
5041 
5042   if (isHsaAbiVersion3Or4(&getSTI())) {
5043     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5044       return Error(getLoc(), "invalid HSA metadata");
5045   } else {
5046     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5047       return Error(getLoc(), "invalid HSA metadata");
5048   }
5049 
5050   return false;
5051 }
5052 
5053 /// Common code to parse out a block of text (typically YAML) between start and
5054 /// end directives.
5055 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5056                                           const char *AssemblerDirectiveEnd,
5057                                           std::string &CollectString) {
5058 
5059   raw_string_ostream CollectStream(CollectString);
5060 
5061   getLexer().setSkipSpace(false);
5062 
5063   bool FoundEnd = false;
5064   while (!isToken(AsmToken::Eof)) {
5065     while (isToken(AsmToken::Space)) {
5066       CollectStream << getTokenStr();
5067       Lex();
5068     }
5069 
5070     if (trySkipId(AssemblerDirectiveEnd)) {
5071       FoundEnd = true;
5072       break;
5073     }
5074 
5075     CollectStream << Parser.parseStringToEndOfStatement()
5076                   << getContext().getAsmInfo()->getSeparatorString();
5077 
5078     Parser.eatToEndOfStatement();
5079   }
5080 
5081   getLexer().setSkipSpace(true);
5082 
5083   if (isToken(AsmToken::Eof) && !FoundEnd) {
5084     return TokError(Twine("expected directive ") +
5085                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5086   }
5087 
5088   CollectStream.flush();
5089   return false;
5090 }
5091 
5092 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5093 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5094   std::string String;
5095   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5096                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5097     return true;
5098 
5099   auto PALMetadata = getTargetStreamer().getPALMetadata();
5100   if (!PALMetadata->setFromString(String))
5101     return Error(getLoc(), "invalid PAL metadata");
5102   return false;
5103 }
5104 
5105 /// Parse the assembler directive for old linear-format PAL metadata.
5106 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5107   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5108     return Error(getLoc(),
5109                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5110                  "not available on non-amdpal OSes")).str());
5111   }
5112 
5113   auto PALMetadata = getTargetStreamer().getPALMetadata();
5114   PALMetadata->setLegacy();
5115   for (;;) {
5116     uint32_t Key, Value;
5117     if (ParseAsAbsoluteExpression(Key)) {
5118       return TokError(Twine("invalid value in ") +
5119                       Twine(PALMD::AssemblerDirective));
5120     }
5121     if (!trySkipToken(AsmToken::Comma)) {
5122       return TokError(Twine("expected an even number of values in ") +
5123                       Twine(PALMD::AssemblerDirective));
5124     }
5125     if (ParseAsAbsoluteExpression(Value)) {
5126       return TokError(Twine("invalid value in ") +
5127                       Twine(PALMD::AssemblerDirective));
5128     }
5129     PALMetadata->setRegister(Key, Value);
5130     if (!trySkipToken(AsmToken::Comma))
5131       break;
5132   }
5133   return false;
5134 }
5135 
5136 /// ParseDirectiveAMDGPULDS
5137 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5138 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5139   if (getParser().checkForValidSection())
5140     return true;
5141 
5142   StringRef Name;
5143   SMLoc NameLoc = getLoc();
5144   if (getParser().parseIdentifier(Name))
5145     return TokError("expected identifier in directive");
5146 
5147   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5148   if (parseToken(AsmToken::Comma, "expected ','"))
5149     return true;
5150 
5151   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5152 
5153   int64_t Size;
5154   SMLoc SizeLoc = getLoc();
5155   if (getParser().parseAbsoluteExpression(Size))
5156     return true;
5157   if (Size < 0)
5158     return Error(SizeLoc, "size must be non-negative");
5159   if (Size > LocalMemorySize)
5160     return Error(SizeLoc, "size is too large");
5161 
5162   int64_t Alignment = 4;
5163   if (trySkipToken(AsmToken::Comma)) {
5164     SMLoc AlignLoc = getLoc();
5165     if (getParser().parseAbsoluteExpression(Alignment))
5166       return true;
5167     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5168       return Error(AlignLoc, "alignment must be a power of two");
5169 
5170     // Alignment larger than the size of LDS is possible in theory, as long
5171     // as the linker manages to place the symbol at address 0, but we do want
5172     // to make sure the alignment fits nicely into a 32-bit integer.
5173     if (Alignment >= 1u << 31)
5174       return Error(AlignLoc, "alignment is too large");
5175   }
5176 
5177   if (parseToken(AsmToken::EndOfStatement,
5178                  "unexpected token in '.amdgpu_lds' directive"))
5179     return true;
5180 
5181   Symbol->redefineIfPossible();
5182   if (!Symbol->isUndefined())
5183     return Error(NameLoc, "invalid symbol redefinition");
5184 
5185   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5186   return false;
5187 }
5188 
5189 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5190   StringRef IDVal = DirectiveID.getString();
5191 
5192   if (isHsaAbiVersion3Or4(&getSTI())) {
5193     if (IDVal == ".amdhsa_kernel")
5194      return ParseDirectiveAMDHSAKernel();
5195 
5196     // TODO: Restructure/combine with PAL metadata directive.
5197     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5198       return ParseDirectiveHSAMetadata();
5199   } else {
5200     if (IDVal == ".hsa_code_object_version")
5201       return ParseDirectiveHSACodeObjectVersion();
5202 
5203     if (IDVal == ".hsa_code_object_isa")
5204       return ParseDirectiveHSACodeObjectISA();
5205 
5206     if (IDVal == ".amd_kernel_code_t")
5207       return ParseDirectiveAMDKernelCodeT();
5208 
5209     if (IDVal == ".amdgpu_hsa_kernel")
5210       return ParseDirectiveAMDGPUHsaKernel();
5211 
5212     if (IDVal == ".amd_amdgpu_isa")
5213       return ParseDirectiveISAVersion();
5214 
5215     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5216       return ParseDirectiveHSAMetadata();
5217   }
5218 
5219   if (IDVal == ".amdgcn_target")
5220     return ParseDirectiveAMDGCNTarget();
5221 
5222   if (IDVal == ".amdgpu_lds")
5223     return ParseDirectiveAMDGPULDS();
5224 
5225   if (IDVal == PALMD::AssemblerDirectiveBegin)
5226     return ParseDirectivePALMetadataBegin();
5227 
5228   if (IDVal == PALMD::AssemblerDirective)
5229     return ParseDirectivePALMetadata();
5230 
5231   return true;
5232 }
5233 
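// Check whether register RegNo exists and may be used as an operand on the
// current subtarget.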
5234 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5235                                            unsigned RegNo) {
5236 
5237   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5238        R.isValid(); ++R) {
5239     if (*R == RegNo)
5240       return isGFX9Plus();
5241   }
5242 
5243   // GFX10 has 2 more SGPRs, 104 and 105.
5244   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5245        R.isValid(); ++R) {
5246     if (*R == RegNo)
5247       return hasSGPR104_SGPR105();
5248   }
5249 
5250   switch (RegNo) {
5251   case AMDGPU::SRC_SHARED_BASE:
5252   case AMDGPU::SRC_SHARED_LIMIT:
5253   case AMDGPU::SRC_PRIVATE_BASE:
5254   case AMDGPU::SRC_PRIVATE_LIMIT:
5255   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5256     return isGFX9Plus();
5257   case AMDGPU::TBA:
5258   case AMDGPU::TBA_LO:
5259   case AMDGPU::TBA_HI:
5260   case AMDGPU::TMA:
5261   case AMDGPU::TMA_LO:
5262   case AMDGPU::TMA_HI:
5263     return !isGFX9Plus();
5264   case AMDGPU::XNACK_MASK:
5265   case AMDGPU::XNACK_MASK_LO:
5266   case AMDGPU::XNACK_MASK_HI:
5267     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5268   case AMDGPU::SGPR_NULL:
5269     return isGFX10Plus();
5270   default:
5271     break;
5272   }
5273 
5274   if (isCI())
5275     return true;
5276 
5277   if (isSI() || isGFX10Plus()) {
5278     // No flat_scr on SI.
5279     // On GFX10 flat scratch is not a valid register operand and can only be
5280     // accessed with s_setreg/s_getreg.
5281     switch (RegNo) {
5282     case AMDGPU::FLAT_SCR:
5283     case AMDGPU::FLAT_SCR_LO:
5284     case AMDGPU::FLAT_SCR_HI:
5285       return false;
5286     default:
5287       return true;
5288     }
5289   }
5290 
5291   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5292   // SI/CI have.
5293   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5294        R.isValid(); ++R) {
5295     if (*R == RegNo)
5296       return hasSGPR102_SGPR103();
5297   }
5298 
5299   return true;
5300 }
5301 
5302 OperandMatchResultTy
5303 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5304                               OperandMode Mode) {
5305   // Try to parse with a custom parser
5306   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5307 
5308   // If we successfully parsed the operand or if there was an error parsing,
5309   // we are done.
5310   //
5311   // If we are parsing after we reach EndOfStatement then this means we
5312   // are appending default values to the Operands list.  This is only done
5313   // by the custom parser, so we shouldn't continue on to the generic parsing.
5314   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5315       isToken(AsmToken::EndOfStatement))
5316     return ResTy;
5317 
5318   SMLoc RBraceLoc;
5319   SMLoc LBraceLoc = getLoc();
5320   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5321     unsigned Prefix = Operands.size();
5322 
5323     for (;;) {
5324       auto Loc = getLoc();
5325       ResTy = parseReg(Operands);
5326       if (ResTy == MatchOperand_NoMatch)
5327         Error(Loc, "expected a register");
5328       if (ResTy != MatchOperand_Success)
5329         return MatchOperand_ParseFail;
5330 
5331       RBraceLoc = getLoc();
5332       if (trySkipToken(AsmToken::RBrac))
5333         break;
5334 
5335       if (!skipToken(AsmToken::Comma,
5336                      "expected a comma or a closing square bracket")) {
5337         return MatchOperand_ParseFail;
5338       }
5339     }
5340 
5341     if (Operands.size() - Prefix > 1) {
5342       Operands.insert(Operands.begin() + Prefix,
5343                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5344       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5345     }
5346 
5347     return MatchOperand_Success;
5348   }
5349 
5350   return parseRegOrImm(Operands);
5351 }
5352 
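// Strip a recognized encoding suffix (_e64, _e32, _dpp or _sdwa) from the
// mnemonic and record the corresponding forced encoding.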
5353 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5354   // Clear any forced encodings from the previous instruction.
5355   setForcedEncodingSize(0);
5356   setForcedDPP(false);
5357   setForcedSDWA(false);
5358 
5359   if (Name.endswith("_e64")) {
5360     setForcedEncodingSize(64);
5361     return Name.substr(0, Name.size() - 4);
5362   } else if (Name.endswith("_e32")) {
5363     setForcedEncodingSize(32);
5364     return Name.substr(0, Name.size() - 4);
5365   } else if (Name.endswith("_dpp")) {
5366     setForcedDPP(true);
5367     return Name.substr(0, Name.size() - 4);
5368   } else if (Name.endswith("_sdwa")) {
5369     setForcedSDWA(true);
5370     return Name.substr(0, Name.size() - 5);
5371   }
5372   return Name;
5373 }
5374 
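// Parse a complete instruction: strip any encoding suffix from the mnemonic,
// then parse a comma-separated list of operands. On GFX10+ the address of an
// image instruction may be parsed in NSA form.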
5375 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5376                                        StringRef Name,
5377                                        SMLoc NameLoc, OperandVector &Operands) {
5378   // Add the instruction mnemonic
5379   Name = parseMnemonicSuffix(Name);
5380   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5381 
5382   bool IsMIMG = Name.startswith("image_");
5383 
5384   while (!trySkipToken(AsmToken::EndOfStatement)) {
5385     OperandMode Mode = OperandMode_Default;
5386     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5387       Mode = OperandMode_NSA;
5388     CPolSeen = 0;
5389     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5390 
5391     if (Res != MatchOperand_Success) {
5392       checkUnsupportedInstruction(Name, NameLoc);
5393       if (!Parser.hasPendingError()) {
5394         // FIXME: use real operand location rather than the current location.
5395         StringRef Msg =
5396           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5397                                             "not a valid operand.";
5398         Error(getLoc(), Msg);
5399       }
5400       while (!trySkipToken(AsmToken::EndOfStatement)) {
5401         lex();
5402       }
5403       return true;
5404     }
5405 
5406     // Eat the comma or space if there is one.
5407     trySkipToken(AsmToken::Comma);
5408   }
5409 
5410   return false;
5411 }
5412 
5413 //===----------------------------------------------------------------------===//
5414 // Utility functions
5415 //===----------------------------------------------------------------------===//
5416 
5417 OperandMatchResultTy
5418 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5419 
5420   if (!trySkipId(Prefix, AsmToken::Colon))
5421     return MatchOperand_NoMatch;
5422 
5423   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5424 }
5425 
5426 OperandMatchResultTy
5427 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5428                                     AMDGPUOperand::ImmTy ImmTy,
5429                                     bool (*ConvertResult)(int64_t&)) {
5430   SMLoc S = getLoc();
5431   int64_t Value = 0;
5432 
5433   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5434   if (Res != MatchOperand_Success)
5435     return Res;
5436 
5437   if (ConvertResult && !ConvertResult(Value)) {
5438     Error(S, "invalid " + StringRef(Prefix) + " value.");
5439   }
5440 
5441   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5442   return MatchOperand_Success;
5443 }
5444 
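// Parse an operand of the form "<prefix>:[b0,b1,...]" with at most 4
// elements, each 0 or 1, and pack the bits into a single immediate.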
5445 OperandMatchResultTy
5446 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5447                                              OperandVector &Operands,
5448                                              AMDGPUOperand::ImmTy ImmTy,
5449                                              bool (*ConvertResult)(int64_t&)) {
5450   SMLoc S = getLoc();
5451   if (!trySkipId(Prefix, AsmToken::Colon))
5452     return MatchOperand_NoMatch;
5453 
5454   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5455     return MatchOperand_ParseFail;
5456 
5457   unsigned Val = 0;
5458   const unsigned MaxSize = 4;
5459 
5460   // FIXME: How to verify the number of elements matches the number of src
5461   // operands?
5462   for (int I = 0; ; ++I) {
5463     int64_t Op;
5464     SMLoc Loc = getLoc();
5465     if (!parseExpr(Op))
5466       return MatchOperand_ParseFail;
5467 
5468     if (Op != 0 && Op != 1) {
5469       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5470       return MatchOperand_ParseFail;
5471     }
5472 
5473     Val |= (Op << I);
5474 
5475     if (trySkipToken(AsmToken::RBrac))
5476       break;
5477 
5478     if (I + 1 == MaxSize) {
5479       Error(getLoc(), "expected a closing square bracket");
5480       return MatchOperand_ParseFail;
5481     }
5482 
5483     if (!skipToken(AsmToken::Comma, "expected a comma"))
5484       return MatchOperand_ParseFail;
5485   }
5486 
5487   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5488   return MatchOperand_Success;
5489 }
5490 
5491 OperandMatchResultTy
5492 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5493                                AMDGPUOperand::ImmTy ImmTy) {
5494   int64_t Bit;
5495   SMLoc S = getLoc();
5496 
5497   if (trySkipId(Name)) {
5498     Bit = 1;
5499   } else if (trySkipId("no", Name)) {
5500     Bit = 0;
5501   } else {
5502     return MatchOperand_NoMatch;
5503   }
5504 
5505   if (Name == "r128" && !hasMIMG_R128()) {
5506     Error(S, "r128 modifier is not supported on this GPU");
5507     return MatchOperand_ParseFail;
5508   }
5509   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5510     Error(S, "a16 modifier is not supported on this GPU");
5511     return MatchOperand_ParseFail;
5512   }
5513 
5514   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5515     ImmTy = AMDGPUOperand::ImmTyR128A16;
5516 
5517   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5518   return MatchOperand_Success;
5519 }
5520 
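// Parse a cache policy modifier (glc, slc, dlc, scc or a "no"-prefixed form)
// and merge it into the instruction's combined cpol immediate.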
5521 OperandMatchResultTy
5522 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5523   unsigned CPolOn = 0;
5524   unsigned CPolOff = 0;
5525   SMLoc S = getLoc();
5526 
5527   if (trySkipId("glc"))
5528     CPolOn = AMDGPU::CPol::GLC;
5529   else if (trySkipId("noglc"))
5530     CPolOff = AMDGPU::CPol::GLC;
5531   else if (trySkipId("slc"))
5532     CPolOn = AMDGPU::CPol::SLC;
5533   else if (trySkipId("noslc"))
5534     CPolOff = AMDGPU::CPol::SLC;
5535   else if (trySkipId("dlc"))
5536     CPolOn = AMDGPU::CPol::DLC;
5537   else if (trySkipId("nodlc"))
5538     CPolOff = AMDGPU::CPol::DLC;
5539   else if (trySkipId("scc"))
5540     CPolOn = AMDGPU::CPol::SCC;
5541   else if (trySkipId("noscc"))
5542     CPolOff = AMDGPU::CPol::SCC;
5543   else
5544     return MatchOperand_NoMatch;
5545 
5546   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5547     Error(S, "dlc modifier is not supported on this GPU");
5548     return MatchOperand_ParseFail;
5549   }
5550 
5551   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5552     Error(S, "scc modifier is not supported on this GPU");
5553     return MatchOperand_ParseFail;
5554   }
5555 
5556   if (CPolSeen & (CPolOn | CPolOff)) {
5557     Error(S, "duplicate cache policy modifier");
5558     return MatchOperand_ParseFail;
5559   }
5560 
5561   CPolSeen |= (CPolOn | CPolOff);
5562 
5563   for (unsigned I = 1; I != Operands.size(); ++I) {
5564     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5565     if (Op.isCPol()) {
5566       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5567       return MatchOperand_Success;
5568     }
5569   }
5570 
5571   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5572                                               AMDGPUOperand::ImmTyCPol));
5573 
5574   return MatchOperand_Success;
5575 }
5576 
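// Add an optional immediate operand to Inst, taking its value from Operands
// if the modifier was parsed and from Default otherwise.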
5577 static void addOptionalImmOperand(
5578   MCInst& Inst, const OperandVector& Operands,
5579   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5580   AMDGPUOperand::ImmTy ImmT,
5581   int64_t Default = 0) {
5582   auto i = OptionalIdx.find(ImmT);
5583   if (i != OptionalIdx.end()) {
5584     unsigned Idx = i->second;
5585     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5586   } else {
5587     Inst.addOperand(MCOperand::createImm(Default));
5588   }
5589 }
5590 
5591 OperandMatchResultTy
5592 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5593                                        StringRef &Value,
5594                                        SMLoc &StringLoc) {
5595   if (!trySkipId(Prefix, AsmToken::Colon))
5596     return MatchOperand_NoMatch;
5597 
5598   StringLoc = getLoc();
5599   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5600                                                   : MatchOperand_ParseFail;
5601 }
5602 
5603 //===----------------------------------------------------------------------===//
5604 // MTBUF format
5605 //===----------------------------------------------------------------------===//
5606 
5607 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5608                                   int64_t MaxVal,
5609                                   int64_t &Fmt) {
5610   int64_t Val;
5611   SMLoc Loc = getLoc();
5612 
5613   auto Res = parseIntWithPrefix(Pref, Val);
5614   if (Res == MatchOperand_ParseFail)
5615     return false;
5616   if (Res == MatchOperand_NoMatch)
5617     return true;
5618 
5619   if (Val < 0 || Val > MaxVal) {
5620     Error(Loc, Twine("out of range ", StringRef(Pref)));
5621     return false;
5622   }
5623 
5624   Fmt = Val;
5625   return true;
5626 }
5627 
5628 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5629 // values to live in a joint format operand in the MCInst encoding.
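// Either component is optional and they may appear in either order, e.g.
// both "dfmt:1, nfmt:2" and "nfmt:2, dfmt:1" are accepted; an omitted
// component takes its default encoding.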
5630 OperandMatchResultTy
5631 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5632   using namespace llvm::AMDGPU::MTBUFFormat;
5633 
5634   int64_t Dfmt = DFMT_UNDEF;
5635   int64_t Nfmt = NFMT_UNDEF;
5636 
5637   // dfmt and nfmt can appear in either order, and each is optional.
5638   for (int I = 0; I < 2; ++I) {
5639     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5640       return MatchOperand_ParseFail;
5641 
5642     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5643       return MatchOperand_ParseFail;
5644     }
5645     // Skip optional comma between dfmt/nfmt
5646     // but guard against 2 commas following each other.
5647     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5648         !peekToken().is(AsmToken::Comma)) {
5649       trySkipToken(AsmToken::Comma);
5650     }
5651   }
5652 
5653   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5654     return MatchOperand_NoMatch;
5655 
5656   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5657   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5658 
5659   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5660   return MatchOperand_Success;
5661 }
5662 
5663 OperandMatchResultTy
5664 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5665   using namespace llvm::AMDGPU::MTBUFFormat;
5666 
5667   int64_t Fmt = UFMT_UNDEF;
5668 
5669   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5670     return MatchOperand_ParseFail;
5671 
5672   if (Fmt == UFMT_UNDEF)
5673     return MatchOperand_NoMatch;
5674 
5675   Format = Fmt;
5676   return MatchOperand_Success;
5677 }
5678 
5679 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5680                                     int64_t &Nfmt,
5681                                     StringRef FormatStr,
5682                                     SMLoc Loc) {
5683   using namespace llvm::AMDGPU::MTBUFFormat;
5684   int64_t Format;
5685 
5686   Format = getDfmt(FormatStr);
5687   if (Format != DFMT_UNDEF) {
5688     Dfmt = Format;
5689     return true;
5690   }
5691 
5692   Format = getNfmt(FormatStr, getSTI());
5693   if (Format != NFMT_UNDEF) {
5694     Nfmt = Format;
5695     return true;
5696   }
5697 
5698   Error(Loc, "unsupported format");
5699   return false;
5700 }
5701 
5702 OperandMatchResultTy
5703 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5704                                           SMLoc FormatLoc,
5705                                           int64_t &Format) {
5706   using namespace llvm::AMDGPU::MTBUFFormat;
5707 
5708   int64_t Dfmt = DFMT_UNDEF;
5709   int64_t Nfmt = NFMT_UNDEF;
5710   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5711     return MatchOperand_ParseFail;
5712 
5713   if (trySkipToken(AsmToken::Comma)) {
5714     StringRef Str;
5715     SMLoc Loc = getLoc();
5716     if (!parseId(Str, "expected a format string") ||
5717         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5718       return MatchOperand_ParseFail;
5719     }
5720     if (Dfmt == DFMT_UNDEF) {
5721       Error(Loc, "duplicate numeric format");
5722       return MatchOperand_ParseFail;
5723     } else if (Nfmt == NFMT_UNDEF) {
5724       Error(Loc, "duplicate data format");
5725       return MatchOperand_ParseFail;
5726     }
5727   }
5728 
5729   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5730   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5731 
5732   if (isGFX10Plus()) {
5733     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5734     if (Ufmt == UFMT_UNDEF) {
5735       Error(FormatLoc, "unsupported format");
5736       return MatchOperand_ParseFail;
5737     }
5738     Format = Ufmt;
5739   } else {
5740     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5741   }
5742 
5743   return MatchOperand_Success;
5744 }
5745 
5746 OperandMatchResultTy
5747 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5748                                             SMLoc Loc,
5749                                             int64_t &Format) {
5750   using namespace llvm::AMDGPU::MTBUFFormat;
5751 
5752   auto Id = getUnifiedFormat(FormatStr);
5753   if (Id == UFMT_UNDEF)
5754     return MatchOperand_NoMatch;
5755 
5756   if (!isGFX10Plus()) {
5757     Error(Loc, "unified format is not supported on this GPU");
5758     return MatchOperand_ParseFail;
5759   }
5760 
5761   Format = Id;
5762   return MatchOperand_Success;
5763 }
5764 
5765 OperandMatchResultTy
5766 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5767   using namespace llvm::AMDGPU::MTBUFFormat;
5768   SMLoc Loc = getLoc();
5769 
5770   if (!parseExpr(Format))
5771     return MatchOperand_ParseFail;
5772   if (!isValidFormatEncoding(Format, getSTI())) {
5773     Error(Loc, "out of range format");
5774     return MatchOperand_ParseFail;
5775   }
5776 
5777   return MatchOperand_Success;
5778 }
5779 
5780 OperandMatchResultTy
5781 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5782   using namespace llvm::AMDGPU::MTBUFFormat;
5783 
5784   if (!trySkipId("format", AsmToken::Colon))
5785     return MatchOperand_NoMatch;
5786 
5787   if (trySkipToken(AsmToken::LBrac)) {
5788     StringRef FormatStr;
5789     SMLoc Loc = getLoc();
5790     if (!parseId(FormatStr, "expected a format string"))
5791       return MatchOperand_ParseFail;
5792 
5793     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5794     if (Res == MatchOperand_NoMatch)
5795       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5796     if (Res != MatchOperand_Success)
5797       return Res;
5798 
5799     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5800       return MatchOperand_ParseFail;
5801 
5802     return MatchOperand_Success;
5803   }
5804 
5805   return parseNumericFormat(Format);
5806 }
5807 
5808 OperandMatchResultTy
5809 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5810   using namespace llvm::AMDGPU::MTBUFFormat;
5811 
5812   int64_t Format = getDefaultFormatEncoding(getSTI());
5813   OperandMatchResultTy Res;
5814   SMLoc Loc = getLoc();
5815 
5816   // Parse legacy format syntax.
5817   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5818   if (Res == MatchOperand_ParseFail)
5819     return Res;
5820 
5821   bool FormatFound = (Res == MatchOperand_Success);
5822 
5823   Operands.push_back(
5824     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5825 
5826   if (FormatFound)
5827     trySkipToken(AsmToken::Comma);
5828 
5829   if (isToken(AsmToken::EndOfStatement)) {
5830     // We are expecting an soffset operand,
5831     // but let the matcher handle the error.
5832     return MatchOperand_Success;
5833   }
5834 
5835   // Parse soffset.
5836   Res = parseRegOrImm(Operands);
5837   if (Res != MatchOperand_Success)
5838     return Res;
5839 
5840   trySkipToken(AsmToken::Comma);
5841 
5842   if (!FormatFound) {
5843     Res = parseSymbolicOrNumericFormat(Format);
5844     if (Res == MatchOperand_ParseFail)
5845       return Res;
5846     if (Res == MatchOperand_Success) {
5847       auto Size = Operands.size();
5848       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5849       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5850       Op.setImm(Format);
5851     }
5852     return MatchOperand_Success;
5853   }
5854 
5855   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5856     Error(getLoc(), "duplicate format");
5857     return MatchOperand_ParseFail;
5858   }
5859   return MatchOperand_Success;
5860 }
5861 
5862 //===----------------------------------------------------------------------===//
5863 // ds
5864 //===----------------------------------------------------------------------===//
5865 
5866 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5867                                     const OperandVector &Operands) {
5868   OptionalImmIndexMap OptionalIdx;
5869 
5870   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5871     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5872 
5873     // Add the register arguments
5874     if (Op.isReg()) {
5875       Op.addRegOperands(Inst, 1);
5876       continue;
5877     }
5878 
5879     // Handle optional arguments
5880     OptionalIdx[Op.getImmTy()] = i;
5881   }
5882 
5883   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5884   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5885   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5886 
5887   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5888 }
5889 
5890 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5891                                 bool IsGdsHardcoded) {
5892   OptionalImmIndexMap OptionalIdx;
5893 
5894   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5895     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5896 
5897     // Add the register arguments
5898     if (Op.isReg()) {
5899       Op.addRegOperands(Inst, 1);
5900       continue;
5901     }
5902 
5903     if (Op.isToken() && Op.getToken() == "gds") {
5904       IsGdsHardcoded = true;
5905       continue;
5906     }
5907 
5908     // Handle optional arguments
5909     OptionalIdx[Op.getImmTy()] = i;
5910   }
5911 
5912   AMDGPUOperand::ImmTy OffsetType =
5913     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5914      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5915      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5916                                                       AMDGPUOperand::ImmTyOffset;
5917 
5918   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5919 
5920   if (!IsGdsHardcoded) {
5921     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5922   }
5923   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5924 }
5925 
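// Convert parsed export operands into MCInst operands and compute the
// enable ("en") mask from the sources that are not "off".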
5926 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5927   OptionalImmIndexMap OptionalIdx;
5928 
5929   unsigned OperandIdx[4];
5930   unsigned EnMask = 0;
5931   int SrcIdx = 0;
5932 
5933   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5934     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5935 
5936     // Add the register arguments
5937     if (Op.isReg()) {
5938       assert(SrcIdx < 4);
5939       OperandIdx[SrcIdx] = Inst.size();
5940       Op.addRegOperands(Inst, 1);
5941       ++SrcIdx;
5942       continue;
5943     }
5944 
5945     if (Op.isOff()) {
5946       assert(SrcIdx < 4);
5947       OperandIdx[SrcIdx] = Inst.size();
5948       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5949       ++SrcIdx;
5950       continue;
5951     }
5952 
5953     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5954       Op.addImmOperands(Inst, 1);
5955       continue;
5956     }
5957 
5958     if (Op.isToken() && Op.getToken() == "done")
5959       continue;
5960 
5961     // Handle optional arguments
5962     OptionalIdx[Op.getImmTy()] = i;
5963   }
5964 
5965   assert(SrcIdx == 4);
5966 
5967   bool Compr = false;
5968   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5969     Compr = true;
5970     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5971     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5972     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5973   }
5974 
5975   for (auto i = 0; i < SrcIdx; ++i) {
5976     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5977       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5978     }
5979   }
5980 
5981   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5982   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5983 
5984   Inst.addOperand(MCOperand::createImm(EnMask));
5985 }
5986 
5987 //===----------------------------------------------------------------------===//
5988 // s_waitcnt
5989 //===----------------------------------------------------------------------===//
5990 
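// Encode a single counter value into the combined waitcnt bit mask.
// Returns true on failure, i.e. when the value does not round-trip through
// encode/decode and saturation was not requested.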
5991 static bool
5992 encodeCnt(
5993   const AMDGPU::IsaVersion ISA,
5994   int64_t &IntVal,
5995   int64_t CntVal,
5996   bool Saturate,
5997   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5998   unsigned (*decode)(const IsaVersion &Version, unsigned))
5999 {
6000   bool Failed = false;
6001 
6002   IntVal = encode(ISA, IntVal, CntVal);
6003   if (CntVal != decode(ISA, IntVal)) {
6004     if (Saturate) {
6005       IntVal = encode(ISA, IntVal, -1);
6006     } else {
6007       Failed = true;
6008     }
6009   }
6010   return Failed;
6011 }
6012 
6013 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6014 
6015   SMLoc CntLoc = getLoc();
6016   StringRef CntName = getTokenStr();
6017 
6018   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6019       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6020     return false;
6021 
6022   int64_t CntVal;
6023   SMLoc ValLoc = getLoc();
6024   if (!parseExpr(CntVal))
6025     return false;
6026 
6027   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6028 
6029   bool Failed = true;
6030   bool Sat = CntName.endswith("_sat");
6031 
6032   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6033     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6034   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6035     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6036   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6037     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6038   } else {
6039     Error(CntLoc, "invalid counter name " + CntName);
6040     return false;
6041   }
6042 
6043   if (Failed) {
6044     Error(ValLoc, "too large value for " + CntName);
6045     return false;
6046   }
6047 
6048   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6049     return false;
6050 
6051   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6052     if (isToken(AsmToken::EndOfStatement)) {
6053       Error(getLoc(), "expected a counter name");
6054       return false;
6055     }
6056   }
6057 
6058   return true;
6059 }
6060 
6061 OperandMatchResultTy
6062 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6063   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6064   int64_t Waitcnt = getWaitcntBitMask(ISA);
6065   SMLoc S = getLoc();
6066 
6067   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6068     while (!isToken(AsmToken::EndOfStatement)) {
6069       if (!parseCnt(Waitcnt))
6070         return MatchOperand_ParseFail;
6071     }
6072   } else {
6073     if (!parseExpr(Waitcnt))
6074       return MatchOperand_ParseFail;
6075   }
6076 
6077   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6078   return MatchOperand_Success;
6079 }
6080 
6081 bool
6082 AMDGPUOperand::isSWaitCnt() const {
6083   return isImm();
6084 }
6085 
6086 //===----------------------------------------------------------------------===//
6087 // hwreg
6088 //===----------------------------------------------------------------------===//
6089 
6090 bool
6091 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6092                                 OperandInfoTy &Offset,
6093                                 OperandInfoTy &Width) {
6094   using namespace llvm::AMDGPU::Hwreg;
6095 
6096   // The register may be specified by name or using a numeric code
6097   HwReg.Loc = getLoc();
6098   if (isToken(AsmToken::Identifier) &&
6099       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6100     HwReg.IsSymbolic = true;
6101     lex(); // skip register name
6102   } else if (!parseExpr(HwReg.Id, "a register name")) {
6103     return false;
6104   }
6105 
6106   if (trySkipToken(AsmToken::RParen))
6107     return true;
6108 
6109   // parse optional params
6110   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6111     return false;
6112 
6113   Offset.Loc = getLoc();
6114   if (!parseExpr(Offset.Id))
6115     return false;
6116 
6117   if (!skipToken(AsmToken::Comma, "expected a comma"))
6118     return false;
6119 
6120   Width.Loc = getLoc();
6121   return parseExpr(Width.Id) &&
6122          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6123 }
6124 
6125 bool
6126 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6127                                const OperandInfoTy &Offset,
6128                                const OperandInfoTy &Width) {
6129 
6130   using namespace llvm::AMDGPU::Hwreg;
6131 
6132   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6133     Error(HwReg.Loc,
6134           "specified hardware register is not supported on this GPU");
6135     return false;
6136   }
6137   if (!isValidHwreg(HwReg.Id)) {
6138     Error(HwReg.Loc,
6139           "invalid code of hardware register: only 6-bit values are legal");
6140     return false;
6141   }
6142   if (!isValidHwregOffset(Offset.Id)) {
6143     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6144     return false;
6145   }
6146   if (!isValidHwregWidth(Width.Id)) {
6147     Error(Width.Loc,
6148           "invalid bitfield width: only values from 1 to 32 are legal");
6149     return false;
6150   }
6151   return true;
6152 }
6153 
6154 OperandMatchResultTy
6155 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6156   using namespace llvm::AMDGPU::Hwreg;
6157 
6158   int64_t ImmVal = 0;
6159   SMLoc Loc = getLoc();
6160 
6161   if (trySkipId("hwreg", AsmToken::LParen)) {
6162     OperandInfoTy HwReg(ID_UNKNOWN_);
6163     OperandInfoTy Offset(OFFSET_DEFAULT_);
6164     OperandInfoTy Width(WIDTH_DEFAULT_);
6165     if (parseHwregBody(HwReg, Offset, Width) &&
6166         validateHwreg(HwReg, Offset, Width)) {
6167       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6168     } else {
6169       return MatchOperand_ParseFail;
6170     }
6171   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6172     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6173       Error(Loc, "invalid immediate: only 16-bit values are legal");
6174       return MatchOperand_ParseFail;
6175     }
6176   } else {
6177     return MatchOperand_ParseFail;
6178   }
6179 
6180   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6181   return MatchOperand_Success;
6182 }
6183 
6184 bool AMDGPUOperand::isHwreg() const {
6185   return isImmTy(ImmTyHwreg);
6186 }
6187 
6188 //===----------------------------------------------------------------------===//
6189 // sendmsg
6190 //===----------------------------------------------------------------------===//
6191 
6192 bool
6193 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6194                                   OperandInfoTy &Op,
6195                                   OperandInfoTy &Stream) {
6196   using namespace llvm::AMDGPU::SendMsg;
6197 
6198   Msg.Loc = getLoc();
6199   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6200     Msg.IsSymbolic = true;
6201     lex(); // skip message name
6202   } else if (!parseExpr(Msg.Id, "a message name")) {
6203     return false;
6204   }
6205 
6206   if (trySkipToken(AsmToken::Comma)) {
6207     Op.IsDefined = true;
6208     Op.Loc = getLoc();
6209     if (isToken(AsmToken::Identifier) &&
6210         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6211       lex(); // skip operation name
6212     } else if (!parseExpr(Op.Id, "an operation name")) {
6213       return false;
6214     }
6215 
6216     if (trySkipToken(AsmToken::Comma)) {
6217       Stream.IsDefined = true;
6218       Stream.Loc = getLoc();
6219       if (!parseExpr(Stream.Id))
6220         return false;
6221     }
6222   }
6223 
6224   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6225 }
6226 
6227 bool
6228 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6229                                  const OperandInfoTy &Op,
6230                                  const OperandInfoTy &Stream) {
6231   using namespace llvm::AMDGPU::SendMsg;
6232 
6233   // Validation strictness depends on whether the message is specified
6234   // in a symbolic or in a numeric form. In the latter case,
6235   // only the possibility of encoding is checked.
6236   bool Strict = Msg.IsSymbolic;
6237 
6238   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6239     Error(Msg.Loc, "invalid message id");
6240     return false;
6241   }
6242   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6243     if (Op.IsDefined) {
6244       Error(Op.Loc, "message does not support operations");
6245     } else {
6246       Error(Msg.Loc, "missing message operation");
6247     }
6248     return false;
6249   }
6250   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6251     Error(Op.Loc, "invalid operation id");
6252     return false;
6253   }
6254   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6255     Error(Stream.Loc, "message operation does not support streams");
6256     return false;
6257   }
6258   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6259     Error(Stream.Loc, "invalid message stream id");
6260     return false;
6261   }
6262   return true;
6263 }
6264 
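// Illustrative sendmsg operand forms accepted below (assuming standard AMDGPU
// assembly syntax): "sendmsg(MSG_INTERRUPT)", "sendmsg(MSG_GS, GS_OP_EMIT, 0)",
// or a plain 16-bit immediate.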
6265 OperandMatchResultTy
6266 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6267   using namespace llvm::AMDGPU::SendMsg;
6268 
6269   int64_t ImmVal = 0;
6270   SMLoc Loc = getLoc();
6271 
6272   if (trySkipId("sendmsg", AsmToken::LParen)) {
6273     OperandInfoTy Msg(ID_UNKNOWN_);
6274     OperandInfoTy Op(OP_NONE_);
6275     OperandInfoTy Stream(STREAM_ID_NONE_);
6276     if (parseSendMsgBody(Msg, Op, Stream) &&
6277         validateSendMsg(Msg, Op, Stream)) {
6278       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6279     } else {
6280       return MatchOperand_ParseFail;
6281     }
6282   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6283     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6284       Error(Loc, "invalid immediate: only 16-bit values are legal");
6285       return MatchOperand_ParseFail;
6286     }
6287   } else {
6288     return MatchOperand_ParseFail;
6289   }
6290 
6291   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6292   return MatchOperand_Success;
6293 }
6294 
6295 bool AMDGPUOperand::isSendMsg() const {
6296   return isImmTy(ImmTySendMsg);
6297 }
6298 
6299 //===----------------------------------------------------------------------===//
6300 // v_interp
6301 //===----------------------------------------------------------------------===//
6302 
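// Illustrative operands for the interpolation parsers below: the slot is one
// of "p10", "p20" or "p0" (e.g. "v_interp_mov_f32 v0, p10, attr0.x"), and the
// attribute has the form "attr<N>.<chan>" with N in [0,63] and chan in
// {x, y, z, w}.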
6303 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6304   StringRef Str;
6305   SMLoc S = getLoc();
6306 
6307   if (!parseId(Str))
6308     return MatchOperand_NoMatch;
6309 
6310   int Slot = StringSwitch<int>(Str)
6311     .Case("p10", 0)
6312     .Case("p20", 1)
6313     .Case("p0", 2)
6314     .Default(-1);
6315 
6316   if (Slot == -1) {
6317     Error(S, "invalid interpolation slot");
6318     return MatchOperand_ParseFail;
6319   }
6320 
6321   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6322                                               AMDGPUOperand::ImmTyInterpSlot));
6323   return MatchOperand_Success;
6324 }
6325 
6326 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6327   StringRef Str;
6328   SMLoc S = getLoc();
6329 
6330   if (!parseId(Str))
6331     return MatchOperand_NoMatch;
6332 
6333   if (!Str.startswith("attr")) {
6334     Error(S, "invalid interpolation attribute");
6335     return MatchOperand_ParseFail;
6336   }
6337 
6338   StringRef Chan = Str.take_back(2);
6339   int AttrChan = StringSwitch<int>(Chan)
6340     .Case(".x", 0)
6341     .Case(".y", 1)
6342     .Case(".z", 2)
6343     .Case(".w", 3)
6344     .Default(-1);
6345   if (AttrChan == -1) {
6346     Error(S, "invalid or missing interpolation attribute channel");
6347     return MatchOperand_ParseFail;
6348   }
6349 
6350   Str = Str.drop_back(2).drop_front(4);
6351 
6352   uint8_t Attr;
6353   if (Str.getAsInteger(10, Attr)) {
6354     Error(S, "invalid or missing interpolation attribute number");
6355     return MatchOperand_ParseFail;
6356   }
6357 
6358   if (Attr > 63) {
6359     Error(S, "out of bounds interpolation attribute number");
6360     return MatchOperand_ParseFail;
6361   }
6362 
6363   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6364 
6365   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6366                                               AMDGPUOperand::ImmTyInterpAttr));
6367   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6368                                               AMDGPUOperand::ImmTyAttrChan));
6369   return MatchOperand_Success;
6370 }
6371 
6372 //===----------------------------------------------------------------------===//
6373 // exp
6374 //===----------------------------------------------------------------------===//
6375 
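// Illustrative export targets accepted below (availability depends on the
// GPU): "mrt0".."mrt7", "mrtz", "null", "pos0".."pos3", "param0".."param31",
// e.g. "exp mrt0 v0, v0, v0, v0 done vm".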
6376 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6377   using namespace llvm::AMDGPU::Exp;
6378 
6379   StringRef Str;
6380   SMLoc S = getLoc();
6381 
6382   if (!parseId(Str))
6383     return MatchOperand_NoMatch;
6384 
6385   unsigned Id = getTgtId(Str);
6386   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6387     Error(S, (Id == ET_INVALID) ?
6388                 "invalid exp target" :
6389                 "exp target is not supported on this GPU");
6390     return MatchOperand_ParseFail;
6391   }
6392 
6393   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6394                                               AMDGPUOperand::ImmTyExpTgt));
6395   return MatchOperand_Success;
6396 }
6397 
6398 //===----------------------------------------------------------------------===//
6399 // parser helpers
6400 //===----------------------------------------------------------------------===//
6401 
6402 bool
6403 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6404   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6405 }
6406 
6407 bool
6408 AMDGPUAsmParser::isId(const StringRef Id) const {
6409   return isId(getToken(), Id);
6410 }
6411 
6412 bool
6413 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6414   return getTokenKind() == Kind;
6415 }
6416 
6417 bool
6418 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6419   if (isId(Id)) {
6420     lex();
6421     return true;
6422   }
6423   return false;
6424 }
6425 
6426 bool
6427 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6428   if (isToken(AsmToken::Identifier)) {
6429     StringRef Tok = getTokenStr();
6430     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6431       lex();
6432       return true;
6433     }
6434   }
6435   return false;
6436 }
6437 
6438 bool
6439 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6440   if (isId(Id) && peekToken().is(Kind)) {
6441     lex();
6442     lex();
6443     return true;
6444   }
6445   return false;
6446 }
6447 
6448 bool
6449 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6450   if (isToken(Kind)) {
6451     lex();
6452     return true;
6453   }
6454   return false;
6455 }
6456 
6457 bool
6458 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6459                            const StringRef ErrMsg) {
6460   if (!trySkipToken(Kind)) {
6461     Error(getLoc(), ErrMsg);
6462     return false;
6463   }
6464   return true;
6465 }
6466 
6467 bool
6468 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6469   SMLoc S = getLoc();
6470 
6471   const MCExpr *Expr;
6472   if (Parser.parseExpression(Expr))
6473     return false;
6474 
6475   if (Expr->evaluateAsAbsolute(Imm))
6476     return true;
6477 
6478   if (Expected.empty()) {
6479     Error(S, "expected absolute expression");
6480   } else {
6481     Error(S, Twine("expected ", Expected) +
6482              Twine(" or an absolute expression"));
6483   }
6484   return false;
6485 }
6486 
6487 bool
6488 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6489   SMLoc S = getLoc();
6490 
6491   const MCExpr *Expr;
6492   if (Parser.parseExpression(Expr))
6493     return false;
6494 
6495   int64_t IntVal;
6496   if (Expr->evaluateAsAbsolute(IntVal)) {
6497     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6498   } else {
6499     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6500   }
6501   return true;
6502 }
6503 
6504 bool
6505 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6506   if (isToken(AsmToken::String)) {
6507     Val = getToken().getStringContents();
6508     lex();
6509     return true;
6510   } else {
6511     Error(getLoc(), ErrMsg);
6512     return false;
6513   }
6514 }
6515 
6516 bool
6517 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6518   if (isToken(AsmToken::Identifier)) {
6519     Val = getTokenStr();
6520     lex();
6521     return true;
6522   } else {
6523     if (!ErrMsg.empty())
6524       Error(getLoc(), ErrMsg);
6525     return false;
6526   }
6527 }
6528 
6529 AsmToken
6530 AMDGPUAsmParser::getToken() const {
6531   return Parser.getTok();
6532 }
6533 
6534 AsmToken
6535 AMDGPUAsmParser::peekToken() {
6536   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6537 }
6538 
6539 void
6540 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6541   auto TokCount = getLexer().peekTokens(Tokens);
6542 
6543   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6544     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6545 }
6546 
6547 AsmToken::TokenKind
6548 AMDGPUAsmParser::getTokenKind() const {
6549   return getLexer().getKind();
6550 }
6551 
6552 SMLoc
6553 AMDGPUAsmParser::getLoc() const {
6554   return getToken().getLoc();
6555 }
6556 
6557 StringRef
6558 AMDGPUAsmParser::getTokenStr() const {
6559   return getToken().getString();
6560 }
6561 
6562 void
6563 AMDGPUAsmParser::lex() {
6564   Parser.Lex();
6565 }
6566 
6567 SMLoc
6568 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6569                                const OperandVector &Operands) const {
6570   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6571     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6572     if (Test(Op))
6573       return Op.getStartLoc();
6574   }
6575   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6576 }
6577 
6578 SMLoc
6579 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6580                            const OperandVector &Operands) const {
6581   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6582   return getOperandLoc(Test, Operands);
6583 }
6584 
6585 SMLoc
6586 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6587                            const OperandVector &Operands) const {
6588   auto Test = [=](const AMDGPUOperand& Op) {
6589     return Op.isRegKind() && Op.getReg() == Reg;
6590   };
6591   return getOperandLoc(Test, Operands);
6592 }
6593 
6594 SMLoc
6595 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6596   auto Test = [](const AMDGPUOperand& Op) {
6597     return Op.IsImmKindLiteral() || Op.isExpr();
6598   };
6599   return getOperandLoc(Test, Operands);
6600 }
6601 
6602 SMLoc
6603 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6604   auto Test = [](const AMDGPUOperand& Op) {
6605     return Op.isImmKindConst();
6606   };
6607   return getOperandLoc(Test, Operands);
6608 }
6609 
6610 //===----------------------------------------------------------------------===//
6611 // swizzle
6612 //===----------------------------------------------------------------------===//
6613 
6614 LLVM_READNONE
6615 static unsigned
6616 encodeBitmaskPerm(const unsigned AndMask,
6617                   const unsigned OrMask,
6618                   const unsigned XorMask) {
6619   using namespace llvm::AMDGPU::Swizzle;
6620 
6621   return BITMASK_PERM_ENC |
6622          (AndMask << BITMASK_AND_SHIFT) |
6623          (OrMask  << BITMASK_OR_SHIFT)  |
6624          (XorMask << BITMASK_XOR_SHIFT);
6625 }
6626 
6627 bool
6628 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6629                                      const unsigned MinVal,
6630                                      const unsigned MaxVal,
6631                                      const StringRef ErrMsg,
6632                                      SMLoc &Loc) {
6633   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6634     return false;
6635   }
6636   Loc = getLoc();
6637   if (!parseExpr(Op)) {
6638     return false;
6639   }
6640   if (Op < MinVal || Op > MaxVal) {
6641     Error(Loc, ErrMsg);
6642     return false;
6643   }
6644 
6645   return true;
6646 }
6647 
6648 bool
6649 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6650                                       const unsigned MinVal,
6651                                       const unsigned MaxVal,
6652                                       const StringRef ErrMsg) {
6653   SMLoc Loc;
6654   for (unsigned i = 0; i < OpNum; ++i) {
6655     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6656       return false;
6657   }
6658 
6659   return true;
6660 }
6661 
6662 bool
6663 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6664   using namespace llvm::AMDGPU::Swizzle;
6665 
6666   int64_t Lane[LANE_NUM];
6667   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6668                            "expected a 2-bit lane id")) {
6669     Imm = QUAD_PERM_ENC;
6670     for (unsigned I = 0; I < LANE_NUM; ++I) {
6671       Imm |= Lane[I] << (LANE_SHIFT * I);
6672     }
6673     return true;
6674   }
6675   return false;
6676 }
6677 
6678 bool
6679 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6680   using namespace llvm::AMDGPU::Swizzle;
6681 
6682   SMLoc Loc;
6683   int64_t GroupSize;
6684   int64_t LaneIdx;
6685 
6686   if (!parseSwizzleOperand(GroupSize,
6687                            2, 32,
6688                            "group size must be in the interval [2,32]",
6689                            Loc)) {
6690     return false;
6691   }
6692   if (!isPowerOf2_64(GroupSize)) {
6693     Error(Loc, "group size must be a power of two");
6694     return false;
6695   }
6696   if (parseSwizzleOperand(LaneIdx,
6697                           0, GroupSize - 1,
6698                           "lane id must be in the interval [0,group size - 1]",
6699                           Loc)) {
6700     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6701     return true;
6702   }
6703   return false;
6704 }
6705 
6706 bool
6707 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6708   using namespace llvm::AMDGPU::Swizzle;
6709 
6710   SMLoc Loc;
6711   int64_t GroupSize;
6712 
6713   if (!parseSwizzleOperand(GroupSize,
6714                            2, 32,
6715                            "group size must be in the interval [2,32]",
6716                            Loc)) {
6717     return false;
6718   }
6719   if (!isPowerOf2_64(GroupSize)) {
6720     Error(Loc, "group size must be a power of two");
6721     return false;
6722   }
6723 
6724   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6725   return true;
6726 }
6727 
6728 bool
6729 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6730   using namespace llvm::AMDGPU::Swizzle;
6731 
6732   SMLoc Loc;
6733   int64_t GroupSize;
6734 
6735   if (!parseSwizzleOperand(GroupSize,
6736                            1, 16,
6737                            "group size must be in the interval [1,16]",
6738                            Loc)) {
6739     return false;
6740   }
6741   if (!isPowerOf2_64(GroupSize)) {
6742     Error(Loc, "group size must be a power of two");
6743     return false;
6744   }
6745 
6746   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6747   return true;
6748 }
6749 
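// BITMASK_PERM mask characters (see the switch below): '0' forces the result
// bit to 0, '1' forces it to 1, 'p' preserves the corresponding lane id bit,
// and 'i' inverts it. Illustrative example: offset:swizzle(BITMASK_PERM, "01pip").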
6750 bool
6751 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6752   using namespace llvm::AMDGPU::Swizzle;
6753 
6754   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6755     return false;
6756   }
6757 
6758   StringRef Ctl;
6759   SMLoc StrLoc = getLoc();
6760   if (!parseString(Ctl)) {
6761     return false;
6762   }
6763   if (Ctl.size() != BITMASK_WIDTH) {
6764     Error(StrLoc, "expected a 5-character mask");
6765     return false;
6766   }
6767 
6768   unsigned AndMask = 0;
6769   unsigned OrMask = 0;
6770   unsigned XorMask = 0;
6771 
6772   for (size_t i = 0; i < Ctl.size(); ++i) {
6773     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6774     switch(Ctl[i]) {
6775     default:
6776       Error(StrLoc, "invalid mask");
6777       return false;
6778     case '0':
6779       break;
6780     case '1':
6781       OrMask |= Mask;
6782       break;
6783     case 'p':
6784       AndMask |= Mask;
6785       break;
6786     case 'i':
6787       AndMask |= Mask;
6788       XorMask |= Mask;
6789       break;
6790     }
6791   }
6792 
6793   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6794   return true;
6795 }
6796 
6797 bool
6798 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6799 
6800   SMLoc OffsetLoc = getLoc();
6801 
6802   if (!parseExpr(Imm, "a swizzle macro")) {
6803     return false;
6804   }
6805   if (!isUInt<16>(Imm)) {
6806     Error(OffsetLoc, "expected a 16-bit offset");
6807     return false;
6808   }
6809   return true;
6810 }
6811 
6812 bool
6813 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6814   using namespace llvm::AMDGPU::Swizzle;
6815 
6816   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6817 
6818     SMLoc ModeLoc = getLoc();
6819     bool Ok = false;
6820 
6821     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6822       Ok = parseSwizzleQuadPerm(Imm);
6823     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6824       Ok = parseSwizzleBitmaskPerm(Imm);
6825     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6826       Ok = parseSwizzleBroadcast(Imm);
6827     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6828       Ok = parseSwizzleSwap(Imm);
6829     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6830       Ok = parseSwizzleReverse(Imm);
6831     } else {
6832       Error(ModeLoc, "expected a swizzle mode");
6833     }
6834 
6835     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6836   }
6837 
6838   return false;
6839 }
6840 
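// The swizzle operand is either a raw 16-bit offset or a "swizzle" macro.
// Illustrative examples: offset:swizzle(QUAD_PERM, 0, 1, 2, 3),
// offset:swizzle(BROADCAST, 8, 0), offset:swizzle(SWAP, 2),
// offset:swizzle(REVERSE, 4), or offset:0x1234.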
6841 OperandMatchResultTy
6842 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6843   SMLoc S = getLoc();
6844   int64_t Imm = 0;
6845 
6846   if (trySkipId("offset")) {
6847 
6848     bool Ok = false;
6849     if (skipToken(AsmToken::Colon, "expected a colon")) {
6850       if (trySkipId("swizzle")) {
6851         Ok = parseSwizzleMacro(Imm);
6852       } else {
6853         Ok = parseSwizzleOffset(Imm);
6854       }
6855     }
6856 
6857     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6858 
6859     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6860   } else {
6861     // Swizzle "offset" operand is optional.
6862     // If it is omitted, try parsing other optional operands.
6863     return parseOptionalOpr(Operands);
6864   }
6865 }
6866 
6867 bool
6868 AMDGPUOperand::isSwizzle() const {
6869   return isImmTy(ImmTySwizzle);
6870 }
6871 
6872 //===----------------------------------------------------------------------===//
6873 // VGPR Index Mode
6874 //===----------------------------------------------------------------------===//
6875 
6876 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6877 
6878   using namespace llvm::AMDGPU::VGPRIndexMode;
6879 
6880   if (trySkipToken(AsmToken::RParen)) {
6881     return OFF;
6882   }
6883 
6884   int64_t Imm = 0;
6885 
6886   while (true) {
6887     unsigned Mode = 0;
6888     SMLoc S = getLoc();
6889 
6890     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6891       if (trySkipId(IdSymbolic[ModeId])) {
6892         Mode = 1 << ModeId;
6893         break;
6894       }
6895     }
6896 
6897     if (Mode == 0) {
6898       Error(S, (Imm == 0)?
6899                "expected a VGPR index mode or a closing parenthesis" :
6900                "expected a VGPR index mode");
6901       return UNDEF;
6902     }
6903 
6904     if (Imm & Mode) {
6905       Error(S, "duplicate VGPR index mode");
6906       return UNDEF;
6907     }
6908     Imm |= Mode;
6909 
6910     if (trySkipToken(AsmToken::RParen))
6911       break;
6912     if (!skipToken(AsmToken::Comma,
6913                    "expected a comma or a closing parenthesis"))
6914       return UNDEF;
6915   }
6916 
6917   return Imm;
6918 }
6919 
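// The operand is either a "gpr_idx" macro listing the enabled VGPR index
// modes, e.g. (illustrative) "gpr_idx(SRC0,SRC1,DST)", or a raw 4-bit
// immediate in which each bit enables one mode.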
6920 OperandMatchResultTy
6921 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6922 
6923   using namespace llvm::AMDGPU::VGPRIndexMode;
6924 
6925   int64_t Imm = 0;
6926   SMLoc S = getLoc();
6927 
6928   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6929     Imm = parseGPRIdxMacro();
6930     if (Imm == UNDEF)
6931       return MatchOperand_ParseFail;
6932   } else {
6933     if (getParser().parseAbsoluteExpression(Imm))
6934       return MatchOperand_ParseFail;
6935     if (Imm < 0 || !isUInt<4>(Imm)) {
6936       Error(S, "invalid immediate: only 4-bit values are legal");
6937       return MatchOperand_ParseFail;
6938     }
6939   }
6940 
6941   Operands.push_back(
6942       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6943   return MatchOperand_Success;
6944 }
6945 
6946 bool AMDGPUOperand::isGPRIdxMode() const {
6947   return isImmTy(ImmTyGprIdxMode);
6948 }
6949 
6950 //===----------------------------------------------------------------------===//
6951 // sopp branch targets
6952 //===----------------------------------------------------------------------===//
6953 
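// A branch target is either a label or an absolute expression that fits into
// a signed 16-bit offset, e.g. (illustrative) "s_branch loop_end" or
// "s_branch 8".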
6954 OperandMatchResultTy
6955 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6956 
6957   // Make sure we are not parsing something
6958   // that looks like a label or an expression but is not.
6959   // This will improve error messages.
6960   if (isRegister() || isModifier())
6961     return MatchOperand_NoMatch;
6962 
6963   if (!parseExpr(Operands))
6964     return MatchOperand_ParseFail;
6965 
6966   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6967   assert(Opr.isImm() || Opr.isExpr());
6968   SMLoc Loc = Opr.getStartLoc();
6969 
6970   // Currently we do not support arbitrary expressions as branch targets.
6971   // Only labels and absolute expressions are accepted.
6972   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6973     Error(Loc, "expected an absolute expression or a label");
6974   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6975     Error(Loc, "expected a 16-bit signed jump offset");
6976   }
6977 
6978   return MatchOperand_Success;
6979 }
6980 
6981 //===----------------------------------------------------------------------===//
6982 // Boolean holding registers
6983 //===----------------------------------------------------------------------===//
6984 
6985 OperandMatchResultTy
6986 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6987   return parseReg(Operands);
6988 }
6989 
6990 //===----------------------------------------------------------------------===//
6991 // mubuf
6992 //===----------------------------------------------------------------------===//
6993 
6994 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
6995   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
6996 }
6997 
6998 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6999                                    const OperandVector &Operands,
7000                                    bool IsAtomic,
7001                                    bool IsLds) {
7002   bool IsLdsOpcode = IsLds;
7003   bool HasLdsModifier = false;
7004   OptionalImmIndexMap OptionalIdx;
7005   unsigned FirstOperandIdx = 1;
7006   bool IsAtomicReturn = false;
7007 
7008   if (IsAtomic) {
7009     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7010       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7011       if (!Op.isCPol())
7012         continue;
7013       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7014       break;
7015     }
7016 
7017     if (!IsAtomicReturn) {
7018       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7019       if (NewOpc != -1)
7020         Inst.setOpcode(NewOpc);
7021     }
7022 
7023     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7024                       SIInstrFlags::IsAtomicRet;
7025   }
7026 
7027   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7028     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7029 
7030     // Add the register arguments
7031     if (Op.isReg()) {
7032       Op.addRegOperands(Inst, 1);
7033       // Insert a tied src for the atomic return dst.
7034       // This cannot be postponed as subsequent calls to
7035       // addImmOperands rely on the correct number of MC operands.
7036       if (IsAtomicReturn && i == FirstOperandIdx)
7037         Op.addRegOperands(Inst, 1);
7038       continue;
7039     }
7040 
7041     // Handle the case where soffset is an immediate
7042     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7043       Op.addImmOperands(Inst, 1);
7044       continue;
7045     }
7046 
7047     HasLdsModifier |= Op.isLDS();
7048 
7049     // Handle tokens like 'offen' which are sometimes hard-coded into the
7050     // asm string.  There are no MCInst operands for these.
7051     if (Op.isToken()) {
7052       continue;
7053     }
7054     assert(Op.isImm());
7055 
7056     // Handle optional arguments
7057     OptionalIdx[Op.getImmTy()] = i;
7058   }
7059 
7060   // This is a workaround for an llvm quirk which may result in an
7061   // incorrect instruction selection. Lds and non-lds versions of
7062   // MUBUF instructions are identical except that lds versions
7063   // have a mandatory 'lds' modifier. However, this modifier follows
7064   // the optional modifiers, and the llvm asm matcher regards 'lds'
7065   // as optional as well. As a result, an lds version
7066   // of an opcode may be selected even if it has no 'lds' modifier.
7067   if (IsLdsOpcode && !HasLdsModifier) {
7068     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7069     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7070       Inst.setOpcode(NoLdsOpcode);
7071       IsLdsOpcode = false;
7072     }
7073   }
7074 
7075   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7076   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7077 
7078   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7079     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7080   }
7081   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7082 }
7083 
7084 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7085   OptionalImmIndexMap OptionalIdx;
7086 
7087   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7088     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7089 
7090     // Add the register arguments
7091     if (Op.isReg()) {
7092       Op.addRegOperands(Inst, 1);
7093       continue;
7094     }
7095 
7096     // Handle the case where soffset is an immediate
7097     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7098       Op.addImmOperands(Inst, 1);
7099       continue;
7100     }
7101 
7102     // Handle tokens like 'offen' which are sometimes hard-coded into the
7103     // asm string.  There are no MCInst operands for these.
7104     if (Op.isToken()) {
7105       continue;
7106     }
7107     assert(Op.isImm());
7108 
7109     // Handle optional arguments
7110     OptionalIdx[Op.getImmTy()] = i;
7111   }
7112 
7113   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7114                         AMDGPUOperand::ImmTyOffset);
7115   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7116   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7117   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7118   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7119 }
7120 
7121 //===----------------------------------------------------------------------===//
7122 // mimg
7123 //===----------------------------------------------------------------------===//
7124 
7125 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7126                               bool IsAtomic) {
7127   unsigned I = 1;
7128   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7129   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7130     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7131   }
7132 
7133   if (IsAtomic) {
7134     // Add src, same as dst
7135     assert(Desc.getNumDefs() == 1);
7136     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7137   }
7138 
7139   OptionalImmIndexMap OptionalIdx;
7140 
7141   for (unsigned E = Operands.size(); I != E; ++I) {
7142     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7143 
7144     // Add the register arguments
7145     if (Op.isReg()) {
7146       Op.addRegOperands(Inst, 1);
7147     } else if (Op.isImmModifier()) {
7148       OptionalIdx[Op.getImmTy()] = I;
7149     } else if (!Op.isToken()) {
7150       llvm_unreachable("unexpected operand type");
7151     }
7152   }
7153 
7154   bool IsGFX10Plus = isGFX10Plus();
7155 
7156   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7157   if (IsGFX10Plus)
7158     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7159   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7160   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7161   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7162   if (IsGFX10Plus)
7163     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7164   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7165     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7166   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7167   if (!IsGFX10Plus)
7168     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7169   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7170 }
7171 
7172 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7173   cvtMIMG(Inst, Operands, true);
7174 }
7175 
7176 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7177   OptionalImmIndexMap OptionalIdx;
7178   bool IsAtomicReturn = false;
7179 
7180   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7181     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7182     if (!Op.isCPol())
7183       continue;
7184     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7185     break;
7186   }
7187 
7188   if (!IsAtomicReturn) {
7189     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7190     if (NewOpc != -1)
7191       Inst.setOpcode(NewOpc);
7192   }
7193 
7194   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7195                     SIInstrFlags::IsAtomicRet;
7196 
7197   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7198     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7199 
7200     // Add the register arguments
7201     if (Op.isReg()) {
7202       Op.addRegOperands(Inst, 1);
7203       if (IsAtomicReturn && i == 1)
7204         Op.addRegOperands(Inst, 1);
7205       continue;
7206     }
7207 
7208     // Handle the case where soffset is an immediate
7209     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7210       Op.addImmOperands(Inst, 1);
7211       continue;
7212     }
7213 
7214     // Handle tokens like 'offen' which are sometimes hard-coded into the
7215     // asm string.  There are no MCInst operands for these.
7216     if (Op.isToken()) {
7217       continue;
7218     }
7219     assert(Op.isImm());
7220 
7221     // Handle optional arguments
7222     OptionalIdx[Op.getImmTy()] = i;
7223   }
7224 
7225   if ((int)Inst.getNumOperands() <=
7226       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7227     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7228   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7229 }
7230 
7231 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7232                                       const OperandVector &Operands) {
7233   for (unsigned I = 1; I < Operands.size(); ++I) {
7234     auto &Operand = (AMDGPUOperand &)*Operands[I];
7235     if (Operand.isReg())
7236       Operand.addRegOperands(Inst, 1);
7237   }
7238 
7239   Inst.addOperand(MCOperand::createImm(1)); // a16
7240 }
7241 
7242 //===----------------------------------------------------------------------===//
7243 // smrd
7244 //===----------------------------------------------------------------------===//
7245 
7246 bool AMDGPUOperand::isSMRDOffset8() const {
7247   return isImm() && isUInt<8>(getImm());
7248 }
7249 
7250 bool AMDGPUOperand::isSMEMOffset() const {
7251   return isImm(); // Offset range is checked later by validator.
7252 }
7253 
7254 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7255   // 32-bit literals are only supported on CI, and we only want to use them
7256   // when the offset is > 8 bits.
7257   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7258 }
7259 
7260 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7261   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7262 }
7263 
7264 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7265   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7266 }
7267 
7268 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7269   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7270 }
7271 
7272 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7273   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7274 }
7275 
7276 //===----------------------------------------------------------------------===//
7277 // vop3
7278 //===----------------------------------------------------------------------===//
7279 
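// Output modifier (omod) encoding assumed by the conversions below:
// 0 = none, 1 = multiply by 2, 2 = multiply by 4, 3 = divide by 2.
// Hence "mul:2" maps to 1, "mul:4" to 2, and "div:2" to 3.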
7280 static bool ConvertOmodMul(int64_t &Mul) {
7281   if (Mul != 1 && Mul != 2 && Mul != 4)
7282     return false;
7283 
7284   Mul >>= 1;
7285   return true;
7286 }
7287 
7288 static bool ConvertOmodDiv(int64_t &Div) {
7289   if (Div == 1) {
7290     Div = 0;
7291     return true;
7292   }
7293 
7294   if (Div == 2) {
7295     Div = 3;
7296     return true;
7297   }
7298 
7299   return false;
7300 }
7301 
7302 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7303 // This is intentional and ensures compatibility with sp3.
7304 // See bug 35397 for details.
7305 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7306   if (BoundCtrl == 0 || BoundCtrl == 1) {
7307     BoundCtrl = 1;
7308     return true;
7309   }
7310   return false;
7311 }
7312 
7313 // Note: the order in this table matches the order of operands in AsmString.
7314 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7315   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7316   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7317   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7318   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7319   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7320   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7321   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7322   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7323   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7324   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7325   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7326   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7327   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7328   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7329   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7330   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7331   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7332   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7333   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7334   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7335   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7336   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7337   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7338   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7339   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7340   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7341   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7342   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7343   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7344   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7345   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7346   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7347   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7348   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7349   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7350   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7351   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7352   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7353   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7354   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7355   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7356 };
7357 
7358 void AMDGPUAsmParser::onBeginOfFile() {
7359   if (!getParser().getStreamer().getTargetStreamer() ||
7360       getSTI().getTargetTriple().getArch() == Triple::r600)
7361     return;
7362 
7363   if (!getTargetStreamer().getTargetID())
7364     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7365 
7366   if (isHsaAbiVersion3Or4(&getSTI()))
7367     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7368 }
7369 
7370 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7371 
7372   OperandMatchResultTy res = parseOptionalOpr(Operands);
7373 
7374   // This is a hack to enable hardcoded mandatory operands which follow
7375   // optional operands.
7376   //
7377   // The current design assumes that all operands after the first optional
7378   // operand are also optional. However, the implementation of some instructions
7379   // violates this rule (see e.g. flat/global atomics, which have hardcoded
7380   // 'glc' operands).
7381   //
7382   // To alleviate this problem, we have to (implicitly) parse extra operands
7383   // so that the autogenerated parser of custom operands never hits them.
7384 
7385   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7386     if (res != MatchOperand_Success ||
7387         isToken(AsmToken::EndOfStatement))
7388       break;
7389 
7390     trySkipToken(AsmToken::Comma);
7391     res = parseOptionalOpr(Operands);
7392   }
7393 
7394   return res;
7395 }
7396 
7397 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7398   OperandMatchResultTy res;
7399   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7400     // try to parse any optional operand here
7401     if (Op.IsBit) {
7402       res = parseNamedBit(Op.Name, Operands, Op.Type);
7403     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7404       res = parseOModOperand(Operands);
7405     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7406                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7407                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7408       res = parseSDWASel(Operands, Op.Name, Op.Type);
7409     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7410       res = parseSDWADstUnused(Operands);
7411     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7412                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7413                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7414                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7415       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7416                                         Op.ConvertResult);
7417     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7418       res = parseDim(Operands);
7419     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7420       res = parseCPol(Operands);
7421     } else {
7422       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7423     }
7424     if (res != MatchOperand_NoMatch) {
7425       return res;
7426     }
7427   }
7428   return MatchOperand_NoMatch;
7429 }
7430 
7431 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7432   StringRef Name = getTokenStr();
7433   if (Name == "mul") {
7434     return parseIntWithPrefix("mul", Operands,
7435                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7436   }
7437 
7438   if (Name == "div") {
7439     return parseIntWithPrefix("div", Operands,
7440                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7441   }
7442 
7443   return MatchOperand_NoMatch;
7444 }
7445 
7446 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7447   cvtVOP3P(Inst, Operands);
7448 
7449   int Opc = Inst.getOpcode();
7450 
7451   int SrcNum;
7452   const int Ops[] = { AMDGPU::OpName::src0,
7453                       AMDGPU::OpName::src1,
7454                       AMDGPU::OpName::src2 };
7455   for (SrcNum = 0;
7456        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7457        ++SrcNum);
7458   assert(SrcNum > 0);
7459 
7460   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7461   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7462 
7463   if ((OpSel & (1 << SrcNum)) != 0) {
7464     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7465     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7466     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7467   }
7468 }
7469 
7470 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7471       // 1. This operand is an input modifier
7472   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7473       // 2. This is not the last operand
7474       && Desc.NumOperands > (OpNum + 1)
7475       // 3. The next operand is a register class
7476       && Desc.OpInfo[OpNum + 1].RegClass != -1
7477       // 4. The next operand is not tied to any other operand
7478       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7479 }
7480 
7481 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7482 {
7483   OptionalImmIndexMap OptionalIdx;
7484   unsigned Opc = Inst.getOpcode();
7485 
7486   unsigned I = 1;
7487   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7488   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7489     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7490   }
7491 
7492   for (unsigned E = Operands.size(); I != E; ++I) {
7493     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7494     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7495       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7496     } else if (Op.isInterpSlot() ||
7497                Op.isInterpAttr() ||
7498                Op.isAttrChan()) {
7499       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7500     } else if (Op.isImmModifier()) {
7501       OptionalIdx[Op.getImmTy()] = I;
7502     } else {
7503       llvm_unreachable("unhandled operand type");
7504     }
7505   }
7506 
7507   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7508     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7509   }
7510 
7511   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7512     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7513   }
7514 
7515   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7516     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7517   }
7518 }
7519 
7520 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7521                               OptionalImmIndexMap &OptionalIdx) {
7522   unsigned Opc = Inst.getOpcode();
7523 
7524   unsigned I = 1;
7525   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7526   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7527     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7528   }
7529 
7530   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7531     // This instruction has src modifiers
7532     for (unsigned E = Operands.size(); I != E; ++I) {
7533       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7534       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7535         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7536       } else if (Op.isImmModifier()) {
7537         OptionalIdx[Op.getImmTy()] = I;
7538       } else if (Op.isRegOrImm()) {
7539         Op.addRegOrImmOperands(Inst, 1);
7540       } else {
7541         llvm_unreachable("unhandled operand type");
7542       }
7543     }
7544   } else {
7545     // No src modifiers
7546     for (unsigned E = Operands.size(); I != E; ++I) {
7547       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7548       if (Op.isMod()) {
7549         OptionalIdx[Op.getImmTy()] = I;
7550       } else {
7551         Op.addRegOrImmOperands(Inst, 1);
7552       }
7553     }
7554   }
7555 
7556   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7557     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7558   }
7559 
7560   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7561     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7562   }
7563 
7564   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7565   // they have a src2 register operand that is tied to the dst operand.
7566   // The assembler does not allow modifiers for this operand, so src2_modifiers
7567   // must be 0.
7568   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7569       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7570       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7571       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7572       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7573       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7574       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7575       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7576       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7577       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7578       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7579     auto it = Inst.begin();
7580     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7581     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7582     ++it;
7583     // Copy the operand to ensure it's not invalidated when Inst grows.
7584     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7585   }
7586 }
7587 
7588 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7589   OptionalImmIndexMap OptionalIdx;
7590   cvtVOP3(Inst, Operands, OptionalIdx);
7591 }
7592 
7593 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7594                                OptionalImmIndexMap &OptIdx) {
7595   const int Opc = Inst.getOpcode();
7596   const MCInstrDesc &Desc = MII.get(Opc);
7597 
7598   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7599 
7600   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7601     assert(!IsPacked);
7602     Inst.addOperand(Inst.getOperand(0));
7603   }
7604 
7605   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
7606   // instruction, and then figure out where to actually put the modifiers.
7607 
7608   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7609   if (OpSelIdx != -1) {
7610     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7611   }
7612 
7613   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7614   if (OpSelHiIdx != -1) {
7615     int DefaultVal = IsPacked ? -1 : 0;
7616     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7617                           DefaultVal);
7618   }
7619 
7620   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7621   if (NegLoIdx != -1) {
7622     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7623     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7624   }
7625 
7626   const int Ops[] = { AMDGPU::OpName::src0,
7627                       AMDGPU::OpName::src1,
7628                       AMDGPU::OpName::src2 };
7629   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7630                          AMDGPU::OpName::src1_modifiers,
7631                          AMDGPU::OpName::src2_modifiers };
7632 
7633   unsigned OpSel = 0;
7634   unsigned OpSelHi = 0;
7635   unsigned NegLo = 0;
7636   unsigned NegHi = 0;
7637 
7638   if (OpSelIdx != -1)
7639     OpSel = Inst.getOperand(OpSelIdx).getImm();
7640 
7641   if (OpSelHiIdx != -1)
7642     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7643 
7644   if (NegLoIdx != -1) {
7645     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7646     NegLo = Inst.getOperand(NegLoIdx).getImm();
7647     NegHi = Inst.getOperand(NegHiIdx).getImm();
7648   }
7649 
7650   for (int J = 0; J < 3; ++J) {
7651     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7652     if (OpIdx == -1)
7653       break;
7654 
7655     uint32_t ModVal = 0;
7656 
7657     if ((OpSel & (1 << J)) != 0)
7658       ModVal |= SISrcMods::OP_SEL_0;
7659 
7660     if ((OpSelHi & (1 << J)) != 0)
7661       ModVal |= SISrcMods::OP_SEL_1;
7662 
7663     if ((NegLo & (1 << J)) != 0)
7664       ModVal |= SISrcMods::NEG;
7665 
7666     if ((NegHi & (1 << J)) != 0)
7667       ModVal |= SISrcMods::NEG_HI;
7668 
7669     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7670 
7671     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7672   }
7673 }
7674 
7675 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7676   OptionalImmIndexMap OptIdx;
7677   cvtVOP3(Inst, Operands, OptIdx);
7678   cvtVOP3P(Inst, Operands, OptIdx);
7679 }
7680 
7681 //===----------------------------------------------------------------------===//
7682 // dpp
7683 //===----------------------------------------------------------------------===//
7684 
7685 bool AMDGPUOperand::isDPP8() const {
7686   return isImmTy(ImmTyDPP8);
7687 }
7688 
7689 bool AMDGPUOperand::isDPPCtrl() const {
7690   using namespace AMDGPU::DPP;
7691 
7692   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7693   if (result) {
7694     int64_t Imm = getImm();
7695     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7696            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7697            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7698            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7699            (Imm == DppCtrl::WAVE_SHL1) ||
7700            (Imm == DppCtrl::WAVE_ROL1) ||
7701            (Imm == DppCtrl::WAVE_SHR1) ||
7702            (Imm == DppCtrl::WAVE_ROR1) ||
7703            (Imm == DppCtrl::ROW_MIRROR) ||
7704            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7705            (Imm == DppCtrl::BCAST15) ||
7706            (Imm == DppCtrl::BCAST31) ||
7707            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7708            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7709   }
7710   return false;
7711 }
7712 
7713 //===----------------------------------------------------------------------===//
7714 // mAI
7715 //===----------------------------------------------------------------------===//
7716 
7717 bool AMDGPUOperand::isBLGP() const {
7718   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7719 }
7720 
7721 bool AMDGPUOperand::isCBSZ() const {
7722   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7723 }
7724 
7725 bool AMDGPUOperand::isABID() const {
7726   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7727 }
7728 
7729 bool AMDGPUOperand::isS16Imm() const {
7730   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7731 }
7732 
7733 bool AMDGPUOperand::isU16Imm() const {
7734   return isImm() && isUInt<16>(getImm());
7735 }
7736 
7737 //===----------------------------------------------------------------------===//
7738 // dim
7739 //===----------------------------------------------------------------------===//
7740 
7741 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7742   // We want to allow "dim:1D" etc.,
7743   // but the initial 1 is tokenized as an integer.
7744   std::string Token;
7745   if (isToken(AsmToken::Integer)) {
7746     SMLoc Loc = getToken().getEndLoc();
7747     Token = std::string(getTokenStr());
7748     lex();
7749     if (getLoc() != Loc)
7750       return false;
7751   }
7752 
7753   StringRef Suffix;
7754   if (!parseId(Suffix))
7755     return false;
7756   Token += Suffix;
7757 
7758   StringRef DimId = Token;
7759   if (DimId.startswith("SQ_RSRC_IMG_"))
7760     DimId = DimId.drop_front(12);
7761 
7762   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7763   if (!DimInfo)
7764     return false;
7765 
7766   Encoding = DimInfo->Encoding;
7767   return true;
7768 }
7769 
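// Parse the "dim:" modifier of GFX10+ MIMG instructions. parseDimId() accepts
// both the short assembler form (e.g. "dim:1D") and the full resource name
// (e.g. "dim:SQ_RSRC_IMG_1D"), which is stripped of its "SQ_RSRC_IMG_" prefix
// before the lookup.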
7770 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7771   if (!isGFX10Plus())
7772     return MatchOperand_NoMatch;
7773 
7774   SMLoc S = getLoc();
7775 
7776   if (!trySkipId("dim", AsmToken::Colon))
7777     return MatchOperand_NoMatch;
7778 
7779   unsigned Encoding;
7780   SMLoc Loc = getLoc();
7781   if (!parseDimId(Encoding)) {
7782     Error(Loc, "invalid dim value");
7783     return MatchOperand_ParseFail;
7784   }
7785 
7786   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7787                                               AMDGPUOperand::ImmTyDim));
7788   return MatchOperand_Success;
7789 }
7790 
7791 //===----------------------------------------------------------------------===//
7792 // dpp
7793 //===----------------------------------------------------------------------===//
7794 
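// Parse the "dpp8:[...]" modifier. Each of the eight selectors names the lane
// (0..7) that the corresponding lane of the group reads from; the selectors
// are packed into the immediate 3 bits apiece, with selector 0 in the low
// bits.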
7795 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7796   SMLoc S = getLoc();
7797 
7798   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7799     return MatchOperand_NoMatch;
7800 
7801   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7802 
7803   int64_t Sels[8];
7804 
7805   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7806     return MatchOperand_ParseFail;
7807 
7808   for (size_t i = 0; i < 8; ++i) {
7809     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7810       return MatchOperand_ParseFail;
7811 
7812     SMLoc Loc = getLoc();
7813     if (getParser().parseAbsoluteExpression(Sels[i]))
7814       return MatchOperand_ParseFail;
    if (Sels[i] < 0 || Sels[i] > 7) {
7816       Error(Loc, "expected a 3-bit value");
7817       return MatchOperand_ParseFail;
7818     }
7819   }
7820 
7821   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7822     return MatchOperand_ParseFail;
7823 
7824   unsigned DPP8 = 0;
7825   for (size_t i = 0; i < 8; ++i)
7826     DPP8 |= (Sels[i] << (i * 3));
7827 
7828   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7829   return MatchOperand_Success;
7830 }
7831 
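// Return true if the named dpp_ctrl modifier exists on the current subtarget:
// row_newbcast is GFX90A only, row_share and row_xmask are GFX10+, the wave_*
// shifts/rotates and row_bcast are VI/GFX9 only, and the remaining controls
// are accepted on all subtargets.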
7832 bool
7833 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7834                                     const OperandVector &Operands) {
7835   if (Ctrl == "row_newbcast")
7836     return isGFX90A();
7837 
7838   if (Ctrl == "row_share" ||
7839       Ctrl == "row_xmask")
7840     return isGFX10Plus();
7841 
7842   if (Ctrl == "wave_shl" ||
7843       Ctrl == "wave_shr" ||
7844       Ctrl == "wave_rol" ||
7845       Ctrl == "wave_ror" ||
7846       Ctrl == "row_bcast")
7847     return isVI() || isGFX9();
7848 
7849   return Ctrl == "row_mirror" ||
7850          Ctrl == "row_half_mirror" ||
7851          Ctrl == "quad_perm" ||
7852          Ctrl == "row_shl" ||
7853          Ctrl == "row_shr" ||
7854          Ctrl == "row_ror";
7855 }
7856 
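// Parse the argument of "quad_perm:[a,b,c,d]". Each 2-bit value selects the
// source lane within a quad and the four values are packed LSB-first, so
// e.g. quad_perm:[3,2,1,0] yields the encoding 0x1b.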
7857 int64_t
7858 AMDGPUAsmParser::parseDPPCtrlPerm() {
7859   // quad_perm:[%d,%d,%d,%d]
7860 
7861   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7862     return -1;
7863 
7864   int64_t Val = 0;
7865   for (int i = 0; i < 4; ++i) {
7866     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7867       return -1;
7868 
7869     int64_t Temp;
7870     SMLoc Loc = getLoc();
7871     if (getParser().parseAbsoluteExpression(Temp))
7872       return -1;
7873     if (Temp < 0 || Temp > 3) {
7874       Error(Loc, "expected a 2-bit value");
7875       return -1;
7876     }
7877 
7878     Val += (Temp << i * 2);
7879   }
7880 
7881   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7882     return -1;
7883 
7884   return Val;
7885 }
7886 
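// Parse the numeric argument of the dpp_ctrl modifiers that take a single
// value, e.g. "row_shl:1" or "row_bcast:15", check it against the legal range
// for that control and fold it into the dpp_ctrl encoding. Returns -1 on
// error.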
7887 int64_t
7888 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7889   using namespace AMDGPU::DPP;
7890 
7891   // sel:%d
7892 
7893   int64_t Val;
7894   SMLoc Loc = getLoc();
7895 
7896   if (getParser().parseAbsoluteExpression(Val))
7897     return -1;
7898 
7899   struct DppCtrlCheck {
7900     int64_t Ctrl;
7901     int Lo;
7902     int Hi;
7903   };
7904 
7905   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7906     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7907     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7908     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7909     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7910     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7911     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7912     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7913     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7914     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7915     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7916     .Default({-1, 0, 0});
7917 
7918   bool Valid;
7919   if (Check.Ctrl == -1) {
7920     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7922   } else {
7923     Valid = Check.Lo <= Val && Val <= Check.Hi;
7924     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7925   }
7926 
7927   if (!Valid) {
7928     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7929     return -1;
7930   }
7931 
7932   return Val;
7933 }
7934 
7935 OperandMatchResultTy
7936 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7937   using namespace AMDGPU::DPP;
7938 
7939   if (!isToken(AsmToken::Identifier) ||
7940       !isSupportedDPPCtrl(getTokenStr(), Operands))
7941     return MatchOperand_NoMatch;
7942 
7943   SMLoc S = getLoc();
7944   int64_t Val = -1;
7945   StringRef Ctrl;
7946 
7947   parseId(Ctrl);
7948 
7949   if (Ctrl == "row_mirror") {
7950     Val = DppCtrl::ROW_MIRROR;
7951   } else if (Ctrl == "row_half_mirror") {
7952     Val = DppCtrl::ROW_HALF_MIRROR;
7953   } else {
7954     if (skipToken(AsmToken::Colon, "expected a colon")) {
7955       if (Ctrl == "quad_perm") {
7956         Val = parseDPPCtrlPerm();
7957       } else {
7958         Val = parseDPPCtrlSel(Ctrl);
7959       }
7960     }
7961   }
7962 
7963   if (Val == -1)
7964     return MatchOperand_ParseFail;
7965 
7966   Operands.push_back(
7967     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7968   return MatchOperand_Success;
7969 }
7970 
7971 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7972   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7973 }
7974 
7975 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7976   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7977 }
7978 
7979 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7980   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7981 }
7982 
7983 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7984   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7985 }
7986 
7987 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7988   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7989 }
7990 
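// Convert the parsed operands of a DPP instruction into an MCInst. Register
// and modifier operands are appended in order; for DPP8 the packed selector
// and fi immediates are added directly, while for classic DPP the optional
// row_mask, bank_mask, bound_ctrl and fi immediates are collected and
// appended with their default values when omitted.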
7991 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7992   OptionalImmIndexMap OptionalIdx;
7993 
7994   unsigned Opc = Inst.getOpcode();
7995   bool HasModifiers =
7996       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
7997   unsigned I = 1;
7998   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7999   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8000     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8001   }
8002 
8003   int Fi = 0;
8004   for (unsigned E = Operands.size(); I != E; ++I) {
8005     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8006                                             MCOI::TIED_TO);
8007     if (TiedTo != -1) {
8008       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle tied "old" or src2 operands for MAC instructions.
8010       Inst.addOperand(Inst.getOperand(TiedTo));
8011     }
8012     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8013     // Add the register arguments
8014     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
8017       continue;
8018     }
8019 
8020     if (IsDPP8) {
8021       if (Op.isDPP8()) {
8022         Op.addImmOperands(Inst, 1);
8023       } else if (HasModifiers &&
8024                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8025         Op.addRegWithFPInputModsOperands(Inst, 2);
8026       } else if (Op.isFI()) {
8027         Fi = Op.getImm();
8028       } else if (Op.isReg()) {
8029         Op.addRegOperands(Inst, 1);
8030       } else {
8031         llvm_unreachable("Invalid operand type");
8032       }
8033     } else {
8034       if (HasModifiers &&
8035           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8036         Op.addRegWithFPInputModsOperands(Inst, 2);
8037       } else if (Op.isReg()) {
8038         Op.addRegOperands(Inst, 1);
8039       } else if (Op.isDPPCtrl()) {
8040         Op.addImmOperands(Inst, 1);
8041       } else if (Op.isImm()) {
8042         // Handle optional arguments
8043         OptionalIdx[Op.getImmTy()] = I;
8044       } else {
8045         llvm_unreachable("Invalid operand type");
8046       }
8047     }
8048   }
8049 
8050   if (IsDPP8) {
8051     using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8053   } else {
8054     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8055     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8056     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8057     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8058       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8059     }
8060   }
8061 }
8062 
8063 //===----------------------------------------------------------------------===//
8064 // sdwa
8065 //===----------------------------------------------------------------------===//
8066 
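// Parse an SDWA byte/word selector such as "dst_sel:BYTE_0" or
// "src0_sel:WORD_1". Prefix names the modifier being parsed and Type is the
// immediate kind attached to the resulting operand.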
8067 OperandMatchResultTy
8068 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8069                               AMDGPUOperand::ImmTy Type) {
8070   using namespace llvm::AMDGPU::SDWA;
8071 
8072   SMLoc S = getLoc();
8073   StringRef Value;
8074   OperandMatchResultTy res;
8075 
8076   SMLoc StringLoc;
8077   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8078   if (res != MatchOperand_Success) {
8079     return res;
8080   }
8081 
8082   int64_t Int;
8083   Int = StringSwitch<int64_t>(Value)
8084         .Case("BYTE_0", SdwaSel::BYTE_0)
8085         .Case("BYTE_1", SdwaSel::BYTE_1)
8086         .Case("BYTE_2", SdwaSel::BYTE_2)
8087         .Case("BYTE_3", SdwaSel::BYTE_3)
8088         .Case("WORD_0", SdwaSel::WORD_0)
8089         .Case("WORD_1", SdwaSel::WORD_1)
8090         .Case("DWORD", SdwaSel::DWORD)
8091         .Default(0xffffffff);
8092 
8093   if (Int == 0xffffffff) {
8094     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8095     return MatchOperand_ParseFail;
8096   }
8097 
8098   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8099   return MatchOperand_Success;
8100 }
8101 
8102 OperandMatchResultTy
8103 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8104   using namespace llvm::AMDGPU::SDWA;
8105 
8106   SMLoc S = getLoc();
8107   StringRef Value;
8108   OperandMatchResultTy res;
8109 
8110   SMLoc StringLoc;
8111   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8112   if (res != MatchOperand_Success) {
8113     return res;
8114   }
8115 
8116   int64_t Int;
8117   Int = StringSwitch<int64_t>(Value)
8118         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8119         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8120         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8121         .Default(0xffffffff);
8122 
8123   if (Int == 0xffffffff) {
8124     Error(StringLoc, "invalid dst_unused value");
8125     return MatchOperand_ParseFail;
8126   }
8127 
8128   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8129   return MatchOperand_Success;
8130 }
8131 
8132 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8133   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8134 }
8135 
8136 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8137   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8138 }
8139 
8140 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8141   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8142 }
8143 
8144 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8145   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8146 }
8147 
8148 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8149   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8150 }
8151 
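// Common conversion for SDWA instructions. Register and modifier operands are
// copied in order, the "vcc" tokens used by VOP2b/VOPC forms are skipped when
// requested, and the optional clamp, omod and sel/dst_unused immediates are
// appended with their default values when omitted.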
8152 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8153                               uint64_t BasicInstType,
8154                               bool SkipDstVcc,
8155                               bool SkipSrcVcc) {
8156   using namespace llvm::AMDGPU::SDWA;
8157 
8158   OptionalImmIndexMap OptionalIdx;
8159   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8160   bool SkippedVcc = false;
8161 
8162   unsigned I = 1;
8163   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8164   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8165     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8166   }
8167 
8168   for (unsigned E = Operands.size(); I != E; ++I) {
8169     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8170     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8171         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
8173       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8174       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8175       // Skip VCC only if we didn't skip it on previous iteration.
8176       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8177       if (BasicInstType == SIInstrFlags::VOP2 &&
8178           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8179            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8180         SkippedVcc = true;
8181         continue;
8182       } else if (BasicInstType == SIInstrFlags::VOPC &&
8183                  Inst.getNumOperands() == 0) {
8184         SkippedVcc = true;
8185         continue;
8186       }
8187     }
8188     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8189       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8190     } else if (Op.isImm()) {
8191       // Handle optional arguments
8192       OptionalIdx[Op.getImmTy()] = I;
8193     } else {
8194       llvm_unreachable("Invalid operand type");
8195     }
8196     SkippedVcc = false;
8197   }
8198 
8199   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8200       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8201       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
8203     switch (BasicInstType) {
8204     case SIInstrFlags::VOP1:
8205       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8206       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8207         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8208       }
8209       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8210       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8211       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8212       break;
8213 
8214     case SIInstrFlags::VOP2:
8215       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8216       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8217         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8218       }
8219       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8220       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8221       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8222       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8223       break;
8224 
8225     case SIInstrFlags::VOPC:
8226       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8227         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8228       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8229       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8230       break;
8231 
8232     default:
8233       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8234     }
8235   }
8236 
  // Special case for v_mac_{f16,f32}: it has a src2 register operand that is
  // tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
8241     auto it = Inst.begin();
8242     std::advance(
8243       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8244     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8245   }
8246 }
8247 
8248 //===----------------------------------------------------------------------===//
8249 // mAI
8250 //===----------------------------------------------------------------------===//
8251 
8252 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8253   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8254 }
8255 
8256 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8257   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8258 }
8259 
8260 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8261   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8262 }
8263 
8264 /// Force static initialization.
8265 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8266   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8267   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8268 }
8269 
8270 #define GET_REGISTER_MATCHER
8271 #define GET_MATCHER_IMPLEMENTATION
8272 #define GET_MNEMONIC_SPELL_CHECKER
8273 #define GET_MNEMONIC_CHECKER
8274 #include "AMDGPUGenAsmMatcher.inc"
8275 
// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
8278 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8279                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand(),
  // but MatchInstructionImpl() expects a token there and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // where the matcher expects the corresponding token.
8284   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8285   switch (Kind) {
8286   case MCK_addr64:
8287     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8288   case MCK_gds:
8289     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8290   case MCK_lds:
8291     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8292   case MCK_idxen:
8293     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8294   case MCK_offen:
8295     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8296   case MCK_SSrcB32:
    // When operands have expression values, they return true for isToken,
    // because a token cannot be distinguished from an expression at parse
    // time. MatchInstructionImpl() always tries to match an operand as a token
    // when isToken returns true, and if the name of the expression is not a
    // valid token the match fails, so we need to handle it here.
8303     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8304   case MCK_SSrcF32:
8305     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8306   case MCK_SoppBrTarget:
8307     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8308   case MCK_VReg32OrOff:
8309     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8310   case MCK_InterpSlot:
8311     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8312   case MCK_Attr:
8313     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8314   case MCK_AttrChan:
8315     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8316   case MCK_ImmSMEMOffset:
8317     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8318   case MCK_SReg_64:
8319   case MCK_SReg_64_XEXEC:
8320     // Null is defined as a 32-bit register but
8321     // it should also be enabled with 64-bit operands.
8322     // The following code enables it for SReg_64 operands
8323     // used as source and destination. Remaining source
8324     // operands are handled in isInlinableImm.
8325     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8326   default:
8327     return Match_InvalidOperand;
8328   }
8329 }
8330 
8331 //===----------------------------------------------------------------------===//
8332 // endpgm
8333 //===----------------------------------------------------------------------===//
8334 
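// Parse the optional immediate operand of s_endpgm, e.g. "s_endpgm" or
// "s_endpgm 3". The value must fit in 16 bits and defaults to 0 when omitted.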
8335 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8336   SMLoc S = getLoc();
8337   int64_t Imm = 0;
8338 
8339   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
8341     Imm = 0;
8342   }
8343 
8344   if (!isUInt<16>(Imm)) {
8345     Error(S, "expected a 16-bit value");
8346     return MatchOperand_ParseFail;
8347   }
8348 
8349   Operands.push_back(
8350       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8351   return MatchOperand_Success;
8352 }
8353 
8354 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8355