1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/Support/AMDGPUMetadata.h"
32 #include "llvm/Support/AMDHSAKernelDescriptor.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/MachineValueType.h"
35 #include "llvm/Support/TargetParser.h"
36 #include "llvm/Support/TargetRegistry.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
// Forward declaration: each operand keeps a back-pointer to its parser.
class AMDGPUAsmParser;

// Coarse classification of a parsed register name.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator selecting the active member of the payload union below.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  // Source range of the operand in the assembly text.
  SMLoc StartLoc, EndLoc;
  // Back-pointer to the parser that created this operand.
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
69   struct Modifiers {
70     bool Abs = false;
71     bool Neg = false;
72     bool Sext = false;
73 
74     bool hasFPModifiers() const { return Abs || Neg; }
75     bool hasIntModifiers() const { return Sext; }
76     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78     int64_t getFPModifiersOperand() const {
79       int64_t Operand = 0;
80       Operand |= Abs ? SISrcMods::ABS : 0u;
81       Operand |= Neg ? SISrcMods::NEG : 0u;
82       return Operand;
83     }
84 
85     int64_t getIntModifiersOperand() const {
86       int64_t Operand = 0;
87       Operand |= Sext ? SISrcMods::SEXT : 0u;
88       return Operand;
89     }
90 
91     int64_t getModifiersOperand() const {
92       assert(!(hasFPModifiers() && hasIntModifiers())
93            && "fp and int modifiers should not be used simultaneously");
94       if (hasFPModifiers()) {
95         return getFPModifiersOperand();
96       } else if (hasIntModifiers()) {
97         return getIntModifiersOperand();
98       } else {
99         return 0;
100       }
101     }
102 
103     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104   };
105 
  // Semantic type of an immediate operand: ImmTyNone for a plain value,
  // otherwise the named instruction modifier (gds, offset, clamp, dpp
  // controls, sdwa selectors, ...) the immediate was parsed as.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  // Classification of an immediate, set via the setImmKind* methods below
  // (literal vs. inline constant; None until classified).
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };
168 
private:
  // Payload for a Token operand; points into externally owned text
  // (no copy is made).
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Payload for an Immediate operand. Kind is mutable so the const
  // setImmKind* accessors can (re)classify an immediate in place.
  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  // Payload for a Register operand.
  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Active member is selected by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };
194 
195 public:
196   bool isToken() const override {
197     if (Kind == Token)
198       return true;
199 
200     // When parsing operands, we can't always tell if something was meant to be
201     // a token, like 'gds', or an expression that references a global variable.
202     // In this case, we assume the string is an expression, and if we need to
203     // interpret is a token, then we treat the symbol name as the token.
204     return isSymbolRefExpr();
205   }
206 
207   bool isSymbolRefExpr() const {
208     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209   }
210 
211   bool isImm() const override {
212     return Kind == Immediate;
213   }
214 
  // Classify this immediate's encoding. Imm.Kind is mutable, so these are
  // const even though they update state.
  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  // NOTE(review): capitalized 'Is' is inconsistent with isImmKindConst()
  // below; renaming would touch all callers, so it is left as-is.
  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  // Defined out of line.
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A plain register: register kind with no abs/neg/sext modifiers attached.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }
248 
  // Register of class RCID, or an inline/literal immediate of 'type'.
  // These sources permit input modifiers (abs/neg/sext).
  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // True for a register of any VGPR class, of any width.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  // The special scalar 'null' register.
  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  // SDWA operand predicates; defined out of line.
  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
308 
  // True if this is an immediate with the given semantic type.
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  // True if this immediate carries a named type, i.e. it was parsed as an
  // instruction modifier rather than a plain value.
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // One predicate per named immediate modifier; the offset/FORMAT variants
  // also range-check the stored value.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  // "CPol_GLC1" is a MatchClass of the CPOL_GLC1 operand with the default and
  // forced value of the GLC operand.
  bool isCPol_GLC1() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  // clamp or omod output modifier.
  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }
372 
  // Defined out of line.
  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  // Register of class RCID or inline constant of the given type, with no
  // abs/neg/sext modifiers attached.
  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // SCSrc*: scalar register or inline constant.
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // SSrc*: like SCSrc*, but literal immediates (and, for 32-bit forms,
  // expressions) are also accepted.
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  // The llvm_unreachable bodies below mark match classes that are not
  // expected to be queried at runtime.
  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
475 
  // VCSrc*: register in the VS_32/VS_64 class or an inline constant.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // VSrc*: like VCSrc*, but literal immediates (and, for 32-bit forms,
  // expressions) are also accepted.
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }
555 
  // VISrc*: VGPR-class register (of the stated width) or an inline constant;
  // no literals, no modifiers.
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }
675 
  // AISrc*: AGPR-class register (of the stated width) or an inline constant;
  // no literals, no modifiers.
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  // NOTE(review): these two VISrc_128 predicates sit in the middle of the
  // AISrc group; consider moving them next to the other isVISrc_128*
  // methods above.
  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }
795 
  // KImm match classes accept only literal FP immediates.
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  // AMDGPU operands are never represented with the generic memory kind.
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  // SOPP branch targets may be immediates or (symbolic) expressions.
  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  // More complex operand predicates; defined out of line.
  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
832 
833   StringRef getExpressionAsToken() const {
834     assert(isExpr());
835     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
836     return S->getSymbol().getName();
837   }
838 
839   StringRef getToken() const {
840     assert(isToken());
841 
842     if (Kind == Expression)
843       return getExpressionAsToken();
844 
845     return StringRef(Tok.Data, Tok.Length);
846   }
847 
848   int64_t getImm() const {
849     assert(isImm());
850     return Imm.Val;
851   }
852 
853   void setImm(int64_t Val) {
854     assert(isImm());
855     Imm.Val = Val;
856   }
857 
858   ImmTy getImmTy() const {
859     assert(isImm());
860     return Imm.Type;
861   }
862 
863   unsigned getReg() const override {
864     assert(isRegKind());
865     return Reg.RegNo;
866   }
867 
868   SMLoc getStartLoc() const override {
869     return StartLoc;
870   }
871 
872   SMLoc getEndLoc() const override {
873     return EndLoc;
874   }
875 
876   SMRange getLocRange() const {
877     return SMRange(StartLoc, EndLoc);
878   }
879 
880   Modifiers getModifiers() const {
881     assert(isRegKind() || isImmTy(ImmTyNone));
882     return isRegKind() ? Reg.Mods : Imm.Mods;
883   }
884 
885   void setModifiers(Modifiers Mods) {
886     assert(isRegKind() || isImmTy(ImmTyNone));
887     if (isRegKind())
888       Reg.Mods = Mods;
889     else
890       Imm.Mods = Mods;
891   }
892 
893   bool hasModifiers() const {
894     return getModifiers().hasModifiers();
895   }
896 
897   bool hasFPModifiers() const {
898     return getModifiers().hasFPModifiers();
899   }
900 
901   bool hasIntModifiers() const {
902     return getModifiers().hasIntModifiers();
903   }
904 
  // Defined out of line.
  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  // Bitwidth-specific wrappers for addKImmFPOperands.
  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  // Emit this operand as a register, an MCExpr, or an immediate, depending
  // on its kind.
  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }
936 
937   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
938     Modifiers Mods = getModifiers();
939     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
940     if (isRegKind()) {
941       addRegOperands(Inst, N);
942     } else {
943       addImmOperands(Inst, N, false);
944     }
945   }
946 
947   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
948     assert(!hasIntModifiers());
949     addRegOrImmWithInputModsOperands(Inst, N);
950   }
951 
952   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
953     assert(!hasFPModifiers());
954     addRegOrImmWithInputModsOperands(Inst, N);
955   }
956 
957   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
958     Modifiers Mods = getModifiers();
959     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
960     assert(isRegKind());
961     addRegOperands(Inst, N);
962   }
963 
964   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
965     assert(!hasIntModifiers());
966     addRegWithInputModsOperands(Inst, N);
967   }
968 
969   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
970     assert(!hasFPModifiers());
971     addRegWithInputModsOperands(Inst, N);
972   }
973 
974   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
975     if (isImm())
976       addImmOperands(Inst, N);
977     else {
978       assert(isExpr());
979       Inst.addOperand(MCOperand::createExpr(Expr));
980     }
981   }
982 
  // Debug helper: print the symbolic name of an ImmTy.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }
1041 
  // Debug dump of the operand: kind, value, and any modifiers.
  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }
1062 
1063   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1064                                       int64_t Val, SMLoc Loc,
1065                                       ImmTy Type = ImmTyNone,
1066                                       bool IsFPImm = false) {
1067     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1068     Op->Imm.Val = Val;
1069     Op->Imm.IsFPImm = IsFPImm;
1070     Op->Imm.Kind = ImmKindTyNone;
1071     Op->Imm.Type = Type;
1072     Op->Imm.Mods = Modifiers();
1073     Op->StartLoc = Loc;
1074     Op->EndLoc = Loc;
1075     return Op;
1076   }
1077 
1078   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1079                                         StringRef Str, SMLoc Loc,
1080                                         bool HasExplicitEncodingSize = true) {
1081     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1082     Res->Tok.Data = Str.data();
1083     Res->Tok.Length = Str.size();
1084     Res->StartLoc = Loc;
1085     Res->EndLoc = Loc;
1086     return Res;
1087   }
1088 
1089   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1090                                       unsigned RegNo, SMLoc S,
1091                                       SMLoc E) {
1092     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1093     Op->Reg.RegNo = RegNo;
1094     Op->Reg.Mods = Modifiers();
1095     Op->StartLoc = S;
1096     Op->EndLoc = E;
1097     return Op;
1098   }
1099 
1100   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1101                                        const class MCExpr *Expr, SMLoc S) {
1102     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1103     Op->Expr = Expr;
1104     Op->StartLoc = S;
1105     Op->EndLoc = S;
1106     return Op;
1107   }
1108 };
1109 
1110 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1111   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1112   return OS;
1113 }
1114 
1115 //===----------------------------------------------------------------------===//
1116 // AsmParser
1117 //===----------------------------------------------------------------------===//
1118 
1119 // Holds info related to the current kernel, e.g. count of SGPRs used.
1120 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1121 // .amdgpu_hsa_kernel or at EOF.
1122 class KernelScopeInfo {
1123   int SgprIndexUnusedMin = -1;
1124   int VgprIndexUnusedMin = -1;
1125   MCContext *Ctx = nullptr;
1126 
1127   void usesSgprAt(int i) {
1128     if (i >= SgprIndexUnusedMin) {
1129       SgprIndexUnusedMin = ++i;
1130       if (Ctx) {
1131         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1132         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1133       }
1134     }
1135   }
1136 
1137   void usesVgprAt(int i) {
1138     if (i >= VgprIndexUnusedMin) {
1139       VgprIndexUnusedMin = ++i;
1140       if (Ctx) {
1141         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1142         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1143       }
1144     }
1145   }
1146 
1147 public:
1148   KernelScopeInfo() = default;
1149 
1150   void initialize(MCContext &Context) {
1151     Ctx = &Context;
1152     usesSgprAt(SgprIndexUnusedMin = -1);
1153     usesVgprAt(VgprIndexUnusedMin = -1);
1154   }
1155 
1156   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1157     switch (RegKind) {
1158       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1159       case IS_AGPR: // fall through
1160       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1161       default: break;
1162     }
1163   }
1164 };
1165 
1166 class AMDGPUAsmParser : public MCTargetAsmParser {
1167   MCAsmParser &Parser;
1168 
1169   // Number of extra operands parsed after the first optional operand.
1170   // This may be necessary to skip hardcoded mandatory operands.
1171   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1172 
1173   unsigned ForcedEncodingSize = 0;
1174   bool ForcedDPP = false;
1175   bool ForcedSDWA = false;
1176   KernelScopeInfo KernelScope;
1177   unsigned CPolSeen;
1178 
1179   /// @name Auto-generated Match Functions
1180   /// {
1181 
1182 #define GET_ASSEMBLER_HEADER
1183 #include "AMDGPUGenAsmMatcher.inc"
1184 
1185   /// }
1186 
1187 private:
1188   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1189   bool OutOfRangeError(SMRange Range);
1190   /// Calculate VGPR/SGPR blocks required for given target, reserved
1191   /// registers, and user-specified NextFreeXGPR values.
1192   ///
1193   /// \param Features [in] Target features, used for bug corrections.
1194   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1195   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1196   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1197   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1198   /// descriptor field, if valid.
1199   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1200   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1201   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1202   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1203   /// \param VGPRBlocks [out] Result VGPR block count.
1204   /// \param SGPRBlocks [out] Result SGPR block count.
1205   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1206                           bool FlatScrUsed, bool XNACKUsed,
1207                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1208                           SMRange VGPRRange, unsigned NextFreeSGPR,
1209                           SMRange SGPRRange, unsigned &VGPRBlocks,
1210                           unsigned &SGPRBlocks);
1211   bool ParseDirectiveAMDGCNTarget();
1212   bool ParseDirectiveAMDHSAKernel();
1213   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1214   bool ParseDirectiveHSACodeObjectVersion();
1215   bool ParseDirectiveHSACodeObjectISA();
1216   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1217   bool ParseDirectiveAMDKernelCodeT();
1218   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1219   bool ParseDirectiveAMDGPUHsaKernel();
1220 
1221   bool ParseDirectiveISAVersion();
1222   bool ParseDirectiveHSAMetadata();
1223   bool ParseDirectivePALMetadataBegin();
1224   bool ParseDirectivePALMetadata();
1225   bool ParseDirectiveAMDGPULDS();
1226 
1227   /// Common code to parse out a block of text (typically YAML) between start and
1228   /// end directives.
1229   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1230                            const char *AssemblerDirectiveEnd,
1231                            std::string &CollectString);
1232 
1233   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1234                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1235   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1236                            unsigned &RegNum, unsigned &RegWidth,
1237                            bool RestoreOnFailure = false);
1238   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1239                            unsigned &RegNum, unsigned &RegWidth,
1240                            SmallVectorImpl<AsmToken> &Tokens);
1241   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1242                            unsigned &RegWidth,
1243                            SmallVectorImpl<AsmToken> &Tokens);
1244   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1245                            unsigned &RegWidth,
1246                            SmallVectorImpl<AsmToken> &Tokens);
1247   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1248                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1249   bool ParseRegRange(unsigned& Num, unsigned& Width);
1250   unsigned getRegularReg(RegisterKind RegKind,
1251                          unsigned RegNum,
1252                          unsigned RegWidth,
1253                          SMLoc Loc);
1254 
1255   bool isRegister();
1256   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1257   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1258   void initializeGprCountSymbol(RegisterKind RegKind);
1259   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1260                              unsigned RegWidth);
1261   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1262                     bool IsAtomic, bool IsLds = false);
1263   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1264                  bool IsGdsHardcoded);
1265 
1266 public:
1267   enum AMDGPUMatchResultTy {
1268     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1269   };
1270   enum OperandMode {
1271     OperandMode_Default,
1272     OperandMode_NSA,
1273   };
1274 
1275   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1276 
1277   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1278                const MCInstrInfo &MII,
1279                const MCTargetOptions &Options)
1280       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1281     MCAsmParserExtension::Initialize(Parser);
1282 
1283     if (getFeatureBits().none()) {
1284       // Set default features.
1285       copySTI().ToggleFeature("southern-islands");
1286     }
1287 
1288     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1289 
1290     {
1291       // TODO: make those pre-defined variables read-only.
1292       // Currently there is none suitable machinery in the core llvm-mc for this.
1293       // MCSymbol::isRedefinable is intended for another purpose, and
1294       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1295       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1296       MCContext &Ctx = getContext();
1297       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1298         MCSymbol *Sym =
1299             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1300         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1301         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1302         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1303         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1304         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1305       } else {
1306         MCSymbol *Sym =
1307             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1308         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1309         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1311         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1312         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1313       }
1314       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1315         initializeGprCountSymbol(IS_VGPR);
1316         initializeGprCountSymbol(IS_SGPR);
1317       } else
1318         KernelScope.initialize(getContext());
1319     }
1320   }
1321 
1322   bool hasXNACK() const {
1323     return AMDGPU::hasXNACK(getSTI());
1324   }
1325 
1326   bool hasMIMG_R128() const {
1327     return AMDGPU::hasMIMG_R128(getSTI());
1328   }
1329 
1330   bool hasPackedD16() const {
1331     return AMDGPU::hasPackedD16(getSTI());
1332   }
1333 
1334   bool hasGFX10A16() const {
1335     return AMDGPU::hasGFX10A16(getSTI());
1336   }
1337 
1338   bool isSI() const {
1339     return AMDGPU::isSI(getSTI());
1340   }
1341 
1342   bool isCI() const {
1343     return AMDGPU::isCI(getSTI());
1344   }
1345 
1346   bool isVI() const {
1347     return AMDGPU::isVI(getSTI());
1348   }
1349 
1350   bool isGFX9() const {
1351     return AMDGPU::isGFX9(getSTI());
1352   }
1353 
1354   bool isGFX90A() const {
1355     return AMDGPU::isGFX90A(getSTI());
1356   }
1357 
1358   bool isGFX9Plus() const {
1359     return AMDGPU::isGFX9Plus(getSTI());
1360   }
1361 
1362   bool isGFX10() const {
1363     return AMDGPU::isGFX10(getSTI());
1364   }
1365 
1366   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1367 
1368   bool isGFX10_BEncoding() const {
1369     return AMDGPU::isGFX10_BEncoding(getSTI());
1370   }
1371 
1372   bool hasInv2PiInlineImm() const {
1373     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1374   }
1375 
1376   bool hasFlatOffsets() const {
1377     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1378   }
1379 
1380   bool hasSGPR102_SGPR103() const {
1381     return !isVI() && !isGFX9();
1382   }
1383 
1384   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1385 
1386   bool hasIntClamp() const {
1387     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1388   }
1389 
1390   AMDGPUTargetStreamer &getTargetStreamer() {
1391     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1392     return static_cast<AMDGPUTargetStreamer &>(TS);
1393   }
1394 
1395   const MCRegisterInfo *getMRI() const {
1396     // We need this const_cast because for some reason getContext() is not const
1397     // in MCAsmParser.
1398     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1399   }
1400 
1401   const MCInstrInfo *getMII() const {
1402     return &MII;
1403   }
1404 
1405   const FeatureBitset &getFeatureBits() const {
1406     return getSTI().getFeatureBits();
1407   }
1408 
1409   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1410   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1411   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1412 
1413   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1414   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1415   bool isForcedDPP() const { return ForcedDPP; }
1416   bool isForcedSDWA() const { return ForcedSDWA; }
1417   ArrayRef<unsigned> getMatchedVariants() const;
1418   StringRef getMatchedVariantName() const;
1419 
1420   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1421   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1422                      bool RestoreOnFailure);
1423   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1424   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1425                                         SMLoc &EndLoc) override;
1426   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1427   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1428                                       unsigned Kind) override;
1429   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1430                                OperandVector &Operands, MCStreamer &Out,
1431                                uint64_t &ErrorInfo,
1432                                bool MatchingInlineAsm) override;
1433   bool ParseDirective(AsmToken DirectiveID) override;
1434   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1435                                     OperandMode Mode = OperandMode_Default);
1436   StringRef parseMnemonicSuffix(StringRef Name);
1437   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1438                         SMLoc NameLoc, OperandVector &Operands) override;
1439   //bool ProcessInstruction(MCInst &Inst);
1440 
1441   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1442 
1443   OperandMatchResultTy
1444   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1445                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1446                      bool (*ConvertResult)(int64_t &) = nullptr);
1447 
1448   OperandMatchResultTy
1449   parseOperandArrayWithPrefix(const char *Prefix,
1450                               OperandVector &Operands,
1451                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1452                               bool (*ConvertResult)(int64_t&) = nullptr);
1453 
1454   OperandMatchResultTy
1455   parseNamedBit(StringRef Name, OperandVector &Operands,
1456                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1457   OperandMatchResultTy parseCPol(OperandVector &Operands);
1458   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1459                                              StringRef &Value,
1460                                              SMLoc &StringLoc);
1461 
1462   bool isModifier();
1463   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1464   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1465   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1466   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1467   bool parseSP3NegModifier();
1468   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1469   OperandMatchResultTy parseReg(OperandVector &Operands);
1470   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1471   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1472   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1473   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1474   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1475   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1476   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1477   OperandMatchResultTy parseUfmt(int64_t &Format);
1478   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1479   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1480   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1481   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1482   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1483   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1484   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1485 
1486   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1487   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1488   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1489   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1490 
1491   bool parseCnt(int64_t &IntVal);
1492   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1493   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1494 
1495 private:
1496   struct OperandInfoTy {
1497     SMLoc Loc;
1498     int64_t Id;
1499     bool IsSymbolic = false;
1500     bool IsDefined = false;
1501 
1502     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1503   };
1504 
1505   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1506   bool validateSendMsg(const OperandInfoTy &Msg,
1507                        const OperandInfoTy &Op,
1508                        const OperandInfoTy &Stream);
1509 
1510   bool parseHwregBody(OperandInfoTy &HwReg,
1511                       OperandInfoTy &Offset,
1512                       OperandInfoTy &Width);
1513   bool validateHwreg(const OperandInfoTy &HwReg,
1514                      const OperandInfoTy &Offset,
1515                      const OperandInfoTy &Width);
1516 
1517   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1518   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1519 
1520   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1521                       const OperandVector &Operands) const;
1522   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1523   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1524   SMLoc getLitLoc(const OperandVector &Operands) const;
1525   SMLoc getConstLoc(const OperandVector &Operands) const;
1526 
1527   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1528   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1529   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1530   bool validateSOPLiteral(const MCInst &Inst) const;
1531   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1532   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1533   bool validateIntClampSupported(const MCInst &Inst);
1534   bool validateMIMGAtomicDMask(const MCInst &Inst);
1535   bool validateMIMGGatherDMask(const MCInst &Inst);
1536   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1537   bool validateMIMGDataSize(const MCInst &Inst);
1538   bool validateMIMGAddrSize(const MCInst &Inst);
1539   bool validateMIMGD16(const MCInst &Inst);
1540   bool validateMIMGDim(const MCInst &Inst);
1541   bool validateMIMGMSAA(const MCInst &Inst);
1542   bool validateLdsDirect(const MCInst &Inst);
1543   bool validateOpSel(const MCInst &Inst);
1544   bool validateVccOperand(unsigned Reg) const;
1545   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1546   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1547   bool validateAGPRLdSt(const MCInst &Inst) const;
1548   bool validateVGPRAlign(const MCInst &Inst) const;
1549   bool validateDivScale(const MCInst &Inst);
1550   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1551                              const SMLoc &IDLoc);
1552   unsigned getConstantBusLimit(unsigned Opcode) const;
1553   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1554   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1555   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1556 
1557   bool isSupportedMnemo(StringRef Mnemo,
1558                         const FeatureBitset &FBS);
1559   bool isSupportedMnemo(StringRef Mnemo,
1560                         const FeatureBitset &FBS,
1561                         ArrayRef<unsigned> Variants);
1562   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1563 
1564   bool isId(const StringRef Id) const;
1565   bool isId(const AsmToken &Token, const StringRef Id) const;
1566   bool isToken(const AsmToken::TokenKind Kind) const;
1567   bool trySkipId(const StringRef Id);
1568   bool trySkipId(const StringRef Pref, const StringRef Id);
1569   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1570   bool trySkipToken(const AsmToken::TokenKind Kind);
1571   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1572   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1573   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1574 
1575   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1576   AsmToken::TokenKind getTokenKind() const;
1577   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1578   bool parseExpr(OperandVector &Operands);
1579   StringRef getTokenStr() const;
1580   AsmToken peekToken();
1581   AsmToken getToken() const;
1582   SMLoc getLoc() const;
1583   void lex();
1584 
1585 public:
1586   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1587   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1588 
1589   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1590   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1591   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1592   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1593   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1594   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1595 
1596   bool parseSwizzleOperand(int64_t &Op,
1597                            const unsigned MinVal,
1598                            const unsigned MaxVal,
1599                            const StringRef ErrMsg,
1600                            SMLoc &Loc);
1601   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1602                             const unsigned MinVal,
1603                             const unsigned MaxVal,
1604                             const StringRef ErrMsg);
1605   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1606   bool parseSwizzleOffset(int64_t &Imm);
1607   bool parseSwizzleMacro(int64_t &Imm);
1608   bool parseSwizzleQuadPerm(int64_t &Imm);
1609   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1610   bool parseSwizzleBroadcast(int64_t &Imm);
1611   bool parseSwizzleSwap(int64_t &Imm);
1612   bool parseSwizzleReverse(int64_t &Imm);
1613 
1614   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1615   int64_t parseGPRIdxMacro();
1616 
1617   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1618   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1619   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1620   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1621 
1622   AMDGPUOperand::Ptr defaultCPol() const;
1623   AMDGPUOperand::Ptr defaultCPol_GLC1() const;
1624 
1625   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1626   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1627   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1628   AMDGPUOperand::Ptr defaultFlatOffset() const;
1629 
1630   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1631 
1632   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1633                OptionalImmIndexMap &OptionalIdx);
1634   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1635   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1636   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1637 
1638   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1639 
1640   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1641                bool IsAtomic = false);
1642   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1643   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1644 
1645   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1646 
1647   bool parseDimId(unsigned &Encoding);
1648   OperandMatchResultTy parseDim(OperandVector &Operands);
1649   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1650   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1651   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1652   int64_t parseDPPCtrlSel(StringRef Ctrl);
1653   int64_t parseDPPCtrlPerm();
1654   AMDGPUOperand::Ptr defaultRowMask() const;
1655   AMDGPUOperand::Ptr defaultBankMask() const;
1656   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1657   AMDGPUOperand::Ptr defaultFI() const;
1658   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1659   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1660 
1661   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1662                                     AMDGPUOperand::ImmTy Type);
1663   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1664   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1665   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1666   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1667   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1668   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1669   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1670                uint64_t BasicInstType,
1671                bool SkipDstVcc = false,
1672                bool SkipSrcVcc = false);
1673 
1674   AMDGPUOperand::Ptr defaultBLGP() const;
1675   AMDGPUOperand::Ptr defaultCBSZ() const;
1676   AMDGPUOperand::Ptr defaultABID() const;
1677 
1678   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1679   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1680 };
1681 
// Table entry describing one optional instruction operand (e.g. an
// immediate modifier). NOTE(review): instances appear to be created via
// aggregate initialization elsewhere in the file, so the field order must
// not change -- confirm before reordering.
struct OptionalOperand {
  const char *Name;                 // Operand name as written in assembly.
  AMDGPUOperand::ImmTy Type;        // Immediate kind attached to the parsed value.
  bool IsBit;                       // True if the operand is a single-bit flag.
  bool (*ConvertResult)(int64_t&);  // Optional post-parse value conversion; may be null.
};
1688 
1689 } // end anonymous namespace
1690 
1691 // May be called with integer type with equivalent bitwidth.
1692 static const fltSemantics *getFltSemantics(unsigned Size) {
1693   switch (Size) {
1694   case 4:
1695     return &APFloat::IEEEsingle();
1696   case 8:
1697     return &APFloat::IEEEdouble();
1698   case 2:
1699     return &APFloat::IEEEhalf();
1700   default:
1701     llvm_unreachable("unsupported fp type");
1702   }
1703 }
1704 
1705 static const fltSemantics *getFltSemantics(MVT VT) {
1706   return getFltSemantics(VT.getSizeInBits() / 8);
1707 }
1708 
// Map an AMDGPU machine-operand type to the IEEE floating-point semantics
// used when interpreting its literal. Aborts on operand types that carry
// no FP interpretation.
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  // 32-bit operand types (including packed v2f32/v2i32 forms).
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
    return &APFloat::IEEEsingle();
  // 64-bit operand types.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  // 16-bit operand types (including packed v2f16/v2i16 forms).
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
1745 
1746 //===----------------------------------------------------------------------===//
1747 // Operand
1748 //===----------------------------------------------------------------------===//
1749 
// Returns true if FPLiteral can be converted to the FP semantics of VT
// without overflow or underflow. Precision loss alone is allowed.
// NOTE: mutates FPLiteral in place (APFloat::convert converts the value
// to the target semantics, not a copy).
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to the semantics selected by VT (half/single/double
  // depending on the operand size -- not necessarily single precision).
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  // (overflow/underflow always implies an inexact result, so Lost is set
  // whenever either flag is raised).
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
1767 
1768 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1769   return isUIntN(Size, Val) || isIntN(Size, Val);
1770 }
1771 
1772 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1773   if (VT.getScalarType() == MVT::i16) {
1774     // FP immediate values are broken.
1775     return isInlinableIntLiteral(Val);
1776   }
1777 
1778   // f16/v2f16 operands work correctly for all values.
1779   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1780 }
1781 
// Returns true if this immediate operand can be encoded as an inline
// constant for an operand of the given type (no literal dword needed).
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Note: canLosslesslyConvertToFPType converts FPLiteral in place, so
    // the bitcasts below read the already-narrowed value.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  // The literal must fit into the operand's width (signed or unsigned)
  // before it can be considered for inlining.
  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
1844 
// Returns true if this immediate may be encoded as a literal constant
// (a trailing dword in the instruction stream) for an operand of the given
// type. Inline-ability is checked separately by isInlinableImm().
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    // 64-bit operands still encode only a 32-bit literal, so the fit check
    // is done against 32 bits.
    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslesly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 :
                     (type == MVT::v2f32)? MVT::f32 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}
1892 
1893 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1894   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1895 }
1896 
1897 bool AMDGPUOperand::isVRegWithInputMods() const {
1898   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1899          // GFX90A allows DPP on 64-bit operands.
1900          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1901           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1902 }
1903 
1904 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1905   if (AsmParser->isVI())
1906     return isVReg32();
1907   else if (AsmParser->isGFX9Plus())
1908     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1909   else
1910     return false;
1911 }
1912 
// SDWA operand check specialized for an f16 element type.
bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}
1916 
// SDWA operand check specialized for an f32 element type.
bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}
1920 
// SDWA operand check specialized for an i16 element type.
bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}
1924 
// SDWA operand check specialized for an i32 element type.
bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}
1928 
1929 bool AMDGPUOperand::isBoolReg() const {
1930   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1931          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1932 }
1933 
1934 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1935 {
1936   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1937   assert(Size == 2 || Size == 4 || Size == 8);
1938 
1939   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1940 
1941   if (Imm.Mods.Abs) {
1942     Val &= ~FpSignMask;
1943   }
1944   if (Imm.Mods.Neg) {
1945     Val ^= FpSignMask;
1946   }
1947 
1948   return Val;
1949 }
1950 
1951 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1952   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1953                              Inst.getNumOperands())) {
1954     addLiteralImmOperand(Inst, Imm.Val,
1955                          ApplyModifiers &
1956                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1957   } else {
1958     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1959     Inst.addOperand(MCOperand::createImm(Imm.Val));
1960     setImmKindNone();
1961   }
1962 }
1963 
// Appends Val to Inst as either an inline constant or a 32-bit literal,
// according to the target operand's type and the value itself.
// ApplyModifiers requests folding of abs/neg FP modifiers into Val first.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // FP tokens are stored as 64-bit doubles, so the sign bit to operate on
    // is the double's; integer tokens use the operand's own size.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      // 64-bit operand: inline the value when possible.
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        setImmKindConst();
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high 32 bits survive; HW pads the low half with zeros.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        setImmKindLiteral();
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32: {
      // Narrower operands: round the double token to the operand's
      // semantics and emit the resulting bit pattern as a literal.
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      setImmKindLiteral();
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    // Prefer the inline form when the value both fits in 32 bits and is an
    // inlinable constant; otherwise fall through to a 32-bit literal.
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    // Non-inlinable 64-bit values are emitted as their low 32 bits.
    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    // Inline-only operand types: the predicates must have guaranteed
    // inlinability already, so just assert it here.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
2129 
// Appends this immediate as a KIMM (literal-carrying) operand of Bitwidth
// bits. FP tokens are rounded from double to the Bitwidth semantics first.
// (The operand index N is not used.)
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);
  setImmKindNone();

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
2147 
// Appends this register operand to Inst, translating to the subtarget's
// concrete MC register number.
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}
2151 
2152 static bool isInlineValue(unsigned Reg) {
2153   switch (Reg) {
2154   case AMDGPU::SRC_SHARED_BASE:
2155   case AMDGPU::SRC_SHARED_LIMIT:
2156   case AMDGPU::SRC_PRIVATE_BASE:
2157   case AMDGPU::SRC_PRIVATE_LIMIT:
2158   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2159     return true;
2160   case AMDGPU::SRC_VCCZ:
2161   case AMDGPU::SRC_EXECZ:
2162   case AMDGPU::SRC_SCC:
2163     return true;
2164   case AMDGPU::SGPR_NULL:
2165     return true;
2166   default:
2167     return false;
2168   }
2169 }
2170 
// Returns true if this operand is a register that encodes as an inline
// constant value (see the file-local isInlineValue above).
bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}
2174 
2175 //===----------------------------------------------------------------------===//
2176 // AsmParser
2177 //===----------------------------------------------------------------------===//
2178 
// Maps a register kind plus a width (in 32-bit registers) onto the matching
// register-class ID, or -1 if that kind/width combination does not exist.
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 5: return AMDGPU::VReg_160RegClassID;
      case 6: return AMDGPU::VReg_192RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
      case 32: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    // Trap-temporary registers only come in a subset of the widths.
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 3: return AMDGPU::SGPR_96RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 5: return AMDGPU::SGPR_160RegClassID;
      case 6: return AMDGPU::SGPR_192RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::AGPR_32RegClassID;
      case 2: return AMDGPU::AReg_64RegClassID;
      case 3: return AMDGPU::AReg_96RegClassID;
      case 4: return AMDGPU::AReg_128RegClassID;
      case 5: return AMDGPU::AReg_160RegClassID;
      case 6: return AMDGPU::AReg_192RegClassID;
      case 8: return AMDGPU::AReg_256RegClassID;
      case 16: return AMDGPU::AReg_512RegClassID;
    }
  }
  return -1;
}
2230 
// Translates a special-register name (with and without the "src_" prefix
// where applicable) to its register number, or AMDGPU::NoRegister if the
// name is not a known special register.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("pc", AMDGPU::PC_REG)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(AMDGPU::NoRegister);
}
2274 
// MCTargetAsmParser hook: parses a register and fills RegNo and the source
// range. Returns true on failure.
// NOTE(review): RestoreOnFailure is accepted but not consulted in this
// overload — confirm restoration is handled inside parseRegister().
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}
2285 
// Convenience overload without token restoration.
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
2290 
2291 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2292                                                        SMLoc &StartLoc,
2293                                                        SMLoc &EndLoc) {
2294   bool Result =
2295       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2296   bool PendingErrors = getParser().hasPendingError();
2297   getParser().clearPendingErrors();
2298   if (PendingErrors)
2299     return MatchOperand_ParseFail;
2300   if (Result)
2301     return MatchOperand_NoMatch;
2302   return MatchOperand_Success;
2303 }
2304 
2305 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2306                                             RegisterKind RegKind, unsigned Reg1,
2307                                             SMLoc Loc) {
2308   switch (RegKind) {
2309   case IS_SPECIAL:
2310     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2311       Reg = AMDGPU::EXEC;
2312       RegWidth = 2;
2313       return true;
2314     }
2315     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2316       Reg = AMDGPU::FLAT_SCR;
2317       RegWidth = 2;
2318       return true;
2319     }
2320     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2321       Reg = AMDGPU::XNACK_MASK;
2322       RegWidth = 2;
2323       return true;
2324     }
2325     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2326       Reg = AMDGPU::VCC;
2327       RegWidth = 2;
2328       return true;
2329     }
2330     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2331       Reg = AMDGPU::TBA;
2332       RegWidth = 2;
2333       return true;
2334     }
2335     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2336       Reg = AMDGPU::TMA;
2337       RegWidth = 2;
2338       return true;
2339     }
2340     Error(Loc, "register does not fit in the list");
2341     return false;
2342   case IS_VGPR:
2343   case IS_SGPR:
2344   case IS_AGPR:
2345   case IS_TTMP:
2346     if (Reg1 != Reg + RegWidth) {
2347       Error(Loc, "registers in a list must have consecutive indices");
2348       return false;
2349     }
2350     RegWidth++;
2351     return true;
2352   default:
2353     llvm_unreachable("unexpected register kind");
2354   }
2355 }
2356 
// Name prefix and kind of one "regular" (indexed) register file.
struct RegInfo {
  StringLiteral Name;
  RegisterKind Kind;
};
2361 
// Regular register-file prefixes. Order matters: getRegularRegInfo() does
// prefix matching and returns the first hit, so "acc" must precede "a".
static constexpr RegInfo RegularRegisters[] = {
  {{"v"},    IS_VGPR},
  {{"s"},    IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"},  IS_AGPR},
  {{"a"},    IS_AGPR},
};
2369 
2370 static bool isRegularReg(RegisterKind Kind) {
2371   return Kind == IS_VGPR ||
2372          Kind == IS_SGPR ||
2373          Kind == IS_TTMP ||
2374          Kind == IS_AGPR;
2375 }
2376 
// Returns the first entry of RegularRegisters whose name is a prefix of
// Str, or nullptr if none matches (table order resolves ambiguity, e.g.
// "acc" before "a").
static const RegInfo* getRegularRegInfo(StringRef Str) {
  for (const RegInfo &Reg : RegularRegisters)
    if (Str.startswith(Reg.Name))
      return &Reg;
  return nullptr;
}
2383 
// Parses Str as a base-10 register index into Num; returns true on success.
static bool getRegNum(StringRef Str, unsigned& Num) {
  return !Str.getAsInteger(10, Num);
}
2387 
2388 bool
2389 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2390                             const AsmToken &NextToken) const {
2391 
2392   // A list of consecutive registers: [s0,s1,s2,s3]
2393   if (Token.is(AsmToken::LBrac))
2394     return true;
2395 
2396   if (!Token.is(AsmToken::Identifier))
2397     return false;
2398 
2399   // A single register like s0 or a range of registers like s[0:1]
2400 
2401   StringRef Str = Token.getString();
2402   const RegInfo *Reg = getRegularRegInfo(Str);
2403   if (Reg) {
2404     StringRef RegName = Reg->Name;
2405     StringRef RegSuffix = Str.substr(RegName.size());
2406     if (!RegSuffix.empty()) {
2407       unsigned Num;
2408       // A single register with an index: rXX
2409       if (getRegNum(RegSuffix, Num))
2410         return true;
2411     } else {
2412       // A range of registers: r[XX:YY].
2413       if (NextToken.is(AsmToken::LBrac))
2414         return true;
2415     }
2416   }
2417 
2418   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2419 }
2420 
// Lookahead check against the current and next tokens.
bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}
2426 
// Converts a (kind, first index, width) triple into a concrete register
// from the matching register class, diagnosing misalignment, unsupported
// widths, and out-of-range indices. Returns AMDGPU::NoRegister on error.
unsigned
AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
                               unsigned RegNum,
                               unsigned RegWidth,
                               SMLoc Loc) {

  assert(isRegularReg(RegKind));

  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    // SGPR and TTMP registers must be aligned.
    // Max required alignment is 4 dwords.
    AlignSize = std::min(RegWidth, 4u);
  }

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;
  }

  // Register classes enumerate aligned tuples, so the class-relative index
  // is the dword index divided by the alignment.
  unsigned RegIdx = RegNum / AlignSize;
  int RCID = getRegClass(RegKind, RegWidth);
  if (RCID == -1) {
    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCRegisterClass RC = TRI->getRegClass(RCID);
  if (RegIdx >= RC.getNumRegs()) {
    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;
  }

  return RC.getRegister(RegIdx);
}
2463 
// Parses the "[lo]" or "[lo:hi]" part of a register range, producing the
// first index (Num) and the number of registers (Width). Diagnoses and
// returns false on malformed input or out-of-range/misordered indices.
bool
AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
  int64_t RegLo, RegHi;
  if (!skipToken(AsmToken::LBrac, "missing register index"))
    return false;

  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(RegLo))
    return false;

  // The ":hi" part is optional; a single index means a one-register range.
  if (trySkipToken(AsmToken::Colon)) {
    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))
      return false;
  } else {
    RegHi = RegLo;
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return false;

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");
    return false;
  }

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");
    return false;
  }

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");
    return false;
  }

  Num = static_cast<unsigned>(RegLo);
  Width = (RegHi - RegLo) + 1;
  return true;
}
2506 
2507 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2508                                           unsigned &RegNum, unsigned &RegWidth,
2509                                           SmallVectorImpl<AsmToken> &Tokens) {
2510   assert(isToken(AsmToken::Identifier));
2511   unsigned Reg = getSpecialRegForName(getTokenStr());
2512   if (Reg) {
2513     RegNum = 0;
2514     RegWidth = 1;
2515     RegKind = IS_SPECIAL;
2516     Tokens.push_back(getToken());
2517     lex(); // skip register name
2518   }
2519   return Reg;
2520 }
2521 
// Parses a regular register reference: either "vXX" (prefix plus an index)
// or "v[XX:YY]" (prefix plus a bracketed range). Records the consumed name
// token in Tokens and returns the resolved register, or NoRegister with a
// pending diagnostic on failure.
unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();

  const RegInfo *RI = getRegularRegInfo(RegName);
  if (!RI) {
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;
  }

  Tokens.push_back(getToken());
  lex(); // skip register name

  RegKind = RI->Kind;
  StringRef RegSuffix = RegName.substr(RI->Name.size());
  if (!RegSuffix.empty()) {
    // Single 32-bit register: vXX.
    if (!getRegNum(RegSuffix, RegNum)) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;
    }
    RegWidth = 1;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;
  }

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
}
2555 
// Parses a bracketed list of single registers, e.g. [s0,s1,s2,s3], which
// must all be 32-bit, of the same kind, and consecutive. Returns the
// combined register, or NoRegister with a pending diagnostic.
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                                       unsigned &RegWidth,
                                       SmallVectorImpl<AsmToken> &Tokens) {
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

  if (!skipToken(AsmToken::LBrac,
                 "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;
  }

  // List of consecutive registers, e.g.: [s0,s1,s2,s3]

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 1) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;
  }

  // Each subsequent element must match the first in kind/width and extend
  // the run; AddNextRegisterToList grows RegWidth as it goes.
  for (; trySkipToken(AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;
    Loc = getLoc();

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
                             Tokens)) {
      return AMDGPU::NoRegister;
    }
    if (NextRegWidth != 1) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    }
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    }
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;
  }

  if (!skipToken(AsmToken::RBrac,
                 "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;
  }

  // Resolve the accumulated (first index, width) into the tuple register.
  if (isRegularReg(RegKind))
    Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);

  return Reg;
}
2609 
// Parses any register form (special name, regular reference, or bracketed
// list), validates subtarget availability, and returns true on success.
// Consumed tokens are appended to Tokens for possible restoration.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

  if (isToken(AsmToken::Identifier)) {
    // Special names take precedence over regular prefixes.
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (Reg == AMDGPU::NoRegister) {
    assert(Parser.hasPendingError());
    return false;
  }

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      Error(Loc, "register not available on this GPU");
    }
    return false;
  }

  return true;
}
2641 
2642 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2643                                           unsigned &RegNum, unsigned &RegWidth,
2644                                           bool RestoreOnFailure /*=false*/) {
2645   Reg = AMDGPU::NoRegister;
2646 
2647   SmallVector<AsmToken, 1> Tokens;
2648   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2649     if (RestoreOnFailure) {
2650       while (!Tokens.empty()) {
2651         getLexer().UnLex(Tokens.pop_back_val());
2652       }
2653     }
2654     return true;
2655   }
2656   return false;
2657 }
2658 
2659 Optional<StringRef>
2660 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2661   switch (RegKind) {
2662   case IS_VGPR:
2663     return StringRef(".amdgcn.next_free_vgpr");
2664   case IS_SGPR:
2665     return StringRef(".amdgcn.next_free_sgpr");
2666   default:
2667     return None;
2668   }
2669 }
2670 
2671 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2672   auto SymbolName = getGprCountSymbolName(RegKind);
2673   assert(SymbolName && "initializing invalid register kind");
2674   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2675   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2676 }
2677 
2678 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2679                                             unsigned DwordRegIndex,
2680                                             unsigned RegWidth) {
2681   // Symbols are only defined for GCN targets
2682   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2683     return true;
2684 
2685   auto SymbolName = getGprCountSymbolName(RegKind);
2686   if (!SymbolName)
2687     return true;
2688   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2689 
2690   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2691   int64_t OldCount;
2692 
2693   if (!Sym->isVariable())
2694     return !Error(getLoc(),
2695                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2696   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2697     return !Error(
2698         getLoc(),
2699         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2700 
2701   if (OldCount <= NewMax)
2702     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2703 
2704   return true;
2705 }
2706 
2707 std::unique_ptr<AMDGPUOperand>
2708 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2709   const auto &Tok = getToken();
2710   SMLoc StartLoc = Tok.getLoc();
2711   SMLoc EndLoc = Tok.getEndLoc();
2712   RegisterKind RegKind;
2713   unsigned Reg, RegNum, RegWidth;
2714 
2715   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2716     return nullptr;
2717   }
2718   if (isHsaAbiVersion3(&getSTI())) {
2719     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2720       return nullptr;
2721   } else
2722     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2723   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2724 }
2725 
2726 OperandMatchResultTy
2727 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2728   // TODO: add syntactic sugar for 1/(2*PI)
2729 
2730   assert(!isRegister());
2731   assert(!isModifier());
2732 
2733   const auto& Tok = getToken();
2734   const auto& NextTok = peekToken();
2735   bool IsReal = Tok.is(AsmToken::Real);
2736   SMLoc S = getLoc();
2737   bool Negate = false;
2738 
2739   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2740     lex();
2741     IsReal = true;
2742     Negate = true;
2743   }
2744 
2745   if (IsReal) {
2746     // Floating-point expressions are not supported.
2747     // Can only allow floating-point literals with an
2748     // optional sign.
2749 
2750     StringRef Num = getTokenStr();
2751     lex();
2752 
2753     APFloat RealVal(APFloat::IEEEdouble());
2754     auto roundMode = APFloat::rmNearestTiesToEven;
2755     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2756       return MatchOperand_ParseFail;
2757     }
2758     if (Negate)
2759       RealVal.changeSign();
2760 
2761     Operands.push_back(
2762       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2763                                AMDGPUOperand::ImmTyNone, true));
2764 
2765     return MatchOperand_Success;
2766 
2767   } else {
2768     int64_t IntVal;
2769     const MCExpr *Expr;
2770     SMLoc S = getLoc();
2771 
2772     if (HasSP3AbsModifier) {
2773       // This is a workaround for handling expressions
2774       // as arguments of SP3 'abs' modifier, for example:
2775       //     |1.0|
2776       //     |-1|
2777       //     |1+x|
2778       // This syntax is not compatible with syntax of standard
2779       // MC expressions (due to the trailing '|').
2780       SMLoc EndLoc;
2781       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2782         return MatchOperand_ParseFail;
2783     } else {
2784       if (Parser.parseExpression(Expr))
2785         return MatchOperand_ParseFail;
2786     }
2787 
2788     if (Expr->evaluateAsAbsolute(IntVal)) {
2789       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2790     } else {
2791       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2792     }
2793 
2794     return MatchOperand_Success;
2795   }
2796 
2797   return MatchOperand_NoMatch;
2798 }
2799 
2800 OperandMatchResultTy
2801 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2802   if (!isRegister())
2803     return MatchOperand_NoMatch;
2804 
2805   if (auto R = parseRegister()) {
2806     assert(R->isReg());
2807     Operands.push_back(std::move(R));
2808     return MatchOperand_Success;
2809   }
2810   return MatchOperand_ParseFail;
2811 }
2812 
2813 OperandMatchResultTy
2814 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2815   auto res = parseReg(Operands);
2816   if (res != MatchOperand_NoMatch) {
2817     return res;
2818   } else if (isModifier()) {
2819     return MatchOperand_NoMatch;
2820   } else {
2821     return parseImm(Operands, HasSP3AbsMod);
2822   }
2823 }
2824 
2825 bool
2826 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2827   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2828     const auto &str = Token.getString();
2829     return str == "abs" || str == "neg" || str == "sext";
2830   }
2831   return false;
2832 }
2833 
2834 bool
2835 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2836   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2837 }
2838 
2839 bool
2840 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2841   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2842 }
2843 
2844 bool
2845 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2846   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2847 }
2848 
2849 // Check if this is an operand modifier or an opcode modifier
2850 // which may look like an expression but it is not. We should
2851 // avoid parsing these modifiers as expressions. Currently
2852 // recognized sequences are:
2853 //   |...|
2854 //   abs(...)
2855 //   neg(...)
2856 //   sext(...)
2857 //   -reg
2858 //   -|...|
2859 //   -abs(...)
2860 //   name:...
2861 // Note that simple opcode modifiers like 'gds' may be parsed as
2862 // expressions; this is a special case. See getExpressionAsToken.
2863 //
2864 bool
2865 AMDGPUAsmParser::isModifier() {
2866 
2867   AsmToken Tok = getToken();
2868   AsmToken NextToken[2];
2869   peekTokens(NextToken);
2870 
2871   return isOperandModifier(Tok, NextToken[0]) ||
2872          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2873          isOpcodeModifierWithVal(Tok, NextToken[0]);
2874 }
2875 
// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  // Consume the '-' only when it clearly starts an SP3 neg modifier
  // (cases 1-3 above); otherwise leave it for expression parsing.
  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}
2914 
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  // Parse an operand optionally wrapped in floating-point modifiers:
  // SP3-style '-' and '|...|', or named 'neg(...)' and 'abs(...)'.
  // Combining the SP3 and named forms of the same modifier is rejected.
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    // SP3 '-' already consumed; a second neg form is not allowed.
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  // Parse the wrapped operand itself. SP3Abs tells the immediate parser
  // to stop before the trailing '|'.
  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once any modifier has been consumed, an operand is mandatory.
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    // Attach the modifiers to the operand the parse above just pushed.
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
2981 
2982 OperandMatchResultTy
2983 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2984                                                bool AllowImm) {
2985   bool Sext = trySkipId("sext");
2986   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2987     return MatchOperand_ParseFail;
2988 
2989   OperandMatchResultTy Res;
2990   if (AllowImm) {
2991     Res = parseRegOrImm(Operands);
2992   } else {
2993     Res = parseReg(Operands);
2994   }
2995   if (Res != MatchOperand_Success) {
2996     return Sext? MatchOperand_ParseFail : Res;
2997   }
2998 
2999   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3000     return MatchOperand_ParseFail;
3001 
3002   AMDGPUOperand::Modifiers Mods;
3003   Mods.Sext = Sext;
3004 
3005   if (Mods.hasIntModifiers()) {
3006     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3007     if (Op.isExpr()) {
3008       Error(Op.getStartLoc(), "expected an absolute expression");
3009       return MatchOperand_ParseFail;
3010     }
3011     Op.setModifiers(Mods);
3012   }
3013 
3014   return MatchOperand_Success;
3015 }
3016 
// Parse a register (no immediate allowed) with optional FP modifiers
// (neg/abs in named or SP3 form).
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}
3021 
// Parse a register (no immediate allowed) with an optional integer
// 'sext(...)' modifier.
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}
3026 
3027 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3028   auto Loc = getLoc();
3029   if (trySkipId("off")) {
3030     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3031                                                 AMDGPUOperand::ImmTyOff, false));
3032     return MatchOperand_Success;
3033   }
3034 
3035   if (!isRegister())
3036     return MatchOperand_NoMatch;
3037 
3038   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3039   if (Reg) {
3040     Operands.push_back(std::move(Reg));
3041     return MatchOperand_Success;
3042   }
3043 
3044   return MatchOperand_ParseFail;
3045 
3046 }
3047 
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  // Reject a match that contradicts a user-forced encoding:
  // _e32 must not select a VOP3 form, _e64 must, and forced
  // DPP/SDWA must select an instruction of that kind.
  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  // Some VOP3 forms should only be used when explicitly requested via _e64.
  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}
3075 
3076 static ArrayRef<unsigned> getAllVariants() {
3077   static const unsigned Variants[] = {
3078     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3079     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3080   };
3081 
3082   return makeArrayRef(Variants);
3083 }
3084 
// What asm variants we should check
// Note: each candidate array below must have static storage duration
// because the returned ArrayRef does not own its elements.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  // No encoding was forced: try all of them.
  return getAllVariants();
}
3110 
// Human-readable name of the forced encoding, or "" when none is forced.
// Mirrors the selection order of getMatchedVariants().
StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}
3126 
3127 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3128   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3129   const unsigned Num = Desc.getNumImplicitUses();
3130   for (unsigned i = 0; i < Num; ++i) {
3131     unsigned Reg = Desc.ImplicitUses[i];
3132     switch (Reg) {
3133     case AMDGPU::FLAT_SCR:
3134     case AMDGPU::VCC:
3135     case AMDGPU::VCC_LO:
3136     case AMDGPU::VCC_HI:
3137     case AMDGPU::M0:
3138       return Reg;
3139     default:
3140       break;
3141     }
3142   }
3143   return AMDGPU::NoRegister;
3144 }
3145 
3146 // NB: This code is correct only when used to check constant
3147 // bus limitations because GFX7 support no f16 inline constants.
3148 // Note that there are no cases when a GFX7 opcode violates
3149 // constant bus limitations due to the use of an f16 constant.
3150 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3151                                        unsigned OpIdx) const {
3152   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3153 
3154   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3155     return false;
3156   }
3157 
3158   const MCOperand &MO = Inst.getOperand(OpIdx);
3159 
3160   int64_t Val = MO.getImm();
3161   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3162 
3163   switch (OpSize) { // expected operand size
3164   case 8:
3165     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3166   case 4:
3167     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3168   case 2: {
3169     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3170     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3171         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3172         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3173       return AMDGPU::isInlinableIntLiteral(Val);
3174 
3175     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3176         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3177         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3178       return AMDGPU::isInlinableIntLiteralV216(Val);
3179 
3180     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3181         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3182         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3183       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3184 
3185     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3186   }
3187   default:
3188     llvm_unreachable("invalid operand size");
3189   }
3190 }
3191 
3192 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3193   if (!isGFX10Plus())
3194     return 1;
3195 
3196   switch (Opcode) {
3197   // 64-bit shift instructions can use only one scalar value input
3198   case AMDGPU::V_LSHLREV_B64_e64:
3199   case AMDGPU::V_LSHLREV_B64_gfx10:
3200   case AMDGPU::V_LSHRREV_B64_e64:
3201   case AMDGPU::V_LSHRREV_B64_gfx10:
3202   case AMDGPU::V_ASHRREV_I64_e64:
3203   case AMDGPU::V_ASHRREV_I64_gfx10:
3204   case AMDGPU::V_LSHL_B64_e64:
3205   case AMDGPU::V_LSHR_B64_e64:
3206   case AMDGPU::V_ASHR_I64_e64:
3207     return 1;
3208   default:
3209     return 2;
3210   }
3211 }
3212 
3213 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3214   const MCOperand &MO = Inst.getOperand(OpIdx);
3215   if (MO.isImm()) {
3216     return !isInlineConstant(Inst, OpIdx);
3217   } else if (MO.isReg()) {
3218     auto Reg = MO.getReg();
3219     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3220     auto PReg = mc2PseudoReg(Reg);
3221     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3222   } else {
3223     return true;
3224   }
3225 }
3226 
// Verify that the instruction does not read more scalar values
// (SGPRs and literals) than the constant bus can deliver.
bool
AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
                                                const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    // Track distinct SGPRs: the same SGPR used twice counts only once.
    SmallDenseSet<unsigned> SGPRsUsed;
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated on the previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  // Report the diagnostic at whichever offending operand appears later.
  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
  Error(Loc, "invalid operand (violates constant bus restrictions)");
  return false;
}
3315 
3316 bool
3317 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3318                                                  const OperandVector &Operands) {
3319   const unsigned Opcode = Inst.getOpcode();
3320   const MCInstrDesc &Desc = MII.get(Opcode);
3321 
3322   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3323   if (DstIdx == -1 ||
3324       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3325     return true;
3326   }
3327 
3328   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3329 
3330   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3331   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3332   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3333 
3334   assert(DstIdx != -1);
3335   const MCOperand &Dst = Inst.getOperand(DstIdx);
3336   assert(Dst.isReg());
3337   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3338 
3339   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3340 
3341   for (int SrcIdx : SrcIndices) {
3342     if (SrcIdx == -1) break;
3343     const MCOperand &Src = Inst.getOperand(SrcIdx);
3344     if (Src.isReg()) {
3345       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3346       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3347         Error(getRegLoc(SrcReg, Operands),
3348           "destination must be different than all sources");
3349         return false;
3350       }
3351     }
3352   }
3353 
3354   return true;
3355 }
3356 
3357 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3358 
3359   const unsigned Opc = Inst.getOpcode();
3360   const MCInstrDesc &Desc = MII.get(Opc);
3361 
3362   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3363     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3364     assert(ClampIdx != -1);
3365     return Inst.getOperand(ClampIdx).getImm() == 0;
3366   }
3367 
3368   return true;
3369 }
3370 
3371 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3372 
3373   const unsigned Opc = Inst.getOpcode();
3374   const MCInstrDesc &Desc = MII.get(Opc);
3375 
3376   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3377     return true;
3378 
3379   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3380   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3381   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3382 
3383   assert(VDataIdx != -1);
3384 
3385   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3386     return true;
3387 
3388   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3389   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3390   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3391   if (DMask == 0)
3392     DMask = 1;
3393 
3394   unsigned DataSize =
3395     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3396   if (hasPackedD16()) {
3397     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3398     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3399       DataSize = (DataSize + 1) / 2;
3400   }
3401 
3402   return (VDataSize / 4) == DataSize + TFESize;
3403 }
3404 
// Check that the number of address registers matches what the MIMG
// opcode and dimension require (GFX10+ only).
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);

  assert(VAddr0Idx != -1);
  assert(SrsrcIdx != -1);
  assert(SrsrcIdx > VAddr0Idx);

  if (DimIdx == -1)
    return true; // intersect_ray

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
  // More than one operand between vaddr0 and srsrc means the NSA form,
  // with one address component per operand; otherwise all components are
  // packed into the single vaddr0 register, counted in dwords.
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned VAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;

  // Expected component count: extra args + gradients + coordinates +
  // an optional lod/clamp/mip component.
  unsigned AddrSize = BaseOpcode->NumExtraArgs +
                      (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
                      (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
                      (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  // Non-NSA encodings round the address register up to a power-of-two
  // size (4, 8 or 16 dwords).
  if (!IsNSA) {
    if (AddrSize > 8)
      AddrSize = 16;
    else if (AddrSize > 4)
      AddrSize = 8;
  }

  return VAddrSize == AddrSize;
}
3447 
3448 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3449 
3450   const unsigned Opc = Inst.getOpcode();
3451   const MCInstrDesc &Desc = MII.get(Opc);
3452 
3453   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3454     return true;
3455   if (!Desc.mayLoad() || !Desc.mayStore())
3456     return true; // Not atomic
3457 
3458   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3459   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3460 
3461   // This is an incomplete check because image_atomic_cmpswap
3462   // may only use 0x3 and 0xf while other atomic operations
3463   // may use 0x1 and 0x3. However these limitations are
3464   // verified when we check that dmask matches dst size.
3465   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3466 }
3467 
3468 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3469 
3470   const unsigned Opc = Inst.getOpcode();
3471   const MCInstrDesc &Desc = MII.get(Opc);
3472 
3473   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3474     return true;
3475 
3476   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3477   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3478 
3479   // GATHER4 instructions use dmask in a different fashion compared to
3480   // other MIMG instructions. The only useful DMASK values are
3481   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3482   // (red,red,red,red) etc.) The ISA document doesn't mention
3483   // this.
3484   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3485 }
3486 
3487 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3488   const unsigned Opc = Inst.getOpcode();
3489   const MCInstrDesc &Desc = MII.get(Opc);
3490 
3491   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3492     return true;
3493 
3494   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3495   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3496       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3497 
3498   if (!BaseOpcode->MSAA)
3499     return true;
3500 
3501   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3502   assert(DimIdx != -1);
3503 
3504   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3505   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3506 
3507   return DimInfo->MSAA;
3508 }
3509 
3510 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3511 {
3512   switch (Opcode) {
3513   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3514   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3515   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3516     return true;
3517   default:
3518     return false;
3519   }
3520 }
3521 
3522 // movrels* opcodes should only allow VGPRS as src0.
3523 // This is specified in .td description for vop1/vop3,
3524 // but sdwa is handled differently. See isSDWAOperand.
3525 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3526                                       const OperandVector &Operands) {
3527 
3528   const unsigned Opc = Inst.getOpcode();
3529   const MCInstrDesc &Desc = MII.get(Opc);
3530 
3531   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3532     return true;
3533 
3534   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3535   assert(Src0Idx != -1);
3536 
3537   SMLoc ErrLoc;
3538   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3539   if (Src0.isReg()) {
3540     auto Reg = mc2PseudoReg(Src0.getReg());
3541     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3542     if (!isSGPR(Reg, TRI))
3543       return true;
3544     ErrLoc = getRegLoc(Reg, Operands);
3545   } else {
3546     ErrLoc = getConstLoc(Operands);
3547   }
3548 
3549   Error(ErrLoc, "source operand must be a VGPR");
3550   return false;
3551 }
3552 
3553 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3554                                           const OperandVector &Operands) {
3555 
3556   const unsigned Opc = Inst.getOpcode();
3557 
3558   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3559     return true;
3560 
3561   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3562   assert(Src0Idx != -1);
3563 
3564   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3565   if (!Src0.isReg())
3566     return true;
3567 
3568   auto Reg = mc2PseudoReg(Src0.getReg());
3569   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3570   if (isSGPR(Reg, TRI)) {
3571     Error(getRegLoc(Reg, Operands),
3572           "source operand must be either a VGPR or an inline constant");
3573     return false;
3574   }
3575 
3576   return true;
3577 }
3578 
3579 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3580   switch (Inst.getOpcode()) {
3581   default:
3582     return true;
3583   case V_DIV_SCALE_F32_gfx6_gfx7:
3584   case V_DIV_SCALE_F32_vi:
3585   case V_DIV_SCALE_F32_gfx10:
3586   case V_DIV_SCALE_F64_gfx6_gfx7:
3587   case V_DIV_SCALE_F64_vi:
3588   case V_DIV_SCALE_F64_gfx10:
3589     break;
3590   }
3591 
3592   // TODO: Check that src0 = src1 or src2.
3593 
3594   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3595                     AMDGPU::OpName::src2_modifiers,
3596                     AMDGPU::OpName::src2_modifiers}) {
3597     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3598             .getImm() &
3599         SISrcMods::ABS) {
3600       return false;
3601     }
3602   }
3603 
3604   return true;
3605 }
3606 
3607 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3608 
3609   const unsigned Opc = Inst.getOpcode();
3610   const MCInstrDesc &Desc = MII.get(Opc);
3611 
3612   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3613     return true;
3614 
3615   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3616   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3617     if (isCI() || isSI())
3618       return false;
3619   }
3620 
3621   return true;
3622 }
3623 
3624 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3625   const unsigned Opc = Inst.getOpcode();
3626   const MCInstrDesc &Desc = MII.get(Opc);
3627 
3628   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3629     return true;
3630 
3631   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3632   if (DimIdx < 0)
3633     return true;
3634 
3635   long Imm = Inst.getOperand(DimIdx).getImm();
3636   if (Imm < 0 || Imm >= 8)
3637     return false;
3638 
3639   return true;
3640 }
3641 
// Returns true for "reversed" VALU opcodes (v_subrev*, v_*rev shifts and
// their packed forms), i.e. opcodes whose src0/src1 roles are swapped
// relative to the non-rev form. Used by validateLdsDirect: lds_direct is
// not allowed as src0 of these opcodes because the hardware effectively
// treats src0 as the second operand there.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  // Reversed subtract, all encodings and subtargets.
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  // Reversed shifts (the shift amount is src0).
  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  // Packed (VOP3P) reversed shifts.
  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
3770 
3771 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3772 
3773   using namespace SIInstrFlags;
3774   const unsigned Opcode = Inst.getOpcode();
3775   const MCInstrDesc &Desc = MII.get(Opcode);
3776 
3777   // lds_direct register is defined so that it can be used
3778   // with 9-bit operands only. Ignore encodings which do not accept these.
3779   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3780     return true;
3781 
3782   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3783   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3784   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3785 
3786   const int SrcIndices[] = { Src1Idx, Src2Idx };
3787 
3788   // lds_direct cannot be specified as either src1 or src2.
3789   for (int SrcIdx : SrcIndices) {
3790     if (SrcIdx == -1) break;
3791     const MCOperand &Src = Inst.getOperand(SrcIdx);
3792     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3793       return false;
3794     }
3795   }
3796 
3797   if (Src0Idx == -1)
3798     return true;
3799 
3800   const MCOperand &Src = Inst.getOperand(Src0Idx);
3801   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3802     return true;
3803 
3804   // lds_direct is specified as src0. Check additional limitations.
3805   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3806 }
3807 
3808 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3809   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3810     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3811     if (Op.isFlatOffset())
3812       return Op.getStartLoc();
3813   }
3814   return getLoc();
3815 }
3816 
3817 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3818                                          const OperandVector &Operands) {
3819   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3820   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3821     return true;
3822 
3823   auto Opcode = Inst.getOpcode();
3824   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3825   assert(OpNum != -1);
3826 
3827   const auto &Op = Inst.getOperand(OpNum);
3828   if (!hasFlatOffsets() && Op.getImm() != 0) {
3829     Error(getFlatOffsetLoc(Operands),
3830           "flat offset modifier is not supported on this GPU");
3831     return false;
3832   }
3833 
3834   // For FLAT segment the offset must be positive;
3835   // MSB is ignored and forced to zero.
3836   if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3837     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3838     if (!isIntN(OffsetSize, Op.getImm())) {
3839       Error(getFlatOffsetLoc(Operands),
3840             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3841       return false;
3842     }
3843   } else {
3844     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3845     if (!isUIntN(OffsetSize, Op.getImm())) {
3846       Error(getFlatOffsetLoc(Operands),
3847             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3848       return false;
3849     }
3850   }
3851 
3852   return true;
3853 }
3854 
3855 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3856   // Start with second operand because SMEM Offset cannot be dst or src0.
3857   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3858     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3859     if (Op.isSMEMOffset())
3860       return Op.getStartLoc();
3861   }
3862   return getLoc();
3863 }
3864 
3865 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3866                                          const OperandVector &Operands) {
3867   if (isCI() || isSI())
3868     return true;
3869 
3870   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3871   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3872     return true;
3873 
3874   auto Opcode = Inst.getOpcode();
3875   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3876   if (OpNum == -1)
3877     return true;
3878 
3879   const auto &Op = Inst.getOperand(OpNum);
3880   if (!Op.isImm())
3881     return true;
3882 
3883   uint64_t Offset = Op.getImm();
3884   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3885   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3886       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3887     return true;
3888 
3889   Error(getSMEMOffsetLoc(Operands),
3890         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3891                                "expected a 21-bit signed offset");
3892 
3893   return false;
3894 }
3895 
3896 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3897   unsigned Opcode = Inst.getOpcode();
3898   const MCInstrDesc &Desc = MII.get(Opcode);
3899   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3900     return true;
3901 
3902   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3903   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3904 
3905   const int OpIndices[] = { Src0Idx, Src1Idx };
3906 
3907   unsigned NumExprs = 0;
3908   unsigned NumLiterals = 0;
3909   uint32_t LiteralValue;
3910 
3911   for (int OpIdx : OpIndices) {
3912     if (OpIdx == -1) break;
3913 
3914     const MCOperand &MO = Inst.getOperand(OpIdx);
3915     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3916     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3917       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3918         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3919         if (NumLiterals == 0 || LiteralValue != Value) {
3920           LiteralValue = Value;
3921           ++NumLiterals;
3922         }
3923       } else if (MO.isExpr()) {
3924         ++NumExprs;
3925       }
3926     }
3927   }
3928 
3929   return NumLiterals + NumExprs <= 1;
3930 }
3931 
3932 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3933   const unsigned Opc = Inst.getOpcode();
3934   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3935       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3936     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3937     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3938 
3939     if (OpSel & ~3)
3940       return false;
3941   }
3942   return true;
3943 }
3944 
3945 // Check if VCC register matches wavefront size
3946 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3947   auto FB = getFeatureBits();
3948   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3949     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3950 }
3951 
3952 // VOP3 literal is only allowed in GFX10+ and only one can be used
3953 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3954                                           const OperandVector &Operands) {
3955   unsigned Opcode = Inst.getOpcode();
3956   const MCInstrDesc &Desc = MII.get(Opcode);
3957   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3958     return true;
3959 
3960   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3961   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3962   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3963 
3964   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3965 
3966   unsigned NumExprs = 0;
3967   unsigned NumLiterals = 0;
3968   uint32_t LiteralValue;
3969 
3970   for (int OpIdx : OpIndices) {
3971     if (OpIdx == -1) break;
3972 
3973     const MCOperand &MO = Inst.getOperand(OpIdx);
3974     if (!MO.isImm() && !MO.isExpr())
3975       continue;
3976     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3977       continue;
3978 
3979     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3980         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3981       Error(getConstLoc(Operands),
3982             "inline constants are not allowed for this operand");
3983       return false;
3984     }
3985 
3986     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3987       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3988       if (NumLiterals == 0 || LiteralValue != Value) {
3989         LiteralValue = Value;
3990         ++NumLiterals;
3991       }
3992     } else if (MO.isExpr()) {
3993       ++NumExprs;
3994     }
3995   }
3996   NumLiterals += NumExprs;
3997 
3998   if (!NumLiterals)
3999     return true;
4000 
4001   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4002     Error(getLitLoc(Operands), "literal operands are not supported");
4003     return false;
4004   }
4005 
4006   if (NumLiterals > 1) {
4007     Error(getLitLoc(Operands), "only one literal operand is allowed");
4008     return false;
4009   }
4010 
4011   return true;
4012 }
4013 
4014 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4015 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4016                          const MCRegisterInfo *MRI) {
4017   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4018   if (OpIdx < 0)
4019     return -1;
4020 
4021   const MCOperand &Op = Inst.getOperand(OpIdx);
4022   if (!Op.isReg())
4023     return -1;
4024 
4025   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4026   auto Reg = Sub ? Sub : Op.getReg();
4027   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4028   return AGRP32.contains(Reg) ? 1 : 0;
4029 }
4030 
4031 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4032   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4033   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4034                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4035                   SIInstrFlags::DS)) == 0)
4036     return true;
4037 
4038   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4039                                                       : AMDGPU::OpName::vdata;
4040 
4041   const MCRegisterInfo *MRI = getMRI();
4042   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4043   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4044 
4045   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4046     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4047     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4048       return false;
4049   }
4050 
4051   auto FB = getFeatureBits();
4052   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4053     if (DataAreg < 0 || DstAreg < 0)
4054       return true;
4055     return DstAreg == DataAreg;
4056   }
4057 
4058   return DstAreg < 1 && DataAreg < 1;
4059 }
4060 
4061 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4062   auto FB = getFeatureBits();
4063   if (!FB[AMDGPU::FeatureGFX90AInsts])
4064     return true;
4065 
4066   const MCRegisterInfo *MRI = getMRI();
4067   const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4068   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4069   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4070     const MCOperand &Op = Inst.getOperand(I);
4071     if (!Op.isReg())
4072       continue;
4073 
4074     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4075     if (!Sub)
4076       continue;
4077 
4078     if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4079       return false;
4080     if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4081       return false;
4082   }
4083 
4084   return true;
4085 }
4086 
// Validate the cache-policy (cpol: glc/slc/dlc/scc) bits of a memory
// instruction against the instruction kind and target:
//  - SMRD only accepts GLC and DLC;
//  - returning atomics require glc, non-returning atomics forbid it;
//  - gfx90a FP atomics must not use scc.
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            const SMLoc &IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  // Instructions without a cpol operand have nothing to check.
  if (CPolPos == -1)
    return true;

  unsigned CPol = Inst.getOperand(CPolPos).getImm();

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & (SIInstrFlags::SMRD)) &&
      (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
    Error(IDLoc, "invalid cache policy for SMRD instruction");
    return false;
  }

  // The remaining rules apply only to atomics.
  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
    return true;

  if (TSFlags & SIInstrFlags::IsAtomicRet) {
    // Returning atomics (except MIMG) must set glc to get the result.
    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
      Error(IDLoc, "instruction must use glc");
      return false;
    }
  } else {
    if (CPol & CPol::GLC) {
      // Point the diagnostic at the "glc" token within the parsed cpol
      // operand text rather than at the whole operand.
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
      Error(S, "instruction must not use glc");
      return false;
    }
  }

  if (isGFX90A() && (CPol & CPol::SCC) && (TSFlags & SIInstrFlags::FPAtomic)) {
    // Same trick: point at the "scc" token inside the cpol operand.
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    StringRef CStr(S.getPointer());
    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
    Error(S, "instruction must not use scc");
    return false;
  }

  return true;
}
4132 
4133 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4134                                           const SMLoc &IDLoc,
4135                                           const OperandVector &Operands) {
4136   if (!validateLdsDirect(Inst)) {
4137     Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands),
4138       "invalid use of lds_direct");
4139     return false;
4140   }
4141   if (!validateSOPLiteral(Inst)) {
4142     Error(getLitLoc(Operands),
4143       "only one literal operand is allowed");
4144     return false;
4145   }
4146   if (!validateVOP3Literal(Inst, Operands)) {
4147     return false;
4148   }
4149   if (!validateConstantBusLimitations(Inst, Operands)) {
4150     return false;
4151   }
4152   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4153     return false;
4154   }
4155   if (!validateIntClampSupported(Inst)) {
4156     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4157       "integer clamping is not supported on this GPU");
4158     return false;
4159   }
4160   if (!validateOpSel(Inst)) {
4161     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4162       "invalid op_sel operand");
4163     return false;
4164   }
4165   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4166   if (!validateMIMGD16(Inst)) {
4167     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4168       "d16 modifier is not supported on this GPU");
4169     return false;
4170   }
4171   if (!validateMIMGDim(Inst)) {
4172     Error(IDLoc, "dim modifier is required on this GPU");
4173     return false;
4174   }
4175   if (!validateMIMGMSAA(Inst)) {
4176     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4177           "invalid dim; must be MSAA type");
4178     return false;
4179   }
4180   if (!validateMIMGDataSize(Inst)) {
4181     Error(IDLoc,
4182       "image data size does not match dmask and tfe");
4183     return false;
4184   }
4185   if (!validateMIMGAddrSize(Inst)) {
4186     Error(IDLoc,
4187       "image address size does not match dim and a16");
4188     return false;
4189   }
4190   if (!validateMIMGAtomicDMask(Inst)) {
4191     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4192       "invalid atomic image dmask");
4193     return false;
4194   }
4195   if (!validateMIMGGatherDMask(Inst)) {
4196     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4197       "invalid image_gather dmask: only one bit must be set");
4198     return false;
4199   }
4200   if (!validateMovrels(Inst, Operands)) {
4201     return false;
4202   }
4203   if (!validateFlatOffset(Inst, Operands)) {
4204     return false;
4205   }
4206   if (!validateSMEMOffset(Inst, Operands)) {
4207     return false;
4208   }
4209   if (!validateMAIAccWrite(Inst, Operands)) {
4210     return false;
4211   }
4212   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4213     return false;
4214   }
4215 
4216   if (!validateAGPRLdSt(Inst)) {
4217     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4218     ? "invalid register class: data and dst should be all VGPR or AGPR"
4219     : "invalid register class: agpr loads and stores not supported on this GPU"
4220     );
4221     return false;
4222   }
4223   if (!validateVGPRAlign(Inst)) {
4224     Error(IDLoc,
4225       "invalid register class: vgpr tuples must be 64 bit aligned");
4226     return false;
4227   }
4228 
4229   if (!validateDivScale(Inst)) {
4230     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4231     return false;
4232   }
4233   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4234     return false;
4235   }
4236 
4237   return true;
4238 }
4239 
4240 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4241                                             const FeatureBitset &FBS,
4242                                             unsigned VariantID = 0);
4243 
4244 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4245                                 const FeatureBitset &AvailableFeatures,
4246                                 unsigned VariantID);
4247 
// Convenience overload: check the mnemonic against every assembler
// variant.
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}
4252 
4253 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4254                                        const FeatureBitset &FBS,
4255                                        ArrayRef<unsigned> Variants) {
4256   for (auto Variant : Variants) {
4257     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4258       return true;
4259   }
4260 
4261   return false;
4262 }
4263 
// Produce the most helpful diagnostic for a mnemonic that failed to
// match: distinguish "wrong variant", "wrong GPU", and plain typo (with
// a spelling suggestion). Returns true if an error was emitted.
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());

  // Check if requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;

  // This instruction is not supported.
  // Clear any other pending errors because they are no longer relevant.
  getParser().clearPendingErrors();

  // Requested instruction variant is not supported.
  // Check if any other variants are supported.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Finally check if this instruction is supported on any other GPU.
  // FeatureBitset().set() enables every feature, i.e. "any target".
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU");
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}
4294 
// Try to match the parsed operands against every applicable assembler
// variant, keep the most specific failure status, and emit the
// instruction on success. Returns true on error (MC convention).
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    // First successful variant wins; no need to try the rest.
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    // Matched syntactically; now run target-specific semantic checks.
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  // Prefer an "unsupported instruction" style diagnostic when the
  // mnemonic itself is the problem.
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      // Point at the offending operand when its location is known.
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}
4367 
4368 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4369   int64_t Tmp = -1;
4370   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4371     return true;
4372   }
4373   if (getParser().parseAbsoluteExpression(Tmp)) {
4374     return true;
4375   }
4376   Ret = static_cast<uint32_t>(Tmp);
4377   return false;
4378 }
4379 
// Parse a "<major>, <minor>" version pair used by several HSA
// directives. Returns true (with a token error) on malformed input.
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  // Major and minor numbers are comma separated.
  if (!trySkipToken(AsmToken::Comma))
    return TokError("minor version number required, comma expected");

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}
4393 
4394 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4395   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4396     return TokError("directive only supported for amdgcn architecture");
4397 
4398   std::string Target;
4399 
4400   SMLoc TargetStart = getLoc();
4401   if (getParser().parseEscapedString(Target))
4402     return true;
4403   SMRange TargetRange = SMRange(TargetStart, getLoc());
4404 
4405   std::string ExpectedTarget;
4406   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
4407   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
4408 
4409   if (Target != ExpectedTargetOS.str())
4410     return Error(TargetRange.Start, "target must match options", TargetRange);
4411 
4412   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
4413   return false;
4414 }
4415 
/// Convenience helper: emit a generic "value out of range" diagnostic anchored
/// at \p Range. Always returns true so callers can `return OutOfRangeError(..)`.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
4419 
/// Convert raw next-free VGPR/SGPR counts into the granulated "block" encodings
/// stored in COMPUTE_PGM_RSRC1, range-checking against the subtarget limits.
/// Returns true (after emitting an out-of-range error at the given source
/// range) on failure; on success fills \p VGPRBlocks and \p SGPRBlocks.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
    SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
    unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;

  if (Version.Major >= 10)
    // GFX10+ does not encode an SGPR count in the program resource registers.
    NumSGPRs = 0;
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(&getSTI());

    // Without the SGPR-init hardware bug, gfx8+ checks the user-visible count
    // before the extra VCC/flat-scratch/XNACK SGPRs are added.
    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    NumSGPRs +=
        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

    // On gfx7 and earlier, or with the init bug, the limit applies to the
    // total including the extra SGPRs.
    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // Workaround: affected parts must always report a fixed SGPR count.
    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
  }

  VGPRBlocks =
      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}
4459 
/// Parse a full ".amdhsa_kernel <name> ... .end_amdhsa_kernel" block: one
/// ".amdhsa_*" key/value directive per statement, each setting a field of the
/// kernel_descriptor_t, followed by derived-field computation and emission via
/// the target streamer. Returns true on any parse or validation error.
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  // Start from the subtarget's default descriptor; directives override fields.
  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());

  // Tracks which directives have appeared, both to reject duplicates and to
  // enforce the required ones after the loop.
  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  // Accumulated state that is only folded into KD after .end_amdhsa_kernel.
  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  uint64_t AccumOffset = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  bool ReserveXNACK = hasXNACK();
  Optional<bool> EnableWavefrontSize32;

  while (true) {
    // Skip blank statements (comments lex to EndOfStatement too).
    while (trySkipToken(AsmToken::EndOfStatement));

    StringRef ID;
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
      return true;

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    SMLoc ValStart = getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-check VALUE against the field's bit width, then pack it into FIELD.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      // User SGPR directives also accumulate the preloaded-SGPR count used
      // for COMPUTE_PGM_RSRC2_USER_SGPR_COUNT below.
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // Remembered separately: it feeds the VGPR block granularity later.
      EnableWavefrontSize32 = Val;
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                       Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
          ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      // Raw counts are kept (with their source ranges for diagnostics) and
      // converted to block encodings after the loop.
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_accum_offset") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      AccumOffset = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveXNACK = Val;
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_tg_split") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
                       ValRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  // Both register-count directives are mandatory.
  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  // Fold raw counts into the granulated block encodings, then range-check and
  // pack them into compute_pgm_rsrc1.
  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  // gfx90a requires and validates .amdhsa_accum_offset, encoded as
  // (offset / 4 - 1) in compute_pgm_rsrc3.
  if (isGFX90A()) {
    if (Seen.find(".amdhsa_accum_offset") == Seen.end())
      return TokError(".amdhsa_accum_offset directive is required");
    if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
      return TokError("accum_offset should be in range [4..256] in "
                      "increments of 4");
    if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
      return TokError("accum_offset exceeds total VGPR allocation");
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
                    (AccumOffset / 4 - 1));
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, ReserveXNACK);
  return false;
}
4758 
4759 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4760   uint32_t Major;
4761   uint32_t Minor;
4762 
4763   if (ParseDirectiveMajorMinor(Major, Minor))
4764     return true;
4765 
4766   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4767   return false;
4768 }
4769 
4770 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4771   uint32_t Major;
4772   uint32_t Minor;
4773   uint32_t Stepping;
4774   StringRef VendorName;
4775   StringRef ArchName;
4776 
4777   // If this directive has no arguments, then use the ISA version for the
4778   // targeted GPU.
4779   if (isToken(AsmToken::EndOfStatement)) {
4780     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4781     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4782                                                       ISA.Stepping,
4783                                                       "AMD", "AMDGPU");
4784     return false;
4785   }
4786 
4787   if (ParseDirectiveMajorMinor(Major, Minor))
4788     return true;
4789 
4790   if (!trySkipToken(AsmToken::Comma))
4791     return TokError("stepping version number required, comma expected");
4792 
4793   if (ParseAsAbsoluteExpression(Stepping))
4794     return TokError("invalid stepping version");
4795 
4796   if (!trySkipToken(AsmToken::Comma))
4797     return TokError("vendor name required, comma expected");
4798 
4799   if (!parseString(VendorName, "invalid vendor name"))
4800     return true;
4801 
4802   if (!trySkipToken(AsmToken::Comma))
4803     return TokError("arch name required, comma expected");
4804 
4805   if (!parseString(ArchName, "invalid arch name"))
4806     return true;
4807 
4808   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4809                                                     VendorName, ArchName);
4810   return false;
4811 }
4812 
/// Parse one "<field> = <value>" entry inside .amd_kernel_code_t into
/// \p Header, then apply subtarget-specific validation of the fields that
/// interact with wavefront size and GFX10 execution modes. Returns true on
/// error.
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();

  // Cross-check the wavefront-size code property against the subtarget's
  // wavefront-size features.
  if (ID == "enable_wavefront_size32") {
    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10Plus())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  // wavefront_size is log2 of the wave width: 5 -> wave32, 6 -> wave64.
  if (ID == "wavefront_size") {
    if (Header.wavefront_size == 5) {
      if (!isGFX10Plus())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (Header.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  // The following three bits only exist on GFX10+; reject them if set when
  // targeting older generations.
  if (ID == "enable_wgp_mode") {
    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  }

  if (ID == "enable_mem_ordered") {
    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  }

  if (ID == "enable_fwd_progress") {
    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
  }

  return false;
}
4873 
4874 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4875   amd_kernel_code_t Header;
4876   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4877 
4878   while (true) {
4879     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4880     // will set the current token to EndOfStatement.
4881     while(trySkipToken(AsmToken::EndOfStatement));
4882 
4883     StringRef ID;
4884     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4885       return true;
4886 
4887     if (ID == ".end_amd_kernel_code_t")
4888       break;
4889 
4890     if (ParseAMDKernelCodeTValue(ID, Header))
4891       return true;
4892   }
4893 
4894   getTargetStreamer().EmitAMDKernelCodeT(Header);
4895 
4896   return false;
4897 }
4898 
4899 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4900   StringRef KernelName;
4901   if (!parseId(KernelName, "expected symbol name"))
4902     return true;
4903 
4904   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4905                                            ELF::STT_AMDGPU_HSA_KERNEL);
4906 
4907   KernelScope.initialize(getContext());
4908   return false;
4909 }
4910 
4911 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4912   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4913     return Error(getLoc(),
4914                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4915                  "architectures");
4916   }
4917 
4918   auto ISAVersionStringFromASM = getToken().getStringContents();
4919 
4920   std::string ISAVersionStringFromSTI;
4921   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4922   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4923 
4924   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4925     return Error(getLoc(),
4926                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4927                  "arguments specified through the command line");
4928   }
4929 
4930   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4931   Lex();
4932 
4933   return false;
4934 }
4935 
4936 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4937   const char *AssemblerDirectiveBegin;
4938   const char *AssemblerDirectiveEnd;
4939   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4940       isHsaAbiVersion3(&getSTI())
4941           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4942                             HSAMD::V3::AssemblerDirectiveEnd)
4943           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4944                             HSAMD::AssemblerDirectiveEnd);
4945 
4946   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4947     return Error(getLoc(),
4948                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4949                  "not available on non-amdhsa OSes")).str());
4950   }
4951 
4952   std::string HSAMetadataString;
4953   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4954                           HSAMetadataString))
4955     return true;
4956 
4957   if (isHsaAbiVersion3(&getSTI())) {
4958     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4959       return Error(getLoc(), "invalid HSA metadata");
4960   } else {
4961     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4962       return Error(getLoc(), "invalid HSA metadata");
4963   }
4964 
4965   return false;
4966 }
4967 
/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
///
/// Fills \p CollectString with the raw text between \p AssemblerDirectiveBegin
/// (already consumed by the caller) and \p AssemblerDirectiveEnd, preserving
/// leading whitespace and joining statements with the target's separator
/// string. Returns true if EOF is reached before the end directive.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Whitespace is significant inside the collected block, so stop the lexer
  // from discarding it; restored below.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(AsmToken::Eof)) {
    // Copy leading space tokens verbatim into the collected text.
    while (isToken(AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    if (trySkipId(AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  getLexer().setSkipSpace(true);

  if (isToken(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}
5006 
5007 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5008 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5009   std::string String;
5010   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5011                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5012     return true;
5013 
5014   auto PALMetadata = getTargetStreamer().getPALMetadata();
5015   if (!PALMetadata->setFromString(String))
5016     return Error(getLoc(), "invalid PAL metadata");
5017   return false;
5018 }
5019 
5020 /// Parse the assembler directive for old linear-format PAL metadata.
5021 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5022   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5023     return Error(getLoc(),
5024                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5025                  "not available on non-amdpal OSes")).str());
5026   }
5027 
5028   auto PALMetadata = getTargetStreamer().getPALMetadata();
5029   PALMetadata->setLegacy();
5030   for (;;) {
5031     uint32_t Key, Value;
5032     if (ParseAsAbsoluteExpression(Key)) {
5033       return TokError(Twine("invalid value in ") +
5034                       Twine(PALMD::AssemblerDirective));
5035     }
5036     if (!trySkipToken(AsmToken::Comma)) {
5037       return TokError(Twine("expected an even number of values in ") +
5038                       Twine(PALMD::AssemblerDirective));
5039     }
5040     if (ParseAsAbsoluteExpression(Value)) {
5041       return TokError(Twine("invalid value in ") +
5042                       Twine(PALMD::AssemblerDirective));
5043     }
5044     PALMetadata->setRegister(Key, Value);
5045     if (!trySkipToken(AsmToken::Comma))
5046       break;
5047   }
5048   return false;
5049 }
5050 
5051 /// ParseDirectiveAMDGPULDS
5052 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5053 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5054   if (getParser().checkForValidSection())
5055     return true;
5056 
5057   StringRef Name;
5058   SMLoc NameLoc = getLoc();
5059   if (getParser().parseIdentifier(Name))
5060     return TokError("expected identifier in directive");
5061 
5062   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5063   if (parseToken(AsmToken::Comma, "expected ','"))
5064     return true;
5065 
5066   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5067 
5068   int64_t Size;
5069   SMLoc SizeLoc = getLoc();
5070   if (getParser().parseAbsoluteExpression(Size))
5071     return true;
5072   if (Size < 0)
5073     return Error(SizeLoc, "size must be non-negative");
5074   if (Size > LocalMemorySize)
5075     return Error(SizeLoc, "size is too large");
5076 
5077   int64_t Alignment = 4;
5078   if (trySkipToken(AsmToken::Comma)) {
5079     SMLoc AlignLoc = getLoc();
5080     if (getParser().parseAbsoluteExpression(Alignment))
5081       return true;
5082     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5083       return Error(AlignLoc, "alignment must be a power of two");
5084 
5085     // Alignment larger than the size of LDS is possible in theory, as long
5086     // as the linker manages to place to symbol at address 0, but we do want
5087     // to make sure the alignment fits nicely into a 32-bit integer.
5088     if (Alignment >= 1u << 31)
5089       return Error(AlignLoc, "alignment is too large");
5090   }
5091 
5092   if (parseToken(AsmToken::EndOfStatement,
5093                  "unexpected token in '.amdgpu_lds' directive"))
5094     return true;
5095 
5096   Symbol->redefineIfPossible();
5097   if (!Symbol->isUndefined())
5098     return Error(NameLoc, "invalid symbol redefinition");
5099 
5100   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5101   return false;
5102 }
5103 
5104 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5105   StringRef IDVal = DirectiveID.getString();
5106 
5107   if (isHsaAbiVersion3(&getSTI())) {
5108     if (IDVal == ".amdgcn_target")
5109       return ParseDirectiveAMDGCNTarget();
5110 
5111     if (IDVal == ".amdhsa_kernel")
5112       return ParseDirectiveAMDHSAKernel();
5113 
5114     // TODO: Restructure/combine with PAL metadata directive.
5115     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5116       return ParseDirectiveHSAMetadata();
5117   } else {
5118     if (IDVal == ".hsa_code_object_version")
5119       return ParseDirectiveHSACodeObjectVersion();
5120 
5121     if (IDVal == ".hsa_code_object_isa")
5122       return ParseDirectiveHSACodeObjectISA();
5123 
5124     if (IDVal == ".amd_kernel_code_t")
5125       return ParseDirectiveAMDKernelCodeT();
5126 
5127     if (IDVal == ".amdgpu_hsa_kernel")
5128       return ParseDirectiveAMDGPUHsaKernel();
5129 
5130     if (IDVal == ".amd_amdgpu_isa")
5131       return ParseDirectiveISAVersion();
5132 
5133     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5134       return ParseDirectiveHSAMetadata();
5135   }
5136 
5137   if (IDVal == ".amdgpu_lds")
5138     return ParseDirectiveAMDGPULDS();
5139 
5140   if (IDVal == PALMD::AssemblerDirectiveBegin)
5141     return ParseDirectivePALMetadataBegin();
5142 
5143   if (IDVal == PALMD::AssemblerDirective)
5144     return ParseDirectivePALMetadata();
5145 
5146   return true;
5147 }
5148 
/// Return true if \p RegNo (including any register that aliases it) exists on
/// the current subtarget. Checks generation-specific registers first, then
/// falls back to per-generation SGPR-count and flat-scratch rules.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {

  // ttmp12..ttmp15 (and anything overlapping them) only exist on gfx9+.
  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9Plus();
  }

  // GFX10 has 2 more SGPRs 104 and 105.
  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR104_SGPR105();
  }

  switch (RegNo) {
  // Aperture and POPS registers appeared with gfx9.
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus();
  // Trap base/memory address registers were removed in gfx9.
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9Plus();
  // XNACK_MASK is only addressable on gfx8/gfx9 parts with XNACK enabled.
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return (isVI() || isGFX9()) && hasXNACK();
  case AMDGPU::SGPR_NULL:
    return isGFX10Plus();
  default:
    break;
  }

  // NOTE: order matters below — CI accepts everything remaining, and must be
  // tested before the SI/GFX10 flat-scratch restriction.
  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR102_SGPR103();
  }

  return true;
}
5216 
/// Parse a single instruction operand.
/// \p Mode selects NSA handling: in OperandMode_NSA a bracketed register
/// list (e.g. "[v0, v2, v5]") is accepted for GFX10+ MIMG addresses.
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      isToken(AsmToken::EndOfStatement))
    return ResTy;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    // Index of the first register in the list; used below to decide
    // whether to keep the surrounding brackets.
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      ResTy = parseReg(Operands);
      if (ResTy == MatchOperand_NoMatch)
        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
        return MatchOperand_ParseFail;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
      }
    }

    // Only a multi-register list keeps its brackets as token operands;
    // a single bracketed register is treated like a plain register.
    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return MatchOperand_Success;
  }

  // Default path: a register or an immediate.
  return parseRegOrImm(Operands);
}
5267 
5268 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5269   // Clear any forced encodings from the previous instruction.
5270   setForcedEncodingSize(0);
5271   setForcedDPP(false);
5272   setForcedSDWA(false);
5273 
5274   if (Name.endswith("_e64")) {
5275     setForcedEncodingSize(64);
5276     return Name.substr(0, Name.size() - 4);
5277   } else if (Name.endswith("_e32")) {
5278     setForcedEncodingSize(32);
5279     return Name.substr(0, Name.size() - 4);
5280   } else if (Name.endswith("_dpp")) {
5281     setForcedDPP(true);
5282     return Name.substr(0, Name.size() - 4);
5283   } else if (Name.endswith("_sdwa")) {
5284     setForcedSDWA(true);
5285     return Name.substr(0, Name.size() - 5);
5286   }
5287   return Name;
5288 }
5289 
/// Parse a full instruction: mnemonic followed by a comma/space separated
/// operand list, up to the end of statement. Returns true on error.
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic (with any _e32/_e64/_dpp/_sdwa suffix
  // stripped; the suffix is recorded as a forced encoding).
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.startswith("image_");

  while (!trySkipToken(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    // On GFX10+ the first operand of an image instruction may be an NSA
    // register list, e.g. "[v0, v2, v5]".
    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    // Reset the set of cache-policy bits seen (accumulated by parseCPol).
    CPolSeen = 0;
    OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);

    if (Res != MatchOperand_Success) {
      // Prefer a "not supported on this GPU" style diagnostic when the
      // mnemonic itself is the problem.
      checkUnsupportedInstruction(Name, NameLoc);
      if (!Parser.hasPendingError()) {
        // FIXME: use real operand location rather than the current location.
        StringRef Msg =
          (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
                                            "not a valid operand.";
        Error(getLoc(), Msg);
      }
      // Error recovery: discard the remainder of the statement.
      while (!trySkipToken(AsmToken::EndOfStatement)) {
        lex();
      }
      return true;
    }

    // Eat the comma or space if there is one.
    trySkipToken(AsmToken::Comma);
  }

  return false;
}
5327 
5328 //===----------------------------------------------------------------------===//
5329 // Utility functions
5330 //===----------------------------------------------------------------------===//
5331 
5332 OperandMatchResultTy
5333 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5334 
5335   if (!trySkipId(Prefix, AsmToken::Colon))
5336     return MatchOperand_NoMatch;
5337 
5338   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5339 }
5340 
5341 OperandMatchResultTy
5342 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5343                                     AMDGPUOperand::ImmTy ImmTy,
5344                                     bool (*ConvertResult)(int64_t&)) {
5345   SMLoc S = getLoc();
5346   int64_t Value = 0;
5347 
5348   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5349   if (Res != MatchOperand_Success)
5350     return Res;
5351 
5352   if (ConvertResult && !ConvertResult(Value)) {
5353     Error(S, "invalid " + StringRef(Prefix) + " value.");
5354   }
5355 
5356   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5357   return MatchOperand_Success;
5358 }
5359 
5360 OperandMatchResultTy
5361 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5362                                              OperandVector &Operands,
5363                                              AMDGPUOperand::ImmTy ImmTy,
5364                                              bool (*ConvertResult)(int64_t&)) {
5365   SMLoc S = getLoc();
5366   if (!trySkipId(Prefix, AsmToken::Colon))
5367     return MatchOperand_NoMatch;
5368 
5369   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5370     return MatchOperand_ParseFail;
5371 
5372   unsigned Val = 0;
5373   const unsigned MaxSize = 4;
5374 
5375   // FIXME: How to verify the number of elements matches the number of src
5376   // operands?
5377   for (int I = 0; ; ++I) {
5378     int64_t Op;
5379     SMLoc Loc = getLoc();
5380     if (!parseExpr(Op))
5381       return MatchOperand_ParseFail;
5382 
5383     if (Op != 0 && Op != 1) {
5384       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5385       return MatchOperand_ParseFail;
5386     }
5387 
5388     Val |= (Op << I);
5389 
5390     if (trySkipToken(AsmToken::RBrac))
5391       break;
5392 
5393     if (I + 1 == MaxSize) {
5394       Error(getLoc(), "expected a closing square bracket");
5395       return MatchOperand_ParseFail;
5396     }
5397 
5398     if (!skipToken(AsmToken::Comma, "expected a comma"))
5399       return MatchOperand_ParseFail;
5400   }
5401 
5402   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5403   return MatchOperand_Success;
5404 }
5405 
5406 OperandMatchResultTy
5407 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5408                                AMDGPUOperand::ImmTy ImmTy) {
5409   int64_t Bit;
5410   SMLoc S = getLoc();
5411 
5412   if (trySkipId(Name)) {
5413     Bit = 1;
5414   } else if (trySkipId("no", Name)) {
5415     Bit = 0;
5416   } else {
5417     return MatchOperand_NoMatch;
5418   }
5419 
5420   if (Name == "r128" && !hasMIMG_R128()) {
5421     Error(S, "r128 modifier is not supported on this GPU");
5422     return MatchOperand_ParseFail;
5423   }
5424   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5425     Error(S, "a16 modifier is not supported on this GPU");
5426     return MatchOperand_ParseFail;
5427   }
5428 
5429   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5430     ImmTy = AMDGPUOperand::ImmTyR128A16;
5431 
5432   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5433   return MatchOperand_Success;
5434 }
5435 
5436 OperandMatchResultTy
5437 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5438   unsigned CPolOn = 0;
5439   unsigned CPolOff = 0;
5440   SMLoc S = getLoc();
5441 
5442   if (trySkipId("glc"))
5443     CPolOn = AMDGPU::CPol::GLC;
5444   else if (trySkipId("noglc"))
5445     CPolOff = AMDGPU::CPol::GLC;
5446   else if (trySkipId("slc"))
5447     CPolOn = AMDGPU::CPol::SLC;
5448   else if (trySkipId("noslc"))
5449     CPolOff = AMDGPU::CPol::SLC;
5450   else if (trySkipId("dlc"))
5451     CPolOn = AMDGPU::CPol::DLC;
5452   else if (trySkipId("nodlc"))
5453     CPolOff = AMDGPU::CPol::DLC;
5454   else if (trySkipId("scc"))
5455     CPolOn = AMDGPU::CPol::SCC;
5456   else if (trySkipId("noscc"))
5457     CPolOff = AMDGPU::CPol::SCC;
5458   else
5459     return MatchOperand_NoMatch;
5460 
5461   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5462     Error(S, "dlc modifier is not supported on this GPU");
5463     return MatchOperand_ParseFail;
5464   }
5465 
5466   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5467     Error(S, "scc modifier is not supported on this GPU");
5468     return MatchOperand_ParseFail;
5469   }
5470 
5471   if (CPolSeen & (CPolOn | CPolOff)) {
5472     Error(S, "duplicate cache policy modifier");
5473     return MatchOperand_ParseFail;
5474   }
5475 
5476   CPolSeen |= (CPolOn | CPolOff);
5477 
5478   for (unsigned I = 1; I != Operands.size(); ++I) {
5479     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5480     if (Op.isCPol()) {
5481       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5482       return MatchOperand_Success;
5483     }
5484   }
5485 
5486   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5487                                               AMDGPUOperand::ImmTyCPol));
5488 
5489   return MatchOperand_Success;
5490 }
5491 
5492 static void addOptionalImmOperand(
5493   MCInst& Inst, const OperandVector& Operands,
5494   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5495   AMDGPUOperand::ImmTy ImmT,
5496   int64_t Default = 0) {
5497   auto i = OptionalIdx.find(ImmT);
5498   if (i != OptionalIdx.end()) {
5499     unsigned Idx = i->second;
5500     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5501   } else {
5502     Inst.addOperand(MCOperand::createImm(Default));
5503   }
5504 }
5505 
5506 OperandMatchResultTy
5507 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5508                                        StringRef &Value,
5509                                        SMLoc &StringLoc) {
5510   if (!trySkipId(Prefix, AsmToken::Colon))
5511     return MatchOperand_NoMatch;
5512 
5513   StringLoc = getLoc();
5514   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5515                                                   : MatchOperand_ParseFail;
5516 }
5517 
5518 //===----------------------------------------------------------------------===//
5519 // MTBUF format
5520 //===----------------------------------------------------------------------===//
5521 
5522 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5523                                   int64_t MaxVal,
5524                                   int64_t &Fmt) {
5525   int64_t Val;
5526   SMLoc Loc = getLoc();
5527 
5528   auto Res = parseIntWithPrefix(Pref, Val);
5529   if (Res == MatchOperand_ParseFail)
5530     return false;
5531   if (Res == MatchOperand_NoMatch)
5532     return true;
5533 
5534   if (Val < 0 || Val > MaxVal) {
5535     Error(Loc, Twine("out of range ", StringRef(Pref)));
5536     return false;
5537   }
5538 
5539   Fmt = Val;
5540   return true;
5541 }
5542 
5543 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5544 // values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return MatchOperand_ParseFail;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
      return MatchOperand_ParseFail;
    }
    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other
    // (only when exactly one of the pair has been parsed so far and the
    // lookahead token is not itself another comma).
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  // Neither half present: this is not a dfmt/nfmt operand at all.
  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return MatchOperand_NoMatch;

  // Substitute defaults for whichever half was omitted.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return MatchOperand_Success;
}
5577 
5578 OperandMatchResultTy
5579 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5580   using namespace llvm::AMDGPU::MTBUFFormat;
5581 
5582   int64_t Fmt = UFMT_UNDEF;
5583 
5584   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5585     return MatchOperand_ParseFail;
5586 
5587   if (Fmt == UFMT_UNDEF)
5588     return MatchOperand_NoMatch;
5589 
5590   Format = Fmt;
5591   return MatchOperand_Success;
5592 }
5593 
5594 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5595                                     int64_t &Nfmt,
5596                                     StringRef FormatStr,
5597                                     SMLoc Loc) {
5598   using namespace llvm::AMDGPU::MTBUFFormat;
5599   int64_t Format;
5600 
5601   Format = getDfmt(FormatStr);
5602   if (Format != DFMT_UNDEF) {
5603     Dfmt = Format;
5604     return true;
5605   }
5606 
5607   Format = getNfmt(FormatStr, getSTI());
5608   if (Format != NFMT_UNDEF) {
5609     Nfmt = Format;
5610     return true;
5611   }
5612 
5613   Error(Loc, "unsupported format");
5614   return false;
5615 }
5616 
/// Parse one or two comma-separated symbolic format names (a dfmt and/or
/// an nfmt, in either order) and encode them into \p Format.
OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                          SMLoc FormatLoc,
                                          int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return MatchOperand_ParseFail;

  // Optional second name: must supply whichever half is still missing.
  if (trySkipToken(AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
      return MatchOperand_ParseFail;
    }
    // After two names both halves must be defined; a still-undefined half
    // means the same kind of format was given twice.
    if (Dfmt == DFMT_UNDEF) {
      Error(Loc, "duplicate numeric format");
      return MatchOperand_ParseFail;
    } else if (Nfmt == NFMT_UNDEF) {
      Error(Loc, "duplicate data format");
      return MatchOperand_ParseFail;
    }
  }

  // Substitute defaults for whichever half was not specified.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  if (isGFX10Plus()) {
    // GFX10+ uses a unified format encoding; the dfmt/nfmt pair must map
    // onto one of the unified values.
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
    if (Ufmt == UFMT_UNDEF) {
      Error(FormatLoc, "unsupported format");
      return MatchOperand_ParseFail;
    }
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return MatchOperand_Success;
}
5660 
5661 OperandMatchResultTy
5662 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5663                                             SMLoc Loc,
5664                                             int64_t &Format) {
5665   using namespace llvm::AMDGPU::MTBUFFormat;
5666 
5667   auto Id = getUnifiedFormat(FormatStr);
5668   if (Id == UFMT_UNDEF)
5669     return MatchOperand_NoMatch;
5670 
5671   if (!isGFX10Plus()) {
5672     Error(Loc, "unified format is not supported on this GPU");
5673     return MatchOperand_ParseFail;
5674   }
5675 
5676   Format = Id;
5677   return MatchOperand_Success;
5678 }
5679 
5680 OperandMatchResultTy
5681 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5682   using namespace llvm::AMDGPU::MTBUFFormat;
5683   SMLoc Loc = getLoc();
5684 
5685   if (!parseExpr(Format))
5686     return MatchOperand_ParseFail;
5687   if (!isValidFormatEncoding(Format, getSTI())) {
5688     Error(Loc, "out of range format");
5689     return MatchOperand_ParseFail;
5690   }
5691 
5692   return MatchOperand_Success;
5693 }
5694 
5695 OperandMatchResultTy
5696 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5697   using namespace llvm::AMDGPU::MTBUFFormat;
5698 
5699   if (!trySkipId("format", AsmToken::Colon))
5700     return MatchOperand_NoMatch;
5701 
5702   if (trySkipToken(AsmToken::LBrac)) {
5703     StringRef FormatStr;
5704     SMLoc Loc = getLoc();
5705     if (!parseId(FormatStr, "expected a format string"))
5706       return MatchOperand_ParseFail;
5707 
5708     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5709     if (Res == MatchOperand_NoMatch)
5710       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5711     if (Res != MatchOperand_Success)
5712       return Res;
5713 
5714     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5715       return MatchOperand_ParseFail;
5716 
5717     return MatchOperand_Success;
5718   }
5719 
5720   return parseNumericFormat(Format);
5721 }
5722 
/// Parse the MTBUF format operand together with the following soffset.
/// The format may be written either before soffset (legacy syntax) or
/// after it (symbolic/numeric syntax).
OperandMatchResultTy
AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(getSTI());
  OperandMatchResultTy Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res == MatchOperand_ParseFail)
    return Res;

  bool FormatFound = (Res == MatchOperand_Success);

  // Always add a format operand (with the default encoding when none was
  // written) so the operand order matches the instruction definition.
  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return MatchOperand_Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (Res != MatchOperand_Success)
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    // The format may follow soffset; if so, patch the placeholder format
    // operand pushed above.
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res == MatchOperand_ParseFail)
      return Res;
    if (Res == MatchOperand_Success) {
      auto Size = Operands.size();
      // The placeholder is expected to sit just before the soffset operand.
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return MatchOperand_Success;
  }

  // A format was already parsed before soffset; a second one is an error.
  if (isId("format") && peekToken().is(AsmToken::Colon)) {
    Error(getLoc(), "duplicate format");
    return MatchOperand_ParseFail;
  }
  return MatchOperand_Success;
}
5776 
5777 //===----------------------------------------------------------------------===//
5778 // ds
5779 //===----------------------------------------------------------------------===//
5780 
5781 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5782                                     const OperandVector &Operands) {
5783   OptionalImmIndexMap OptionalIdx;
5784 
5785   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5786     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5787 
5788     // Add the register arguments
5789     if (Op.isReg()) {
5790       Op.addRegOperands(Inst, 1);
5791       continue;
5792     }
5793 
5794     // Handle optional arguments
5795     OptionalIdx[Op.getImmTy()] = i;
5796   }
5797 
5798   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5799   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5800   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5801 
5802   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5803 }
5804 
5805 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5806                                 bool IsGdsHardcoded) {
5807   OptionalImmIndexMap OptionalIdx;
5808 
5809   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5810     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5811 
5812     // Add the register arguments
5813     if (Op.isReg()) {
5814       Op.addRegOperands(Inst, 1);
5815       continue;
5816     }
5817 
5818     if (Op.isToken() && Op.getToken() == "gds") {
5819       IsGdsHardcoded = true;
5820       continue;
5821     }
5822 
5823     // Handle optional arguments
5824     OptionalIdx[Op.getImmTy()] = i;
5825   }
5826 
5827   AMDGPUOperand::ImmTy OffsetType =
5828     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5829      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5830      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5831                                                       AMDGPUOperand::ImmTyOffset;
5832 
5833   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5834 
5835   if (!IsGdsHardcoded) {
5836     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5837   }
5838   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5839 }
5840 
/// Convert parsed operands of an export instruction into MCInst operands,
/// computing the "en" mask from which sources are live.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4]; // MCInst operand index of each of the 4 sources.
  unsigned EnMask = 0;    // One bit per live source (two when compressed).
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // "off" marks an unused source; encoded as NoRegister.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // The "done" token is not emitted as an operand here.
    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed export: the second data register moves into slot 1 and
    // the remaining slots are cleared.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Each live source enables one bit; compressed sources enable a pair of
  // adjacent bits.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
5901 
5902 //===----------------------------------------------------------------------===//
5903 // s_waitcnt
5904 //===----------------------------------------------------------------------===//
5905 
5906 static bool
5907 encodeCnt(
5908   const AMDGPU::IsaVersion ISA,
5909   int64_t &IntVal,
5910   int64_t CntVal,
5911   bool Saturate,
5912   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5913   unsigned (*decode)(const IsaVersion &Version, unsigned))
5914 {
5915   bool Failed = false;
5916 
5917   IntVal = encode(ISA, IntVal, CntVal);
5918   if (CntVal != decode(ISA, IntVal)) {
5919     if (Saturate) {
5920       IntVal = encode(ISA, IntVal, -1);
5921     } else {
5922       Failed = true;
5923     }
5924   }
5925   return Failed;
5926 }
5927 
/// Parse one "<counter>(<value>)" group of an s_waitcnt operand (e.g.
/// "vmcnt(0)") and merge it into the combined encoding in \p IntVal.
/// Returns false (after emitting a diagnostic) on any error.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  // A "_sat" suffix requests clamping of out-of-range values instead of
  // reporting an error (see encodeCnt).
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // Groups may be separated by '&' or ','; a trailing separator with
  // nothing after it is an error.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}
5975 
5976 OperandMatchResultTy
5977 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5978   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5979   int64_t Waitcnt = getWaitcntBitMask(ISA);
5980   SMLoc S = getLoc();
5981 
5982   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5983     while (!isToken(AsmToken::EndOfStatement)) {
5984       if (!parseCnt(Waitcnt))
5985         return MatchOperand_ParseFail;
5986     }
5987   } else {
5988     if (!parseExpr(Waitcnt))
5989       return MatchOperand_ParseFail;
5990   }
5991 
5992   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5993   return MatchOperand_Success;
5994 }
5995 
bool
AMDGPUOperand::isSWaitCnt() const {
  // Any immediate is accepted as an s_waitcnt mask.
  return isImm();
}
6000 
6001 //===----------------------------------------------------------------------===//
6002 // hwreg
6003 //===----------------------------------------------------------------------===//
6004 
/// Parse the body of "hwreg(...)": a register (by name or numeric code)
/// optionally followed by ",<offset>,<width>". The opening parenthesis
/// has already been consumed. Returns false on error.
bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
                                OperandInfoTy &Offset,
                                OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Id, "a register name")) {
    return false;
  }

  // Short form "hwreg(<reg>)": offset and width keep their defaults.
  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return false;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Id))
    return false;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  Width.Loc = getLoc();
  return parseExpr(Width.Id) &&
         skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
6039 
/// Validate a parsed hwreg triple, emitting a diagnostic at the offending
/// component's location on failure.
bool
AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
                               const OperandInfoTy &Offset,
                               const OperandInfoTy &Width) {

  using namespace llvm::AMDGPU::Hwreg;

  // A symbolic name must denote a register known to this subtarget;
  // numeric codes only get the range check below.
  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
    Error(HwReg.Loc,
          "specified hardware register is not supported on this GPU");
    return false;
  }
  if (!isValidHwreg(HwReg.Id)) {
    Error(HwReg.Loc,
          "invalid code of hardware register: only 6-bit values are legal");
    return false;
  }
  if (!isValidHwregOffset(Offset.Id)) {
    Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
    return false;
  }
  if (!isValidHwregWidth(Width.Id)) {
    Error(Width.Loc,
          "invalid bitfield width: only values from 1 to 32 are legal");
    return false;
  }
  return true;
}
6068 
6069 OperandMatchResultTy
6070 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6071   using namespace llvm::AMDGPU::Hwreg;
6072 
6073   int64_t ImmVal = 0;
6074   SMLoc Loc = getLoc();
6075 
6076   if (trySkipId("hwreg", AsmToken::LParen)) {
6077     OperandInfoTy HwReg(ID_UNKNOWN_);
6078     OperandInfoTy Offset(OFFSET_DEFAULT_);
6079     OperandInfoTy Width(WIDTH_DEFAULT_);
6080     if (parseHwregBody(HwReg, Offset, Width) &&
6081         validateHwreg(HwReg, Offset, Width)) {
6082       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6083     } else {
6084       return MatchOperand_ParseFail;
6085     }
6086   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6087     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6088       Error(Loc, "invalid immediate: only 16-bit values are legal");
6089       return MatchOperand_ParseFail;
6090     }
6091   } else {
6092     return MatchOperand_ParseFail;
6093   }
6094 
6095   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6096   return MatchOperand_Success;
6097 }
6098 
6099 bool AMDGPUOperand::isHwreg() const {
6100   return isImmTy(ImmTyHwreg);
6101 }
6102 
6103 //===----------------------------------------------------------------------===//
6104 // sendmsg
6105 //===----------------------------------------------------------------------===//
6106 
bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Parses the interior of sendmsg(...): a message id, an optional
  // operation id and an optional stream id, comma-separated. The opening
  // parenthesis has already been consumed by the caller. The message and
  // operation fields accept either a symbolic name or an integer
  // expression; the stream id is parsed as a plain expression.
  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id, "a message name")) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    // Note: symbolic operation name lookup depends on the message id
    // parsed above.
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
6141 
bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(Msg.Loc, "invalid message id");
    return false;
  }
  // In strict mode, the presence of an operation must match what the
  // message requires; report the mismatch at the most relevant location.
  if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
    Error(Op.Loc, "invalid operation id");
    return false;
  }
  // A stream id may only be given when the message/operation pair
  // supports streams (strict mode only).
  if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}
6179 
6180 OperandMatchResultTy
6181 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6182   using namespace llvm::AMDGPU::SendMsg;
6183 
6184   int64_t ImmVal = 0;
6185   SMLoc Loc = getLoc();
6186 
6187   if (trySkipId("sendmsg", AsmToken::LParen)) {
6188     OperandInfoTy Msg(ID_UNKNOWN_);
6189     OperandInfoTy Op(OP_NONE_);
6190     OperandInfoTy Stream(STREAM_ID_NONE_);
6191     if (parseSendMsgBody(Msg, Op, Stream) &&
6192         validateSendMsg(Msg, Op, Stream)) {
6193       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6194     } else {
6195       return MatchOperand_ParseFail;
6196     }
6197   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6198     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6199       Error(Loc, "invalid immediate: only 16-bit values are legal");
6200       return MatchOperand_ParseFail;
6201     }
6202   } else {
6203     return MatchOperand_ParseFail;
6204   }
6205 
6206   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6207   return MatchOperand_Success;
6208 }
6209 
6210 bool AMDGPUOperand::isSendMsg() const {
6211   return isImmTy(ImmTySendMsg);
6212 }
6213 
6214 //===----------------------------------------------------------------------===//
6215 // v_interp
6216 //===----------------------------------------------------------------------===//
6217 
6218 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6219   StringRef Str;
6220   SMLoc S = getLoc();
6221 
6222   if (!parseId(Str))
6223     return MatchOperand_NoMatch;
6224 
6225   int Slot = StringSwitch<int>(Str)
6226     .Case("p10", 0)
6227     .Case("p20", 1)
6228     .Case("p0", 2)
6229     .Default(-1);
6230 
6231   if (Slot == -1) {
6232     Error(S, "invalid interpolation slot");
6233     return MatchOperand_ParseFail;
6234   }
6235 
6236   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6237                                               AMDGPUOperand::ImmTyInterpSlot));
6238   return MatchOperand_Success;
6239 }
6240 
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  // Parses an interpolation attribute of the form "attr<N>.<chan>"
  // (e.g. "attr32.x") and pushes two immediate operands: the attribute
  // number and the channel.
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  if (!Str.startswith("attr")) {
    Error(S, "invalid interpolation attribute");
    return MatchOperand_ParseFail;
  }

  // The last two characters select the channel (".x" .. ".w").
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1) {
    Error(S, "invalid or missing interpolation attribute channel");
    return MatchOperand_ParseFail;
  }

  // Strip the 2-char channel suffix and the 4-char "attr" prefix,
  // leaving only the decimal attribute number.
  Str = Str.drop_back(2).drop_front(4);

  // getAsInteger into a uint8_t also fails (returns true) on overflow.
  uint8_t Attr;
  if (Str.getAsInteger(10, Attr)) {
    Error(S, "invalid or missing interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  if (Attr > 63) {
    Error(S, "out of bounds interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  // Point the channel operand's location at the suffix inside the token.
  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
6286 
6287 //===----------------------------------------------------------------------===//
6288 // exp
6289 //===----------------------------------------------------------------------===//
6290 
6291 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6292   using namespace llvm::AMDGPU::Exp;
6293 
6294   StringRef Str;
6295   SMLoc S = getLoc();
6296 
6297   if (!parseId(Str))
6298     return MatchOperand_NoMatch;
6299 
6300   unsigned Id = getTgtId(Str);
6301   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6302     Error(S, (Id == ET_INVALID) ?
6303                 "invalid exp target" :
6304                 "exp target is not supported on this GPU");
6305     return MatchOperand_ParseFail;
6306   }
6307 
6308   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6309                                               AMDGPUOperand::ImmTyExpTgt));
6310   return MatchOperand_Success;
6311 }
6312 
6313 //===----------------------------------------------------------------------===//
6314 // parser helpers
6315 //===----------------------------------------------------------------------===//
6316 
6317 bool
6318 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6319   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6320 }
6321 
6322 bool
6323 AMDGPUAsmParser::isId(const StringRef Id) const {
6324   return isId(getToken(), Id);
6325 }
6326 
6327 bool
6328 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6329   return getTokenKind() == Kind;
6330 }
6331 
6332 bool
6333 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6334   if (isId(Id)) {
6335     lex();
6336     return true;
6337   }
6338   return false;
6339 }
6340 
6341 bool
6342 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6343   if (isToken(AsmToken::Identifier)) {
6344     StringRef Tok = getTokenStr();
6345     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6346       lex();
6347       return true;
6348     }
6349   }
6350   return false;
6351 }
6352 
6353 bool
6354 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6355   if (isId(Id) && peekToken().is(Kind)) {
6356     lex();
6357     lex();
6358     return true;
6359   }
6360   return false;
6361 }
6362 
6363 bool
6364 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6365   if (isToken(Kind)) {
6366     lex();
6367     return true;
6368   }
6369   return false;
6370 }
6371 
6372 bool
6373 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6374                            const StringRef ErrMsg) {
6375   if (!trySkipToken(Kind)) {
6376     Error(getLoc(), ErrMsg);
6377     return false;
6378   }
6379   return true;
6380 }
6381 
6382 bool
6383 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6384   SMLoc S = getLoc();
6385 
6386   const MCExpr *Expr;
6387   if (Parser.parseExpression(Expr))
6388     return false;
6389 
6390   if (Expr->evaluateAsAbsolute(Imm))
6391     return true;
6392 
6393   if (Expected.empty()) {
6394     Error(S, "expected absolute expression");
6395   } else {
6396     Error(S, Twine("expected ", Expected) +
6397              Twine(" or an absolute expression"));
6398   }
6399   return false;
6400 }
6401 
6402 bool
6403 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6404   SMLoc S = getLoc();
6405 
6406   const MCExpr *Expr;
6407   if (Parser.parseExpression(Expr))
6408     return false;
6409 
6410   int64_t IntVal;
6411   if (Expr->evaluateAsAbsolute(IntVal)) {
6412     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6413   } else {
6414     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6415   }
6416   return true;
6417 }
6418 
6419 bool
6420 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6421   if (isToken(AsmToken::String)) {
6422     Val = getToken().getStringContents();
6423     lex();
6424     return true;
6425   } else {
6426     Error(getLoc(), ErrMsg);
6427     return false;
6428   }
6429 }
6430 
6431 bool
6432 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6433   if (isToken(AsmToken::Identifier)) {
6434     Val = getTokenStr();
6435     lex();
6436     return true;
6437   } else {
6438     if (!ErrMsg.empty())
6439       Error(getLoc(), ErrMsg);
6440     return false;
6441   }
6442 }
6443 
6444 AsmToken
6445 AMDGPUAsmParser::getToken() const {
6446   return Parser.getTok();
6447 }
6448 
6449 AsmToken
6450 AMDGPUAsmParser::peekToken() {
6451   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6452 }
6453 
6454 void
6455 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6456   auto TokCount = getLexer().peekTokens(Tokens);
6457 
6458   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6459     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6460 }
6461 
6462 AsmToken::TokenKind
6463 AMDGPUAsmParser::getTokenKind() const {
6464   return getLexer().getKind();
6465 }
6466 
6467 SMLoc
6468 AMDGPUAsmParser::getLoc() const {
6469   return getToken().getLoc();
6470 }
6471 
6472 StringRef
6473 AMDGPUAsmParser::getTokenStr() const {
6474   return getToken().getString();
6475 }
6476 
6477 void
6478 AMDGPUAsmParser::lex() {
6479   Parser.Lex();
6480 }
6481 
SMLoc
AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                               const OperandVector &Operands) const {
  // Scan operands from last to first, skipping index 0, and return the
  // location of the first operand that satisfies Test.
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Test(Op))
      return Op.getStartLoc();
  }
  // No match: fall back to operand 0 (presumably the instruction
  // mnemonic, so the diagnostic points at the whole instruction).
  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
}
6492 
6493 SMLoc
6494 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6495                            const OperandVector &Operands) const {
6496   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6497   return getOperandLoc(Test, Operands);
6498 }
6499 
6500 SMLoc
6501 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6502                            const OperandVector &Operands) const {
6503   auto Test = [=](const AMDGPUOperand& Op) {
6504     return Op.isRegKind() && Op.getReg() == Reg;
6505   };
6506   return getOperandLoc(Test, Operands);
6507 }
6508 
6509 SMLoc
6510 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6511   auto Test = [](const AMDGPUOperand& Op) {
6512     return Op.IsImmKindLiteral() || Op.isExpr();
6513   };
6514   return getOperandLoc(Test, Operands);
6515 }
6516 
6517 SMLoc
6518 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6519   auto Test = [](const AMDGPUOperand& Op) {
6520     return Op.isImmKindConst();
6521   };
6522   return getOperandLoc(Test, Operands);
6523 }
6524 
6525 //===----------------------------------------------------------------------===//
6526 // swizzle
6527 //===----------------------------------------------------------------------===//
6528 
6529 LLVM_READNONE
6530 static unsigned
6531 encodeBitmaskPerm(const unsigned AndMask,
6532                   const unsigned OrMask,
6533                   const unsigned XorMask) {
6534   using namespace llvm::AMDGPU::Swizzle;
6535 
6536   return BITMASK_PERM_ENC |
6537          (AndMask << BITMASK_AND_SHIFT) |
6538          (OrMask  << BITMASK_OR_SHIFT)  |
6539          (XorMask << BITMASK_XOR_SHIFT);
6540 }
6541 
6542 bool
6543 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6544                                      const unsigned MinVal,
6545                                      const unsigned MaxVal,
6546                                      const StringRef ErrMsg,
6547                                      SMLoc &Loc) {
6548   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6549     return false;
6550   }
6551   Loc = getLoc();
6552   if (!parseExpr(Op)) {
6553     return false;
6554   }
6555   if (Op < MinVal || Op > MaxVal) {
6556     Error(Loc, ErrMsg);
6557     return false;
6558   }
6559 
6560   return true;
6561 }
6562 
6563 bool
6564 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6565                                       const unsigned MinVal,
6566                                       const unsigned MaxVal,
6567                                       const StringRef ErrMsg) {
6568   SMLoc Loc;
6569   for (unsigned i = 0; i < OpNum; ++i) {
6570     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6571       return false;
6572   }
6573 
6574   return true;
6575 }
6576 
6577 bool
6578 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6579   using namespace llvm::AMDGPU::Swizzle;
6580 
6581   int64_t Lane[LANE_NUM];
6582   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6583                            "expected a 2-bit lane id")) {
6584     Imm = QUAD_PERM_ENC;
6585     for (unsigned I = 0; I < LANE_NUM; ++I) {
6586       Imm |= Lane[I] << (LANE_SHIFT * I);
6587     }
6588     return true;
6589   }
6590   return false;
6591 }
6592 
6593 bool
6594 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6595   using namespace llvm::AMDGPU::Swizzle;
6596 
6597   SMLoc Loc;
6598   int64_t GroupSize;
6599   int64_t LaneIdx;
6600 
6601   if (!parseSwizzleOperand(GroupSize,
6602                            2, 32,
6603                            "group size must be in the interval [2,32]",
6604                            Loc)) {
6605     return false;
6606   }
6607   if (!isPowerOf2_64(GroupSize)) {
6608     Error(Loc, "group size must be a power of two");
6609     return false;
6610   }
6611   if (parseSwizzleOperand(LaneIdx,
6612                           0, GroupSize - 1,
6613                           "lane id must be in the interval [0,group size - 1]",
6614                           Loc)) {
6615     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6616     return true;
6617   }
6618   return false;
6619 }
6620 
6621 bool
6622 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6623   using namespace llvm::AMDGPU::Swizzle;
6624 
6625   SMLoc Loc;
6626   int64_t GroupSize;
6627 
6628   if (!parseSwizzleOperand(GroupSize,
6629                            2, 32,
6630                            "group size must be in the interval [2,32]",
6631                            Loc)) {
6632     return false;
6633   }
6634   if (!isPowerOf2_64(GroupSize)) {
6635     Error(Loc, "group size must be a power of two");
6636     return false;
6637   }
6638 
6639   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6640   return true;
6641 }
6642 
6643 bool
6644 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6645   using namespace llvm::AMDGPU::Swizzle;
6646 
6647   SMLoc Loc;
6648   int64_t GroupSize;
6649 
6650   if (!parseSwizzleOperand(GroupSize,
6651                            1, 16,
6652                            "group size must be in the interval [1,16]",
6653                            Loc)) {
6654     return false;
6655   }
6656   if (!isPowerOf2_64(GroupSize)) {
6657     Error(Loc, "group size must be a power of two");
6658     return false;
6659   }
6660 
6661   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6662   return true;
6663 }
6664 
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  // Parse the BITMASK_PERM swizzle mode. The control is a string of
  // exactly BITMASK_WIDTH characters, one per bit, leftmost character
  // mapping to the most significant bit:
  //   '0' - force the bit to 0
  //   '1' - force the bit to 1
  //   'p' - preserve the bit
  //   'i' - invert the bit
  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    // Bit position for character i; character 0 is the MSB.
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      // AND with 0: the bit is cleared.
      break;
    case '1':
      // OR with 1: the bit is set.
      OrMask |= Mask;
      break;
    case 'p':
      // AND with 1: the bit is preserved.
      AndMask |= Mask;
      break;
    case 'i':
      // AND with 1, then XOR with 1: the bit is inverted.
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
6711 
6712 bool
6713 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6714 
6715   SMLoc OffsetLoc = getLoc();
6716 
6717   if (!parseExpr(Imm, "a swizzle macro")) {
6718     return false;
6719   }
6720   if (!isUInt<16>(Imm)) {
6721     Error(OffsetLoc, "expected a 16-bit offset");
6722     return false;
6723   }
6724   return true;
6725 }
6726 
6727 bool
6728 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6729   using namespace llvm::AMDGPU::Swizzle;
6730 
6731   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6732 
6733     SMLoc ModeLoc = getLoc();
6734     bool Ok = false;
6735 
6736     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6737       Ok = parseSwizzleQuadPerm(Imm);
6738     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6739       Ok = parseSwizzleBitmaskPerm(Imm);
6740     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6741       Ok = parseSwizzleBroadcast(Imm);
6742     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6743       Ok = parseSwizzleSwap(Imm);
6744     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6745       Ok = parseSwizzleReverse(Imm);
6746     } else {
6747       Error(ModeLoc, "expected a swizzle mode");
6748     }
6749 
6750     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
6751   }
6752 
6753   return false;
6754 }
6755 
6756 OperandMatchResultTy
6757 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6758   SMLoc S = getLoc();
6759   int64_t Imm = 0;
6760 
6761   if (trySkipId("offset")) {
6762 
6763     bool Ok = false;
6764     if (skipToken(AsmToken::Colon, "expected a colon")) {
6765       if (trySkipId("swizzle")) {
6766         Ok = parseSwizzleMacro(Imm);
6767       } else {
6768         Ok = parseSwizzleOffset(Imm);
6769       }
6770     }
6771 
6772     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6773 
6774     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6775   } else {
6776     // Swizzle "offset" operand is optional.
6777     // If it is omitted, try parsing other optional operands.
6778     return parseOptionalOpr(Operands);
6779   }
6780 }
6781 
6782 bool
6783 AMDGPUOperand::isSwizzle() const {
6784   return isImmTy(ImmTySwizzle);
6785 }
6786 
6787 //===----------------------------------------------------------------------===//
6788 // VGPR Index Mode
6789 //===----------------------------------------------------------------------===//
6790 
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  // Parses the body of gpr_idx(...) after the opening parenthesis:
  // a comma-separated list of distinct symbolic VGPR index modes.
  // Returns the mode bitmask, OFF for an empty list, or UNDEF on error
  // (a diagnostic has already been emitted in that case).
  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    // Try each known symbolic mode name; Mode stays 0 if none matches.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      // Mention the closing parenthesis only when no mode has been
      // parsed yet (Imm == 0).
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    // Each mode may appear at most once in the list.
    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}
6834 
6835 OperandMatchResultTy
6836 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6837 
6838   using namespace llvm::AMDGPU::VGPRIndexMode;
6839 
6840   int64_t Imm = 0;
6841   SMLoc S = getLoc();
6842 
6843   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6844     Imm = parseGPRIdxMacro();
6845     if (Imm == UNDEF)
6846       return MatchOperand_ParseFail;
6847   } else {
6848     if (getParser().parseAbsoluteExpression(Imm))
6849       return MatchOperand_ParseFail;
6850     if (Imm < 0 || !isUInt<4>(Imm)) {
6851       Error(S, "invalid immediate: only 4-bit values are legal");
6852       return MatchOperand_ParseFail;
6853     }
6854   }
6855 
6856   Operands.push_back(
6857       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6858   return MatchOperand_Success;
6859 }
6860 
6861 bool AMDGPUOperand::isGPRIdxMode() const {
6862   return isImmTy(ImmTyGprIdxMode);
6863 }
6864 
6865 //===----------------------------------------------------------------------===//
6866 // sopp branch targets
6867 //===----------------------------------------------------------------------===//
6868 
OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return MatchOperand_NoMatch;

  if (!parseExpr(Operands))
    return MatchOperand_ParseFail;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  // NOTE(review): MatchOperand_Success is returned even after Error()
  // above — the operand has been consumed and pushed; presumably the
  // emitted diagnostic is sufficient to fail the assembly. Confirm
  // before changing.
  return MatchOperand_Success;
}
6895 
6896 //===----------------------------------------------------------------------===//
6897 // Boolean holding registers
6898 //===----------------------------------------------------------------------===//
6899 
6900 OperandMatchResultTy
6901 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6902   return parseReg(Operands);
6903 }
6904 
6905 //===----------------------------------------------------------------------===//
6906 // mubuf
6907 //===----------------------------------------------------------------------===//
6908 
6909 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
6910   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
6911 }
6912 
6913 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol_GLC1() const {
6914   return AMDGPUOperand::CreateImm(this, CPol::GLC, SMLoc(),
6915                                   AMDGPUOperand::ImmTyCPol);
6916 }
6917 
// Convert parsed MUBUF operands into MCInst operands. Handles the atomic
// return/no-return opcode selection (driven by the GLC cache-policy bit)
// and a workaround for lds-vs-non-lds opcode selection.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    // An atomic returns its result iff the GLC bit is set in the parsed
    // cache-policy operand.
    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      if (!Op.isCPol())
        continue;
      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
      break;
    }

    // No GLC: switch to the no-return flavor of the opcode if one exists.
    if (!IsAtomicReturn) {
      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
      if (NewOpc != -1)
        Inst.setOpcode(NewOpc);
    }

    // Re-derive the flag from the (possibly updated) opcode's TSFlags.
    IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
                      SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  // Append the optional immediates in the order the opcode expects them.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}
7003 
// Convert parsed MTBUF operands into MCInst operands. Registers and the
// bare soffset immediate are appended in parse order; the remaining
// optional immediates are appended afterwards in a fixed order.
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // The order of these calls determines MCInst operand order; keep it
  // in sync with the instruction definitions.
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}
7040 
7041 //===----------------------------------------------------------------------===//
7042 // mimg
7043 //===----------------------------------------------------------------------===//
7044 
// Convert parsed MIMG operands into MCInst operands. For atomics, the
// destination register is duplicated as a tied source. Optional
// immediates are appended in a fixed order that differs between GFX10+
// and earlier subtargets.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // Destination registers first.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10Plus = isGFX10Plus();

  // The order of these calls determines MCInst operand order; keep it
  // in sync with the instruction definitions.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  // tfe only exists on some MIMG opcodes.
  if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
7091 
// MIMG atomics use the generic MIMG conversion, with the dst register
// additionally re-added as the data source (see IsAtomic in cvtMIMG).
void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}
7095 
// Convert parsed SMEM atomic operands into an MCInst. Whether the atomic
// "returns" is decided by the GLC bit of the cache-policy operand; non-GLC
// forms are rewritten to the no-return opcode variant when one exists.
void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  bool IsAtomicReturn = false;

  // First pass: look for an explicit cpol operand with GLC set.
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (!Op.isCPol())
      continue;
    IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
    break;
  }

  if (!IsAtomicReturn) {
    // No GLC: switch to the no-return flavour of the opcode if available.
    int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
    if (NewOpc != -1)
      Inst.setOpcode(NewOpc);
  }

  // Re-derive the return property from the (possibly rewritten) opcode.
  IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
                    SIInstrFlags::IsAtomicRet;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // For returning atomics the dst register doubles as the data source.
      if (IsAtomicReturn && i == 1)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // Add the offset operand only if it has not already been emitted above.
  if ((int)Inst.getNumOperands() <=
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
}
7150 
7151 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7152                                       const OperandVector &Operands) {
7153   for (unsigned I = 1; I < Operands.size(); ++I) {
7154     auto &Operand = (AMDGPUOperand &)*Operands[I];
7155     if (Operand.isReg())
7156       Operand.addRegOperands(Inst, 1);
7157   }
7158 
7159   Inst.addOperand(MCOperand::createImm(1)); // a16
7160 }
7161 
7162 //===----------------------------------------------------------------------===//
7163 // smrd
7164 //===----------------------------------------------------------------------===//
7165 
7166 bool AMDGPUOperand::isSMRDOffset8() const {
7167   return isImm() && isUInt<8>(getImm());
7168 }
7169 
// Any immediate qualifies as an SMEM offset at parse time.
bool AMDGPUOperand::isSMEMOffset() const {
  return isImm(); // Offset range is checked later by validator.
}
7173 
7174 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7175   // 32-bit literals are only supported on CI and we only want to use them
7176   // when the offset is > 8-bits.
7177   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7178 }
7179 
// Default SMRD 8-bit offset when none was parsed: immediate 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
7183 
// Default SMEM offset when none was parsed: immediate 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
7187 
// Default SMRD literal offset when none was parsed: immediate 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
7191 
// Default FLAT offset when none was parsed: immediate 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
7195 
7196 //===----------------------------------------------------------------------===//
7197 // vop3
7198 //===----------------------------------------------------------------------===//
7199 
// Map an omod "mul:" value onto its encoding: 1 -> 0, 2 -> 1, 4 -> 2.
// Any other multiplier is rejected and left unchanged.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1: Mul = 0; return true;
  case 2: Mul = 1; return true;
  case 4: Mul = 2; return true;
  default: return false;
  }
}
7207 
// Map an omod "div:" value onto its encoding: 1 -> 0, 2 -> 3.
// Any other divisor is rejected and left unchanged.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1: Div = 0; return true;
  case 2: Div = 3; return true;
  default: return false;
  }
}
7221 
// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1; this is intentional
// and keeps us compatible with sp3 (see bug 35397 for details). Any other
// value is rejected and left unchanged.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl != 0 && BoundCtrl != 1)
    return false;
  BoundCtrl = 1;
  return true;
}
7232 
7233 // Note: the order in this table matches the order of operands in AsmString.
// Table of all optional operands the parser can recognize, scanned in order
// by parseOptionalOpr. Fields: asm name, immediate type, whether the operand
// is a bare bit (no ":value" suffix), and an optional value converter.
// NOTE(review): "d16" appears twice below; the table is scanned in order
// against the AsmString, so both positions map to ImmTyD16.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  // Cache policy has no single fixed name; it is parsed by parseCPol.
  {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
  {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};
7277 
// Parse one optional operand, then speculatively look ahead for more (see
// the hack note below). Returns the result of the last parse attempt.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  // Keep consuming optional operands (up to MAX_OPR_LOOKAHEAD) until one
  // fails to parse or we reach the end of the statement.
  for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
    if (res != MatchOperand_Success ||
        isToken(AsmToken::EndOfStatement))
      break;

    trySkipToken(AsmToken::Comma);
    res = parseOptionalOpr(Operands);
  }

  return res;
}
7304 
7305 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7306   OperandMatchResultTy res;
7307   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7308     // try to parse any optional operand here
7309     if (Op.IsBit) {
7310       res = parseNamedBit(Op.Name, Operands, Op.Type);
7311     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7312       res = parseOModOperand(Operands);
7313     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7314                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7315                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7316       res = parseSDWASel(Operands, Op.Name, Op.Type);
7317     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7318       res = parseSDWADstUnused(Operands);
7319     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7320                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7321                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7322                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7323       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7324                                         Op.ConvertResult);
7325     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7326       res = parseDim(Operands);
7327     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7328       res = parseCPol(Operands);
7329     } else {
7330       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7331     }
7332     if (res != MatchOperand_NoMatch) {
7333       return res;
7334     }
7335   }
7336   return MatchOperand_NoMatch;
7337 }
7338 
7339 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7340   StringRef Name = getTokenStr();
7341   if (Name == "mul") {
7342     return parseIntWithPrefix("mul", Operands,
7343                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7344   }
7345 
7346   if (Name == "div") {
7347     return parseIntWithPrefix("div", Operands,
7348                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7349   }
7350 
7351   return MatchOperand_NoMatch;
7352 }
7353 
// After the common VOP3P conversion, fold the op_sel bit that selects the
// destination half into src0_modifiers (DST_OP_SEL), which is where the
// encoding carries it.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many srcN operands this opcode actually has (1..3).
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  // The op_sel bit just past the last source selects the destination.
  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
7377 
7378 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7379       // 1. This operand is input modifiers
7380   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7381       // 2. This is not last operand
7382       && Desc.NumOperands > (OpNum + 1)
7383       // 3. Next operand is register class
7384       && Desc.OpInfo[OpNum + 1].RegClass != -1
7385       // 4. Next register is not tied to any other operand
7386       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7387 }
7388 
// Convert parsed VOP3 interpolation (v_interp_*) operands into an MCInst.
// Interp-specific operands (slot/attr/attr_chan) are emitted as plain
// immediates; optional modifiers are appended only if the opcode has them.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; defs start at index 1.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Emits the modifiers immediate plus the source value.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
7427 
// Convert parsed VOP3 operands into an MCInst. Sources with input modifiers
// are emitted as a (modifiers, value) pair; optional clamp/omod are appended
// when the opcode has them. MAC/FMAC opcodes get a special fixup because
// their tied src2 has no parsed representation.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; defs start at index 1.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}
7495 
// Plain VOP3 conversion entry point: delegates with a fresh local
// optional-operand index map.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
7500 
// Convert parsed VOP3P (packed) operands. The op_sel/op_sel_hi/neg_lo/neg_hi
// operands are parsed as standalone immediates, then redistributed into the
// per-source srcN_modifiers fields where the encoding actually stores them.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    // vdst_in is tied to the destination register.
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed default op_sel_hi is all-ones (-1); unpacked default is 0.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Fold bit J of each bitfield into the modifiers of source J.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
7583 
7584 //===----------------------------------------------------------------------===//
7585 // dpp
7586 //===----------------------------------------------------------------------===//
7587 
// True for an immediate operand carrying a dpp8 lane-selector bitfield.
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}
7591 
7592 bool AMDGPUOperand::isDPPCtrl() const {
7593   using namespace AMDGPU::DPP;
7594 
7595   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7596   if (result) {
7597     int64_t Imm = getImm();
7598     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7599            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7600            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7601            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7602            (Imm == DppCtrl::WAVE_SHL1) ||
7603            (Imm == DppCtrl::WAVE_ROL1) ||
7604            (Imm == DppCtrl::WAVE_SHR1) ||
7605            (Imm == DppCtrl::WAVE_ROR1) ||
7606            (Imm == DppCtrl::ROW_MIRROR) ||
7607            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7608            (Imm == DppCtrl::BCAST15) ||
7609            (Imm == DppCtrl::BCAST31) ||
7610            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7611            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7612   }
7613   return false;
7614 }
7615 
7616 //===----------------------------------------------------------------------===//
7617 // mAI
7618 //===----------------------------------------------------------------------===//
7619 
// blgp is a 3-bit MAI modifier immediate.
bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}
7623 
// cbsz is a 3-bit MAI modifier immediate.
bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}
7627 
// abid is a 4-bit MAI modifier immediate.
bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}
7631 
// Accepts anything representable in 16 bits, signed or unsigned.
bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}
7635 
// Accepts only unsigned 16-bit immediates.
bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}
7639 
7640 //===----------------------------------------------------------------------===//
7641 // dim
7642 //===----------------------------------------------------------------------===//
7643 
// Parse a dim value such as "1D" or "SQ_RSRC_IMG_2D_ARRAY" into its MIMG
// dim encoding. Returns false if the tokens do not name a known dim.
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    // The identifier must follow the integer with no gap ("1D", not "1 D").
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  // Accept the verbose "SQ_RSRC_IMG_" spelling by stripping the prefix.
  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}
7672 
// Parse an optional "dim:<value>" operand (GFX10+ only) into an ImmTyDim
// immediate.
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding)) {
    Error(Loc, "invalid dim value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}
7693 
7694 //===----------------------------------------------------------------------===//
7695 // dpp
7696 //===----------------------------------------------------------------------===//
7697 
// Parse the GFX10+ "dpp8:[s0,...,s7]" operand: eight 3-bit lane selectors
// packed into a single 24-bit immediate of type ImmTyDPP8.
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i]) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

  // Pack selector i into bits [3*i+2 : 3*i].
  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}
7734 
// Returns true if the named dpp_ctrl variant is valid for the current
// subtarget and for the (already parsed) destination operand.
bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  // row_newbcast is checked before the 64-bit operand restriction below
  // because it is the one ctrl that IS allowed on 64-bit operands.
  if (Ctrl == "row_newbcast")
      return isGFX90A();

  // DPP64 is supported for row_newbcast only.
  // Operands[1] having a sub1 subregister means it is a 64-bit (or wider)
  // register, so every other ctrl is rejected.
  const MCRegisterInfo *MRI = getMRI();
  if (Operands.size() > 2 && Operands[1]->isReg() &&
      MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1))
    return false;

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  // These variants are accepted on all subtargets.
  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}
7765 
// Parse the bracketed part of "quad_perm:[a,b,c,d]": four 2-bit lane
// selectors packed into an 8-bit value. Returns -1 on parse failure
// (the error has already been reported).
int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    // Selector i occupies bits [2*i+1 : 2*i].
    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}
7795 
// Parse the numeric part of a "ctrl:<n>" dpp control and map it onto the
// dpp_ctrl encoding. Returns -1 on an out-of-range or unknown value
// (the error has already been reported).
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  // Per-ctrl base encoding plus the accepted [Lo, Hi] value range.
  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    // Not in the table: only row_bcast with value 15 or 31 remains valid.
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    // Fixed-value ctrls encode as the base; ranged ctrls OR the value in.
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}
7843 
// Parse an optional dpp_ctrl operand ("row_mirror", "quad_perm:[...]",
// "row_shl:<n>", ...) into an ImmTyDppCtrl immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    // All remaining variants take a ":<value>" suffix.
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  // -1 signals that one of the helpers already reported an error.
  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
7879 
// Default DPP row_mask: 0xf (all rows enabled).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}
7883 
// Default s_endpgm immediate when none was parsed: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}
7887 
// Default DPP bank_mask: 0xf (all banks enabled).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}
7891 
// Default DPP bound_ctrl: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}
7895 
// Default DPP fi (fetch-invalid): 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
7899 
// Convert parsed DPP (or DPP8 when IsDPP8) operands into an MCInst,
// materializing tied operands and appending the DPP control immediates in
// encoding order.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  // Operands[0] is the mnemonic token; defs start at index 1.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    // Tied operands have no parsed counterpart; copy the operand they are
    // tied to before consuming the next parsed operand.
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        // fi is emitted after the loop, folded into the DPP8 FI immediate.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    // fi only exists on subtargets/opcodes that define the operand.
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}
7964 
7965 //===----------------------------------------------------------------------===//
7966 // sdwa
7967 //===----------------------------------------------------------------------===//
7968 
7969 OperandMatchResultTy
7970 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7971                               AMDGPUOperand::ImmTy Type) {
7972   using namespace llvm::AMDGPU::SDWA;
7973 
7974   SMLoc S = getLoc();
7975   StringRef Value;
7976   OperandMatchResultTy res;
7977 
7978   SMLoc StringLoc;
7979   res = parseStringWithPrefix(Prefix, Value, StringLoc);
7980   if (res != MatchOperand_Success) {
7981     return res;
7982   }
7983 
7984   int64_t Int;
7985   Int = StringSwitch<int64_t>(Value)
7986         .Case("BYTE_0", SdwaSel::BYTE_0)
7987         .Case("BYTE_1", SdwaSel::BYTE_1)
7988         .Case("BYTE_2", SdwaSel::BYTE_2)
7989         .Case("BYTE_3", SdwaSel::BYTE_3)
7990         .Case("WORD_0", SdwaSel::WORD_0)
7991         .Case("WORD_1", SdwaSel::WORD_1)
7992         .Case("DWORD", SdwaSel::DWORD)
7993         .Default(0xffffffff);
7994 
7995   if (Int == 0xffffffff) {
7996     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
7997     return MatchOperand_ParseFail;
7998   }
7999 
8000   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8001   return MatchOperand_Success;
8002 }
8003 
8004 OperandMatchResultTy
8005 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8006   using namespace llvm::AMDGPU::SDWA;
8007 
8008   SMLoc S = getLoc();
8009   StringRef Value;
8010   OperandMatchResultTy res;
8011 
8012   SMLoc StringLoc;
8013   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8014   if (res != MatchOperand_Success) {
8015     return res;
8016   }
8017 
8018   int64_t Int;
8019   Int = StringSwitch<int64_t>(Value)
8020         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8021         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8022         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8023         .Default(0xffffffff);
8024 
8025   if (Int == 0xffffffff) {
8026     Error(StringLoc, "invalid dst_unused value");
8027     return MatchOperand_ParseFail;
8028   }
8029 
8030   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8031   return MatchOperand_Success;
8032 }
8033 
// VOP1 SDWA: no vcc operands to skip (uses cvtSDWA's defaults).
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
8037 
// VOP2 SDWA: no vcc operands to skip (uses cvtSDWA's defaults).
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
8041 
// VOP2b SDWA: skip the "vcc" token in both dst and src positions.
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}
8045 
// VOP2e SDWA: skip the "vcc" token only in the src position.
void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}
8049 
// VOPC SDWA: on VI the "vcc" dst token must be skipped.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
8053 
/// Convert parsed SDWA operands into their final MCInst layout.
/// \param BasicInstType  one of SIInstrFlags::VOP1/VOP2/VOPC; selects which
///                       optional sdwa immediates are appended and in what
///                       order.
/// \param SkipDstVcc     skip a parsed "vcc" token in the dst position.
/// \param SkipSrcVcc     skip a parsed "vcc" token in the src position.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  // Operands[0] is the mnemonic token; explicit defs follow it.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Source with modifiers occupies two MCInst operand slots.
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_gfx10/gfx9/vi has no optional sdwa arguments.
    // Append optional immediates (with defaults for absent ones) in the
    // canonical order for this encoding class.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
8149 
8150 //===----------------------------------------------------------------------===//
8151 // mAI
8152 //===----------------------------------------------------------------------===//
8153 
8154 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8155   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8156 }
8157 
8158 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8159   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8160 }
8161 
8162 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8163   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8164 }
8165 
8166 /// Force static initialization.
8167 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8168   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8169   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8170 }
8171 
8172 #define GET_REGISTER_MATCHER
8173 #define GET_MATCHER_IMPLEMENTATION
8174 #define GET_MNEMONIC_SPELL_CHECKER
8175 #define GET_MNEMONIC_CHECKER
8176 #include "AMDGPUGenAsmMatcher.inc"
8177 
8178 // This fuction should be defined after auto-generated include so that we have
8179 // MatchClassKind enum defined
/// Extra target-specific operand-class validation hook used by the
/// generated matcher.
/// \returns Match_Success if \p Op is acceptable for matcher class \p Kind,
///          Match_InvalidOperand otherwise.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  // Modifier tokens that were parsed as immediates.
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}
8232 
8233 //===----------------------------------------------------------------------===//
8234 // endpgm
8235 //===----------------------------------------------------------------------===//
8236 
8237 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8238   SMLoc S = getLoc();
8239   int64_t Imm = 0;
8240 
8241   if (!parseExpr(Imm)) {
8242     // The operand is optional, if not present default to 0
8243     Imm = 0;
8244   }
8245 
8246   if (!isUInt<16>(Imm)) {
8247     Error(S, "expected a 16-bit value");
8248     return MatchOperand_ParseFail;
8249   }
8250 
8251   Operands.push_back(
8252       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8253   return MatchOperand_Success;
8254 }
8255 
// True iff this operand is the s_endpgm immediate.
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8257