1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/TargetParser.h"
40 
41 using namespace llvm;
42 using namespace llvm::AMDGPU;
43 using namespace llvm::amdhsa;
44 
45 namespace {
46 
47 class AMDGPUAsmParser;
48 
49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
50 
51 //===----------------------------------------------------------------------===//
52 // Operand
53 //===----------------------------------------------------------------------===//
54 
55 class AMDGPUOperand : public MCParsedAsmOperand {
56   enum KindTy {
57     Token,
58     Immediate,
59     Register,
60     Expression
61   } Kind;
62 
63   SMLoc StartLoc, EndLoc;
64   const AMDGPUAsmParser *AsmParser;
65 
66 public:
67   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
68       : Kind(Kind_), AsmParser(AsmParser_) {}
69 
70   using Ptr = std::unique_ptr<AMDGPUOperand>;
71 
72   struct Modifiers {
73     bool Abs = false;
74     bool Neg = false;
75     bool Sext = false;
76 
77     bool hasFPModifiers() const { return Abs || Neg; }
78     bool hasIntModifiers() const { return Sext; }
79     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
80 
81     int64_t getFPModifiersOperand() const {
82       int64_t Operand = 0;
83       Operand |= Abs ? SISrcMods::ABS : 0u;
84       Operand |= Neg ? SISrcMods::NEG : 0u;
85       return Operand;
86     }
87 
88     int64_t getIntModifiersOperand() const {
89       int64_t Operand = 0;
90       Operand |= Sext ? SISrcMods::SEXT : 0u;
91       return Operand;
92     }
93 
94     int64_t getModifiersOperand() const {
95       assert(!(hasFPModifiers() && hasIntModifiers())
96            && "fp and int modifiers should not be used simultaneously");
97       if (hasFPModifiers()) {
98         return getFPModifiersOperand();
99       } else if (hasIntModifiers()) {
100         return getIntModifiersOperand();
101       } else {
102         return 0;
103       }
104     }
105 
106     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107   };
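
  // Illustrative sketch (not part of the original source): with the encoding
  // above, a source operand written as "-|v0|" sets Abs and Neg, so
  // getModifiersOperand() returns SISrcMods::ABS | SISrcMods::NEG, while
  // "sext(v0)" sets only Sext and yields SISrcMods::SEXT.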
108 
109   enum ImmTy {
110     ImmTyNone,
111     ImmTyGDS,
112     ImmTyLDS,
113     ImmTyOffen,
114     ImmTyIdxen,
115     ImmTyAddr64,
116     ImmTyOffset,
117     ImmTyInstOffset,
118     ImmTyOffset0,
119     ImmTyOffset1,
120     ImmTyCPol,
121     ImmTySWZ,
122     ImmTyTFE,
123     ImmTyD16,
124     ImmTyClampSI,
125     ImmTyOModSI,
126     ImmTyDPP8,
127     ImmTyDppCtrl,
128     ImmTyDppRowMask,
129     ImmTyDppBankMask,
130     ImmTyDppBoundCtrl,
131     ImmTyDppFi,
132     ImmTySdwaDstSel,
133     ImmTySdwaSrc0Sel,
134     ImmTySdwaSrc1Sel,
135     ImmTySdwaDstUnused,
136     ImmTyDMask,
137     ImmTyDim,
138     ImmTyUNorm,
139     ImmTyDA,
140     ImmTyR128A16,
141     ImmTyA16,
142     ImmTyLWE,
143     ImmTyExpTgt,
144     ImmTyExpCompr,
145     ImmTyExpVM,
146     ImmTyFORMAT,
147     ImmTyHwreg,
148     ImmTyOff,
149     ImmTySendMsg,
150     ImmTyInterpSlot,
151     ImmTyInterpAttr,
152     ImmTyAttrChan,
153     ImmTyOpSel,
154     ImmTyOpSelHi,
155     ImmTyNegLo,
156     ImmTyNegHi,
157     ImmTySwizzle,
158     ImmTyGprIdxMode,
159     ImmTyHigh,
160     ImmTyBLGP,
161     ImmTyCBSZ,
162     ImmTyABID,
163     ImmTyEndpgm,
164   };
165 
166   enum ImmKindTy {
167     ImmKindTyNone,
168     ImmKindTyLiteral,
169     ImmKindTyConst,
170   };
171 
172 private:
173   struct TokOp {
174     const char *Data;
175     unsigned Length;
176   };
177 
178   struct ImmOp {
179     int64_t Val;
180     ImmTy Type;
181     bool IsFPImm;
182     mutable ImmKindTy Kind;
183     Modifiers Mods;
184   };
185 
186   struct RegOp {
187     unsigned RegNo;
188     Modifiers Mods;
189   };
190 
191   union {
192     TokOp Tok;
193     ImmOp Imm;
194     RegOp Reg;
195     const MCExpr *Expr;
196   };
197 
198 public:
199   bool isToken() const override {
200     if (Kind == Token)
201       return true;
202 
203     // When parsing operands, we can't always tell if something was meant to be
204     // a token, like 'gds', or an expression that references a global variable.
205     // In this case, we assume the string is an expression, and if we need to
206     // interpret it as a token, then we treat the symbol name as the token.
207     return isSymbolRefExpr();
208   }
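
  // An example of the fallback described above: an operand spelled "gds" may
  // initially be recorded as an Expression referring to a symbol named "gds";
  // getToken() below then returns the symbol's name so the operand can still
  // be matched as a token.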
209 
210   bool isSymbolRefExpr() const {
211     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
212   }
213 
214   bool isImm() const override {
215     return Kind == Immediate;
216   }
217 
218   void setImmKindNone() const {
219     assert(isImm());
220     Imm.Kind = ImmKindTyNone;
221   }
222 
223   void setImmKindLiteral() const {
224     assert(isImm());
225     Imm.Kind = ImmKindTyLiteral;
226   }
227 
228   void setImmKindConst() const {
229     assert(isImm());
230     Imm.Kind = ImmKindTyConst;
231   }
232 
233   bool IsImmKindLiteral() const {
234     return isImm() && Imm.Kind == ImmKindTyLiteral;
235   }
236 
237   bool isImmKindConst() const {
238     return isImm() && Imm.Kind == ImmKindTyConst;
239   }
240 
241   bool isInlinableImm(MVT type) const;
242   bool isLiteralImm(MVT type) const;
243 
244   bool isRegKind() const {
245     return Kind == Register;
246   }
247 
248   bool isReg() const override {
249     return isRegKind() && !hasModifiers();
250   }
251 
252   bool isRegOrInline(unsigned RCID, MVT type) const {
253     return isRegClass(RCID) || isInlinableImm(type);
254   }
255 
256   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
257     return isRegOrInline(RCID, type) || isLiteralImm(type);
258   }
259 
260   bool isRegOrImmWithInt16InputMods() const {
261     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
262   }
263 
264   bool isRegOrImmWithInt32InputMods() const {
265     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
266   }
267 
268   bool isRegOrImmWithInt64InputMods() const {
269     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
270   }
271 
272   bool isRegOrImmWithFP16InputMods() const {
273     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
274   }
275 
276   bool isRegOrImmWithFP32InputMods() const {
277     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
278   }
279 
280   bool isRegOrImmWithFP64InputMods() const {
281     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
282   }
283 
284   bool isVReg() const {
285     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
286            isRegClass(AMDGPU::VReg_64RegClassID) ||
287            isRegClass(AMDGPU::VReg_96RegClassID) ||
288            isRegClass(AMDGPU::VReg_128RegClassID) ||
289            isRegClass(AMDGPU::VReg_160RegClassID) ||
290            isRegClass(AMDGPU::VReg_192RegClassID) ||
291            isRegClass(AMDGPU::VReg_256RegClassID) ||
292            isRegClass(AMDGPU::VReg_512RegClassID) ||
293            isRegClass(AMDGPU::VReg_1024RegClassID);
294   }
295 
296   bool isVReg32() const {
297     return isRegClass(AMDGPU::VGPR_32RegClassID);
298   }
299 
300   bool isVReg32OrOff() const {
301     return isOff() || isVReg32();
302   }
303 
304   bool isNull() const {
305     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
306   }
307 
308   bool isVRegWithInputMods() const;
309 
310   bool isSDWAOperand(MVT type) const;
311   bool isSDWAFP16Operand() const;
312   bool isSDWAFP32Operand() const;
313   bool isSDWAInt16Operand() const;
314   bool isSDWAInt32Operand() const;
315 
316   bool isImmTy(ImmTy ImmT) const {
317     return isImm() && Imm.Type == ImmT;
318   }
319 
320   bool isImmModifier() const {
321     return isImm() && Imm.Type != ImmTyNone;
322   }
323 
324   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
325   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
326   bool isDMask() const { return isImmTy(ImmTyDMask); }
327   bool isDim() const { return isImmTy(ImmTyDim); }
328   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
329   bool isDA() const { return isImmTy(ImmTyDA); }
330   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
331   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
332   bool isLWE() const { return isImmTy(ImmTyLWE); }
333   bool isOff() const { return isImmTy(ImmTyOff); }
334   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
335   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
336   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
337   bool isOffen() const { return isImmTy(ImmTyOffen); }
338   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
339   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
340   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
341   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
342   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
343 
344   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
345   bool isGDS() const { return isImmTy(ImmTyGDS); }
346   bool isLDS() const { return isImmTy(ImmTyLDS); }
347   bool isCPol() const { return isImmTy(ImmTyCPol); }
348   bool isSWZ() const { return isImmTy(ImmTySWZ); }
349   bool isTFE() const { return isImmTy(ImmTyTFE); }
350   bool isD16() const { return isImmTy(ImmTyD16); }
351   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
352   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
353   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
354   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
355   bool isFI() const { return isImmTy(ImmTyDppFi); }
356   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
357   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
358   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
359   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
360   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
361   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
362   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
363   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
364   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
365   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
366   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
367   bool isHigh() const { return isImmTy(ImmTyHigh); }
368 
369   bool isMod() const {
370     return isClampSI() || isOModSI();
371   }
372 
373   bool isRegOrImm() const {
374     return isReg() || isImm();
375   }
376 
377   bool isRegClass(unsigned RCID) const;
378 
379   bool isInlineValue() const;
380 
381   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
382     return isRegOrInline(RCID, type) && !hasModifiers();
383   }
384 
385   bool isSCSrcB16() const {
386     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
387   }
388 
389   bool isSCSrcV2B16() const {
390     return isSCSrcB16();
391   }
392 
393   bool isSCSrcB32() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
395   }
396 
397   bool isSCSrcB64() const {
398     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
399   }
400 
401   bool isBoolReg() const;
402 
403   bool isSCSrcF16() const {
404     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
405   }
406 
407   bool isSCSrcV2F16() const {
408     return isSCSrcF16();
409   }
410 
411   bool isSCSrcF32() const {
412     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
413   }
414 
415   bool isSCSrcF64() const {
416     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
417   }
418 
419   bool isSSrcB32() const {
420     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
421   }
422 
423   bool isSSrcB16() const {
424     return isSCSrcB16() || isLiteralImm(MVT::i16);
425   }
426 
427   bool isSSrcV2B16() const {
428     llvm_unreachable("cannot happen");
429     return isSSrcB16();
430   }
431 
432   bool isSSrcB64() const {
433     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
434     // See isVSrc64().
435     return isSCSrcB64() || isLiteralImm(MVT::i64);
436   }
437 
438   bool isSSrcF32() const {
439     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
440   }
441 
442   bool isSSrcF64() const {
443     return isSCSrcB64() || isLiteralImm(MVT::f64);
444   }
445 
446   bool isSSrcF16() const {
447     return isSCSrcB16() || isLiteralImm(MVT::f16);
448   }
449 
450   bool isSSrcV2F16() const {
451     llvm_unreachable("cannot happen");
452     return isSSrcF16();
453   }
454 
455   bool isSSrcV2FP32() const {
456     llvm_unreachable("cannot happen");
457     return isSSrcF32();
458   }
459 
460   bool isSCSrcV2FP32() const {
461     llvm_unreachable("cannot happen");
462     return isSCSrcF32();
463   }
464 
465   bool isSSrcV2INT32() const {
466     llvm_unreachable("cannot happen");
467     return isSSrcB32();
468   }
469 
470   bool isSCSrcV2INT32() const {
471     llvm_unreachable("cannot happen");
472     return isSCSrcB32();
473   }
474 
475   bool isSSrcOrLdsB32() const {
476     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
477            isLiteralImm(MVT::i32) || isExpr();
478   }
479 
480   bool isVCSrcB32() const {
481     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
482   }
483 
484   bool isVCSrcB64() const {
485     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
486   }
487 
488   bool isVCSrcB16() const {
489     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
490   }
491 
492   bool isVCSrcV2B16() const {
493     return isVCSrcB16();
494   }
495 
496   bool isVCSrcF32() const {
497     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
498   }
499 
500   bool isVCSrcF64() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
502   }
503 
504   bool isVCSrcF16() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
506   }
507 
508   bool isVCSrcV2F16() const {
509     return isVCSrcF16();
510   }
511 
512   bool isVSrcB32() const {
513     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
514   }
515 
516   bool isVSrcB64() const {
517     return isVCSrcF64() || isLiteralImm(MVT::i64);
518   }
519 
520   bool isVSrcB16() const {
521     return isVCSrcB16() || isLiteralImm(MVT::i16);
522   }
523 
524   bool isVSrcV2B16() const {
525     return isVSrcB16() || isLiteralImm(MVT::v2i16);
526   }
527 
528   bool isVCSrcV2FP32() const {
529     return isVCSrcF64();
530   }
531 
532   bool isVSrcV2FP32() const {
533     return isVSrcF64() || isLiteralImm(MVT::v2f32);
534   }
535 
536   bool isVCSrcV2INT32() const {
537     return isVCSrcB64();
538   }
539 
540   bool isVSrcV2INT32() const {
541     return isVSrcB64() || isLiteralImm(MVT::v2i32);
542   }
543 
544   bool isVSrcF32() const {
545     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
546   }
547 
548   bool isVSrcF64() const {
549     return isVCSrcF64() || isLiteralImm(MVT::f64);
550   }
551 
552   bool isVSrcF16() const {
553     return isVCSrcF16() || isLiteralImm(MVT::f16);
554   }
555 
556   bool isVSrcV2F16() const {
557     return isVSrcF16() || isLiteralImm(MVT::v2f16);
558   }
559 
560   bool isVISrcB32() const {
561     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
562   }
563 
564   bool isVISrcB16() const {
565     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
566   }
567 
568   bool isVISrcV2B16() const {
569     return isVISrcB16();
570   }
571 
572   bool isVISrcF32() const {
573     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
574   }
575 
576   bool isVISrcF16() const {
577     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
578   }
579 
580   bool isVISrcV2F16() const {
581     return isVISrcF16() || isVISrcB32();
582   }
583 
584   bool isVISrc_64B64() const {
585     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
586   }
587 
588   bool isVISrc_64F64() const {
589     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
590   }
591 
592   bool isVISrc_64V2FP32() const {
593     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
594   }
595 
596   bool isVISrc_64V2INT32() const {
597     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
598   }
599 
600   bool isVISrc_256B64() const {
601     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
602   }
603 
604   bool isVISrc_256F64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
606   }
607 
608   bool isVISrc_128B16() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
610   }
611 
612   bool isVISrc_128V2B16() const {
613     return isVISrc_128B16();
614   }
615 
616   bool isVISrc_128B32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_128F32() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
622   }
623 
624   bool isVISrc_256V2FP32() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
626   }
627 
628   bool isVISrc_256V2INT32() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
630   }
631 
632   bool isVISrc_512B32() const {
633     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
634   }
635 
636   bool isVISrc_512B16() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
638   }
639 
640   bool isVISrc_512V2B16() const {
641     return isVISrc_512B16();
642   }
643 
644   bool isVISrc_512F32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_512F16() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
650   }
651 
652   bool isVISrc_512V2F16() const {
653     return isVISrc_512F16() || isVISrc_512B32();
654   }
655 
656   bool isVISrc_1024B32() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
658   }
659 
660   bool isVISrc_1024B16() const {
661     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
662   }
663 
664   bool isVISrc_1024V2B16() const {
665     return isVISrc_1024B16();
666   }
667 
668   bool isVISrc_1024F32() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
670   }
671 
672   bool isVISrc_1024F16() const {
673     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
674   }
675 
676   bool isVISrc_1024V2F16() const {
677     return isVISrc_1024F16() || isVISrc_1024B32();
678   }
679 
680   bool isAISrcB32() const {
681     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
682   }
683 
684   bool isAISrcB16() const {
685     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
686   }
687 
688   bool isAISrcV2B16() const {
689     return isAISrcB16();
690   }
691 
692   bool isAISrcF32() const {
693     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
694   }
695 
696   bool isAISrcF16() const {
697     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
698   }
699 
700   bool isAISrcV2F16() const {
701     return isAISrcF16() || isAISrcB32();
702   }
703 
704   bool isAISrc_64B64() const {
705     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
706   }
707 
708   bool isAISrc_64F64() const {
709     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
710   }
711 
712   bool isAISrc_128B32() const {
713     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
714   }
715 
716   bool isAISrc_128B16() const {
717     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
718   }
719 
720   bool isAISrc_128V2B16() const {
721     return isAISrc_128B16();
722   }
723 
724   bool isAISrc_128F32() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
726   }
727 
728   bool isAISrc_128F16() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
730   }
731 
732   bool isAISrc_128V2F16() const {
733     return isAISrc_128F16() || isAISrc_128B32();
734   }
735 
736   bool isVISrc_128F16() const {
737     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
738   }
739 
740   bool isVISrc_128V2F16() const {
741     return isVISrc_128F16() || isVISrc_128B32();
742   }
743 
744   bool isAISrc_256B64() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
746   }
747 
748   bool isAISrc_256F64() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
750   }
751 
752   bool isAISrc_512B32() const {
753     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
754   }
755 
756   bool isAISrc_512B16() const {
757     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
758   }
759 
760   bool isAISrc_512V2B16() const {
761     return isAISrc_512B16();
762   }
763 
764   bool isAISrc_512F32() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
766   }
767 
768   bool isAISrc_512F16() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
770   }
771 
772   bool isAISrc_512V2F16() const {
773     return isAISrc_512F16() || isAISrc_512B32();
774   }
775 
776   bool isAISrc_1024B32() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
778   }
779 
780   bool isAISrc_1024B16() const {
781     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
782   }
783 
784   bool isAISrc_1024V2B16() const {
785     return isAISrc_1024B16();
786   }
787 
788   bool isAISrc_1024F32() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
790   }
791 
792   bool isAISrc_1024F16() const {
793     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
794   }
795 
796   bool isAISrc_1024V2F16() const {
797     return isAISrc_1024F16() || isAISrc_1024B32();
798   }
799 
800   bool isKImmFP32() const {
801     return isLiteralImm(MVT::f32);
802   }
803 
804   bool isKImmFP16() const {
805     return isLiteralImm(MVT::f16);
806   }
807 
808   bool isMem() const override {
809     return false;
810   }
811 
812   bool isExpr() const {
813     return Kind == Expression;
814   }
815 
816   bool isSoppBrTarget() const {
817     return isExpr() || isImm();
818   }
819 
820   bool isSWaitCnt() const;
821   bool isDepCtr() const;
822   bool isSDelayAlu() const;
823   bool isHwreg() const;
824   bool isSendMsg() const;
825   bool isSwizzle() const;
826   bool isSMRDOffset8() const;
827   bool isSMEMOffset() const;
828   bool isSMRDLiteralOffset() const;
829   bool isDPP8() const;
830   bool isDPPCtrl() const;
831   bool isBLGP() const;
832   bool isCBSZ() const;
833   bool isABID() const;
834   bool isGPRIdxMode() const;
835   bool isS16Imm() const;
836   bool isU16Imm() const;
837   bool isEndpgm() const;
838 
839   StringRef getExpressionAsToken() const {
840     assert(isExpr());
841     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
842     return S->getSymbol().getName();
843   }
844 
845   StringRef getToken() const {
846     assert(isToken());
847 
848     if (Kind == Expression)
849       return getExpressionAsToken();
850 
851     return StringRef(Tok.Data, Tok.Length);
852   }
853 
854   int64_t getImm() const {
855     assert(isImm());
856     return Imm.Val;
857   }
858 
859   void setImm(int64_t Val) {
860     assert(isImm());
861     Imm.Val = Val;
862   }
863 
864   ImmTy getImmTy() const {
865     assert(isImm());
866     return Imm.Type;
867   }
868 
869   unsigned getReg() const override {
870     assert(isRegKind());
871     return Reg.RegNo;
872   }
873 
874   SMLoc getStartLoc() const override {
875     return StartLoc;
876   }
877 
878   SMLoc getEndLoc() const override {
879     return EndLoc;
880   }
881 
882   SMRange getLocRange() const {
883     return SMRange(StartLoc, EndLoc);
884   }
885 
886   Modifiers getModifiers() const {
887     assert(isRegKind() || isImmTy(ImmTyNone));
888     return isRegKind() ? Reg.Mods : Imm.Mods;
889   }
890 
891   void setModifiers(Modifiers Mods) {
892     assert(isRegKind() || isImmTy(ImmTyNone));
893     if (isRegKind())
894       Reg.Mods = Mods;
895     else
896       Imm.Mods = Mods;
897   }
898 
899   bool hasModifiers() const {
900     return getModifiers().hasModifiers();
901   }
902 
903   bool hasFPModifiers() const {
904     return getModifiers().hasFPModifiers();
905   }
906 
907   bool hasIntModifiers() const {
908     return getModifiers().hasIntModifiers();
909   }
910 
911   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
912 
913   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
914 
915   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
916 
917   template <unsigned Bitwidth>
918   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
919 
920   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
921     addKImmFPOperands<16>(Inst, N);
922   }
923 
924   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
925     addKImmFPOperands<32>(Inst, N);
926   }
927 
928   void addRegOperands(MCInst &Inst, unsigned N) const;
929 
930   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
931     addRegOperands(Inst, N);
932   }
933 
934   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
935     if (isRegKind())
936       addRegOperands(Inst, N);
937     else if (isExpr())
938       Inst.addOperand(MCOperand::createExpr(Expr));
939     else
940       addImmOperands(Inst, N);
941   }
942 
943   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
944     Modifiers Mods = getModifiers();
945     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
946     if (isRegKind()) {
947       addRegOperands(Inst, N);
948     } else {
949       addImmOperands(Inst, N, false);
950     }
951   }
952 
953   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
954     assert(!hasIntModifiers());
955     addRegOrImmWithInputModsOperands(Inst, N);
956   }
957 
958   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
959     assert(!hasFPModifiers());
960     addRegOrImmWithInputModsOperands(Inst, N);
961   }
962 
963   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
964     Modifiers Mods = getModifiers();
965     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
966     assert(isRegKind());
967     addRegOperands(Inst, N);
968   }
969 
970   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
971     assert(!hasIntModifiers());
972     addRegWithInputModsOperands(Inst, N);
973   }
974 
975   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
976     assert(!hasFPModifiers());
977     addRegWithInputModsOperands(Inst, N);
978   }
979 
980   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
981     if (isImm())
982       addImmOperands(Inst, N);
983     else {
984       assert(isExpr());
985       Inst.addOperand(MCOperand::createExpr(Expr));
986     }
987   }
988 
989   static void printImmTy(raw_ostream& OS, ImmTy Type) {
990     switch (Type) {
991     case ImmTyNone: OS << "None"; break;
992     case ImmTyGDS: OS << "GDS"; break;
993     case ImmTyLDS: OS << "LDS"; break;
994     case ImmTyOffen: OS << "Offen"; break;
995     case ImmTyIdxen: OS << "Idxen"; break;
996     case ImmTyAddr64: OS << "Addr64"; break;
997     case ImmTyOffset: OS << "Offset"; break;
998     case ImmTyInstOffset: OS << "InstOffset"; break;
999     case ImmTyOffset0: OS << "Offset0"; break;
1000     case ImmTyOffset1: OS << "Offset1"; break;
1001     case ImmTyCPol: OS << "CPol"; break;
1002     case ImmTySWZ: OS << "SWZ"; break;
1003     case ImmTyTFE: OS << "TFE"; break;
1004     case ImmTyD16: OS << "D16"; break;
1005     case ImmTyFORMAT: OS << "FORMAT"; break;
1006     case ImmTyClampSI: OS << "ClampSI"; break;
1007     case ImmTyOModSI: OS << "OModSI"; break;
1008     case ImmTyDPP8: OS << "DPP8"; break;
1009     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1010     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1011     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1012     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1013     case ImmTyDppFi: OS << "FI"; break;
1014     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1015     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1016     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1017     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1018     case ImmTyDMask: OS << "DMask"; break;
1019     case ImmTyDim: OS << "Dim"; break;
1020     case ImmTyUNorm: OS << "UNorm"; break;
1021     case ImmTyDA: OS << "DA"; break;
1022     case ImmTyR128A16: OS << "R128A16"; break;
1023     case ImmTyA16: OS << "A16"; break;
1024     case ImmTyLWE: OS << "LWE"; break;
1025     case ImmTyOff: OS << "Off"; break;
1026     case ImmTyExpTgt: OS << "ExpTgt"; break;
1027     case ImmTyExpCompr: OS << "ExpCompr"; break;
1028     case ImmTyExpVM: OS << "ExpVM"; break;
1029     case ImmTyHwreg: OS << "Hwreg"; break;
1030     case ImmTySendMsg: OS << "SendMsg"; break;
1031     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1032     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1033     case ImmTyAttrChan: OS << "AttrChan"; break;
1034     case ImmTyOpSel: OS << "OpSel"; break;
1035     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1036     case ImmTyNegLo: OS << "NegLo"; break;
1037     case ImmTyNegHi: OS << "NegHi"; break;
1038     case ImmTySwizzle: OS << "Swizzle"; break;
1039     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1040     case ImmTyHigh: OS << "High"; break;
1041     case ImmTyBLGP: OS << "BLGP"; break;
1042     case ImmTyCBSZ: OS << "CBSZ"; break;
1043     case ImmTyABID: OS << "ABID"; break;
1044     case ImmTyEndpgm: OS << "Endpgm"; break;
1045     }
1046   }
1047 
1048   void print(raw_ostream &OS) const override {
1049     switch (Kind) {
1050     case Register:
1051       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1052       break;
1053     case Immediate:
1054       OS << '<' << getImm();
1055       if (getImmTy() != ImmTyNone) {
1056         OS << " type: "; printImmTy(OS, getImmTy());
1057       }
1058       OS << " mods: " << Imm.Mods << '>';
1059       break;
1060     case Token:
1061       OS << '\'' << getToken() << '\'';
1062       break;
1063     case Expression:
1064       OS << "<expr " << *Expr << '>';
1065       break;
1066     }
1067   }
1068 
1069   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1070                                       int64_t Val, SMLoc Loc,
1071                                       ImmTy Type = ImmTyNone,
1072                                       bool IsFPImm = false) {
1073     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1074     Op->Imm.Val = Val;
1075     Op->Imm.IsFPImm = IsFPImm;
1076     Op->Imm.Kind = ImmKindTyNone;
1077     Op->Imm.Type = Type;
1078     Op->Imm.Mods = Modifiers();
1079     Op->StartLoc = Loc;
1080     Op->EndLoc = Loc;
1081     return Op;
1082   }
1083 
1084   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1085                                         StringRef Str, SMLoc Loc,
1086                                         bool HasExplicitEncodingSize = true) {
1087     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1088     Res->Tok.Data = Str.data();
1089     Res->Tok.Length = Str.size();
1090     Res->StartLoc = Loc;
1091     Res->EndLoc = Loc;
1092     return Res;
1093   }
1094 
1095   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1096                                       unsigned RegNo, SMLoc S,
1097                                       SMLoc E) {
1098     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1099     Op->Reg.RegNo = RegNo;
1100     Op->Reg.Mods = Modifiers();
1101     Op->StartLoc = S;
1102     Op->EndLoc = E;
1103     return Op;
1104   }
1105 
1106   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1107                                        const class MCExpr *Expr, SMLoc S) {
1108     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1109     Op->Expr = Expr;
1110     Op->StartLoc = S;
1111     Op->EndLoc = S;
1112     return Op;
1113   }
1114 };
1115 
1116 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1117   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1118   return OS;
1119 }
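
// For reference, a Modifiers value with Abs and Neg set prints as
// "abs:1 neg: 1 sext:0" via the operator above.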
1120 
1121 //===----------------------------------------------------------------------===//
1122 // AsmParser
1123 //===----------------------------------------------------------------------===//
1124 
1125 // Holds info related to the current kernel, e.g. count of SGPRs used.
1126 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1127 // .amdgpu_hsa_kernel or at EOF.
1128 class KernelScopeInfo {
1129   int SgprIndexUnusedMin = -1;
1130   int VgprIndexUnusedMin = -1;
1131   int AgprIndexUnusedMin = -1;
1132   MCContext *Ctx = nullptr;
1133   MCSubtargetInfo const *MSTI = nullptr;
1134 
1135   void usesSgprAt(int i) {
1136     if (i >= SgprIndexUnusedMin) {
1137       SgprIndexUnusedMin = ++i;
1138       if (Ctx) {
1139         MCSymbol* const Sym =
1140           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1141         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1142       }
1143     }
1144   }
1145 
1146   void usesVgprAt(int i) {
1147     if (i >= VgprIndexUnusedMin) {
1148       VgprIndexUnusedMin = ++i;
1149       if (Ctx) {
1150         MCSymbol* const Sym =
1151           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1152         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1153                                          VgprIndexUnusedMin);
1154         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1155       }
1156     }
1157   }
1158 
1159   void usesAgprAt(int i) {
1160     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1161     if (!hasMAIInsts(*MSTI))
1162       return;
1163 
1164     if (i >= AgprIndexUnusedMin) {
1165       AgprIndexUnusedMin = ++i;
1166       if (Ctx) {
1167         MCSymbol* const Sym =
1168           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1169         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1170 
1171         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1172         MCSymbol* const vSym =
1173           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1174         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1175                                          VgprIndexUnusedMin);
1176         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1177       }
1178     }
1179   }
1180 
1181 public:
1182   KernelScopeInfo() = default;
1183 
1184   void initialize(MCContext &Context) {
1185     Ctx = &Context;
1186     MSTI = Ctx->getSubtargetInfo();
1187 
1188     usesSgprAt(SgprIndexUnusedMin = -1);
1189     usesVgprAt(VgprIndexUnusedMin = -1);
1190     if (hasMAIInsts(*MSTI)) {
1191       usesAgprAt(AgprIndexUnusedMin = -1);
1192     }
1193   }
1194 
1195   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1196                     unsigned RegWidth) {
1197     switch (RegKind) {
1198     case IS_SGPR:
1199       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1200       break;
1201     case IS_AGPR:
1202       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1203       break;
1204     case IS_VGPR:
1205       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1206       break;
1207     default:
1208       break;
1209     }
1210   }
1211 };
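
// Illustrative usage (a sketch, not verbatim from this file): after parsing
// "v_mov_b32 v5, s3" inside an .amdgpu_hsa_kernel scope, the parser would call
//   KernelScope.usesRegister(IS_VGPR, /*DwordRegIndex=*/5, /*RegWidth=*/32);
//   KernelScope.usesRegister(IS_SGPR, /*DwordRegIndex=*/3, /*RegWidth=*/32);
// updating .kernel.vgpr_count and .kernel.sgpr_count to 6 and 4 respectively
// (assuming a target where AGPRs do not contribute to the VGPR total).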
1212 
1213 class AMDGPUAsmParser : public MCTargetAsmParser {
1214   MCAsmParser &Parser;
1215 
1216   // Number of extra operands parsed after the first optional operand.
1217   // This may be necessary to skip hardcoded mandatory operands.
1218   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1219 
1220   unsigned ForcedEncodingSize = 0;
1221   bool ForcedDPP = false;
1222   bool ForcedSDWA = false;
1223   KernelScopeInfo KernelScope;
1224   unsigned CPolSeen;
1225 
1226   /// @name Auto-generated Match Functions
1227   /// {
1228 
1229 #define GET_ASSEMBLER_HEADER
1230 #include "AMDGPUGenAsmMatcher.inc"
1231 
1232   /// }
1233 
1234 private:
1235   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1236   bool OutOfRangeError(SMRange Range);
1237   /// Calculate VGPR/SGPR blocks required for the given target, reserved
1238   /// registers, and user-specified NextFreeXGPR values.
1239   ///
1240   /// \param Features [in] Target features, used for bug corrections.
1241   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1242   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1243   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1244   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1245   /// descriptor field, if valid.
1246   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1247   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1248   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1249   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1250   /// \param VGPRBlocks [out] Result VGPR block count.
1251   /// \param SGPRBlocks [out] Result SGPR block count.
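  ///
  /// Rough illustration (granule sizes are target-dependent; the value used
  /// below is only an assumption): with a VGPR allocation granule of 4 and
  /// NextFreeVGPR = 10, the count rounds up to 12 registers, giving
  /// VGPRBlocks = 12 / 4 - 1 = 2 in the "blocks minus one" form used by the
  /// kernel descriptor.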
1252   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1253                           bool FlatScrUsed, bool XNACKUsed,
1254                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1255                           SMRange VGPRRange, unsigned NextFreeSGPR,
1256                           SMRange SGPRRange, unsigned &VGPRBlocks,
1257                           unsigned &SGPRBlocks);
1258   bool ParseDirectiveAMDGCNTarget();
1259   bool ParseDirectiveAMDHSAKernel();
1260   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1261   bool ParseDirectiveHSACodeObjectVersion();
1262   bool ParseDirectiveHSACodeObjectISA();
1263   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1264   bool ParseDirectiveAMDKernelCodeT();
1265   // TODO: Possibly make subtargetHasRegister const.
1266   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1267   bool ParseDirectiveAMDGPUHsaKernel();
1268 
1269   bool ParseDirectiveISAVersion();
1270   bool ParseDirectiveHSAMetadata();
1271   bool ParseDirectivePALMetadataBegin();
1272   bool ParseDirectivePALMetadata();
1273   bool ParseDirectiveAMDGPULDS();
1274 
1275   /// Common code to parse out a block of text (typically YAML) between start and
1276   /// end directives.
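  ///
  /// For example, the HSA metadata directives delimit such a block (directive
  /// names shown for illustration only):
  /// \code
  ///   .amd_amdgpu_hsa_metadata
  ///   <YAML document>
  ///   .end_amd_amdgpu_hsa_metadata
  /// \endcode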
1277   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1278                            const char *AssemblerDirectiveEnd,
1279                            std::string &CollectString);
1280 
1281   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1282                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1283   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1284                            unsigned &RegNum, unsigned &RegWidth,
1285                            bool RestoreOnFailure = false);
1286   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1287                            unsigned &RegNum, unsigned &RegWidth,
1288                            SmallVectorImpl<AsmToken> &Tokens);
1289   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1290                            unsigned &RegWidth,
1291                            SmallVectorImpl<AsmToken> &Tokens);
1292   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1293                            unsigned &RegWidth,
1294                            SmallVectorImpl<AsmToken> &Tokens);
1295   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1296                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1297   bool ParseRegRange(unsigned& Num, unsigned& Width);
1298   unsigned getRegularReg(RegisterKind RegKind,
1299                          unsigned RegNum,
1300                          unsigned RegWidth,
1301                          SMLoc Loc);
1302 
1303   bool isRegister();
1304   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1305   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1306   void initializeGprCountSymbol(RegisterKind RegKind);
1307   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1308                              unsigned RegWidth);
1309   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1310                     bool IsAtomic, bool IsLds = false);
1311   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1312                  bool IsGdsHardcoded);
1313 
1314 public:
1315   enum AMDGPUMatchResultTy {
1316     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1317   };
1318   enum OperandMode {
1319     OperandMode_Default,
1320     OperandMode_NSA,
1321   };
1322 
1323   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1324 
1325   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1326                const MCInstrInfo &MII,
1327                const MCTargetOptions &Options)
1328       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1329     MCAsmParserExtension::Initialize(Parser);
1330 
1331     if (getFeatureBits().none()) {
1332       // Set default features.
1333       copySTI().ToggleFeature("southern-islands");
1334     }
1335 
1336     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1337 
1338     {
1339       // TODO: make those pre-defined variables read-only.
1340       // Currently there is no suitable machinery in core llvm-mc for this.
1341       // MCSymbol::isRedefinable is intended for another purpose, and
1342       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1343       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1344       MCContext &Ctx = getContext();
1345       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1346         MCSymbol *Sym =
1347             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1348         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1349         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1350         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1351         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1352         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1353       } else {
1354         MCSymbol *Sym =
1355             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1356         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1357         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1358         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1359         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1360         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1361       }
1362       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1363         initializeGprCountSymbol(IS_VGPR);
1364         initializeGprCountSymbol(IS_SGPR);
1365       } else
1366         KernelScope.initialize(getContext());
1367     }
1368   }
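
  // The machine-version symbols set up above are meant to be readable from the
  // assembly source itself; a hypothetical input (illustration only) could
  // gate code on them:
  //   .if .option.machine_version_major >= 9
  //     s_nop 0
  //   .endif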
1369 
1370   bool hasMIMG_R128() const {
1371     return AMDGPU::hasMIMG_R128(getSTI());
1372   }
1373 
1374   bool hasPackedD16() const {
1375     return AMDGPU::hasPackedD16(getSTI());
1376   }
1377 
1378   bool hasGFX10A16() const {
1379     return AMDGPU::hasGFX10A16(getSTI());
1380   }
1381 
1382   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1383 
1384   bool isSI() const {
1385     return AMDGPU::isSI(getSTI());
1386   }
1387 
1388   bool isCI() const {
1389     return AMDGPU::isCI(getSTI());
1390   }
1391 
1392   bool isVI() const {
1393     return AMDGPU::isVI(getSTI());
1394   }
1395 
1396   bool isGFX9() const {
1397     return AMDGPU::isGFX9(getSTI());
1398   }
1399 
1400   // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
1401   bool isGFX90A() const {
1402     return AMDGPU::isGFX90A(getSTI());
1403   }
1404 
1405   bool isGFX940() const {
1406     return AMDGPU::isGFX940(getSTI());
1407   }
1408 
1409   bool isGFX9Plus() const {
1410     return AMDGPU::isGFX9Plus(getSTI());
1411   }
1412 
1413   bool isGFX10() const {
1414     return AMDGPU::isGFX10(getSTI());
1415   }
1416 
1417   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1418 
1419   bool isGFX11() const {
1420     return AMDGPU::isGFX11(getSTI());
1421   }
1422 
1423   bool isGFX11Plus() const {
1424     return AMDGPU::isGFX11Plus(getSTI());
1425   }
1426 
1427   bool isGFX10_BEncoding() const {
1428     return AMDGPU::isGFX10_BEncoding(getSTI());
1429   }
1430 
1431   bool hasInv2PiInlineImm() const {
1432     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1433   }
1434 
1435   bool hasFlatOffsets() const {
1436     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1437   }
1438 
1439   bool hasArchitectedFlatScratch() const {
1440     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1441   }
1442 
1443   bool hasSGPR102_SGPR103() const {
1444     return !isVI() && !isGFX9();
1445   }
1446 
1447   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1448 
1449   bool hasIntClamp() const {
1450     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1451   }
1452 
1453   AMDGPUTargetStreamer &getTargetStreamer() {
1454     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1455     return static_cast<AMDGPUTargetStreamer &>(TS);
1456   }
1457 
1458   const MCRegisterInfo *getMRI() const {
1459     // We need this const_cast because for some reason getContext() is not const
1460     // in MCAsmParser.
1461     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1462   }
1463 
1464   const MCInstrInfo *getMII() const {
1465     return &MII;
1466   }
1467 
1468   const FeatureBitset &getFeatureBits() const {
1469     return getSTI().getFeatureBits();
1470   }
1471 
1472   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1473   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1474   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1475 
1476   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1477   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1478   bool isForcedDPP() const { return ForcedDPP; }
1479   bool isForcedSDWA() const { return ForcedSDWA; }
1480   ArrayRef<unsigned> getMatchedVariants() const;
1481   StringRef getMatchedVariantName() const;
1482 
1483   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1484   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1485                      bool RestoreOnFailure);
1486   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1487   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1488                                         SMLoc &EndLoc) override;
1489   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1490   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1491                                       unsigned Kind) override;
1492   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1493                                OperandVector &Operands, MCStreamer &Out,
1494                                uint64_t &ErrorInfo,
1495                                bool MatchingInlineAsm) override;
1496   bool ParseDirective(AsmToken DirectiveID) override;
1497   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1498                                     OperandMode Mode = OperandMode_Default);
1499   StringRef parseMnemonicSuffix(StringRef Name);
1500   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1501                         SMLoc NameLoc, OperandVector &Operands) override;
1502   //bool ProcessInstruction(MCInst &Inst);
1503 
1504   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1505 
1506   OperandMatchResultTy
1507   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1508                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1509                      bool (*ConvertResult)(int64_t &) = nullptr);
1510 
1511   OperandMatchResultTy
1512   parseOperandArrayWithPrefix(const char *Prefix,
1513                               OperandVector &Operands,
1514                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1515                               bool (*ConvertResult)(int64_t&) = nullptr);
1516 
1517   OperandMatchResultTy
1518   parseNamedBit(StringRef Name, OperandVector &Operands,
1519                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1520   OperandMatchResultTy parseCPol(OperandVector &Operands);
1521   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1522                                              StringRef &Value,
1523                                              SMLoc &StringLoc);
1524 
1525   bool isModifier();
1526   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1527   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1528   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1529   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1530   bool parseSP3NegModifier();
1531   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1532   OperandMatchResultTy parseReg(OperandVector &Operands);
1533   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1534   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1535   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1536   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1537   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1538   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1539   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1540   OperandMatchResultTy parseUfmt(int64_t &Format);
1541   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1542   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1543   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1544   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1545   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1546   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1547   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1548 
1549   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1550   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1551   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1552   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1553 
1554   bool parseCnt(int64_t &IntVal);
1555   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1556 
1557   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1558   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1559   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1560 
1561   bool parseDelay(int64_t &Delay);
1562   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1563 
1564   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1565 
1566 private:
1567   struct OperandInfoTy {
1568     SMLoc Loc;
1569     int64_t Id;
1570     bool IsSymbolic = false;
1571     bool IsDefined = false;
1572 
1573     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1574   };
1575 
1576   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1577   bool validateSendMsg(const OperandInfoTy &Msg,
1578                        const OperandInfoTy &Op,
1579                        const OperandInfoTy &Stream);
1580 
1581   bool parseHwregBody(OperandInfoTy &HwReg,
1582                       OperandInfoTy &Offset,
1583                       OperandInfoTy &Width);
1584   bool validateHwreg(const OperandInfoTy &HwReg,
1585                      const OperandInfoTy &Offset,
1586                      const OperandInfoTy &Width);
1587 
1588   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1589   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1590   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1591 
1592   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1593                       const OperandVector &Operands) const;
1594   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1595   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1596   SMLoc getLitLoc(const OperandVector &Operands) const;
1597   SMLoc getConstLoc(const OperandVector &Operands) const;
1598 
1599   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1600   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1601   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1602   bool validateSOPLiteral(const MCInst &Inst) const;
1603   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1604   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1605   bool validateIntClampSupported(const MCInst &Inst);
1606   bool validateMIMGAtomicDMask(const MCInst &Inst);
1607   bool validateMIMGGatherDMask(const MCInst &Inst);
1608   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1609   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1610   bool validateMIMGAddrSize(const MCInst &Inst);
1611   bool validateMIMGD16(const MCInst &Inst);
1612   bool validateMIMGDim(const MCInst &Inst);
1613   bool validateMIMGMSAA(const MCInst &Inst);
1614   bool validateOpSel(const MCInst &Inst);
1615   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1616   bool validateVccOperand(unsigned Reg) const;
1617   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1618   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1619   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1620   bool validateAGPRLdSt(const MCInst &Inst) const;
1621   bool validateVGPRAlign(const MCInst &Inst) const;
1622   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1623   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1624   bool validateDivScale(const MCInst &Inst);
1625   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1626                              const SMLoc &IDLoc);
1627   bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1628                           const SMLoc &IDLoc);
1629   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1630   unsigned getConstantBusLimit(unsigned Opcode) const;
1631   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1632   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1633   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1634 
1635   bool isSupportedMnemo(StringRef Mnemo,
1636                         const FeatureBitset &FBS);
1637   bool isSupportedMnemo(StringRef Mnemo,
1638                         const FeatureBitset &FBS,
1639                         ArrayRef<unsigned> Variants);
1640   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1641 
1642   bool isId(const StringRef Id) const;
1643   bool isId(const AsmToken &Token, const StringRef Id) const;
1644   bool isToken(const AsmToken::TokenKind Kind) const;
1645   bool trySkipId(const StringRef Id);
1646   bool trySkipId(const StringRef Pref, const StringRef Id);
1647   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1648   bool trySkipToken(const AsmToken::TokenKind Kind);
1649   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1650   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1651   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1652 
1653   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1654   AsmToken::TokenKind getTokenKind() const;
1655   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1656   bool parseExpr(OperandVector &Operands);
1657   StringRef getTokenStr() const;
1658   AsmToken peekToken();
1659   AsmToken getToken() const;
1660   SMLoc getLoc() const;
1661   void lex();
1662 
1663 public:
1664   void onBeginOfFile() override;
1665 
1666   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1667   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1668 
1669   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1670   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1671   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1672   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1673   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1674   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1675 
1676   bool parseSwizzleOperand(int64_t &Op,
1677                            const unsigned MinVal,
1678                            const unsigned MaxVal,
1679                            const StringRef ErrMsg,
1680                            SMLoc &Loc);
1681   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1682                             const unsigned MinVal,
1683                             const unsigned MaxVal,
1684                             const StringRef ErrMsg);
1685   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1686   bool parseSwizzleOffset(int64_t &Imm);
1687   bool parseSwizzleMacro(int64_t &Imm);
1688   bool parseSwizzleQuadPerm(int64_t &Imm);
1689   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1690   bool parseSwizzleBroadcast(int64_t &Imm);
1691   bool parseSwizzleSwap(int64_t &Imm);
1692   bool parseSwizzleReverse(int64_t &Imm);
1693 
1694   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1695   int64_t parseGPRIdxMacro();
1696 
1697   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1698   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1699   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1700   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1701 
1702   AMDGPUOperand::Ptr defaultCPol() const;
1703 
1704   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1705   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1706   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1707   AMDGPUOperand::Ptr defaultFlatOffset() const;
1708 
1709   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1710 
1711   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1712                OptionalImmIndexMap &OptionalIdx);
1713   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1714   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1715   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1716   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1717                 OptionalImmIndexMap &OptionalIdx);
1718 
1719   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1720 
1721   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1722                bool IsAtomic = false);
1723   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1724   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1725 
1726   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1727 
1728   bool parseDimId(unsigned &Encoding);
1729   OperandMatchResultTy parseDim(OperandVector &Operands);
1730   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1731   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1732   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1733   int64_t parseDPPCtrlSel(StringRef Ctrl);
1734   int64_t parseDPPCtrlPerm();
1735   AMDGPUOperand::Ptr defaultRowMask() const;
1736   AMDGPUOperand::Ptr defaultBankMask() const;
1737   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1738   AMDGPUOperand::Ptr defaultFI() const;
1739   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1740   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1741 
1742   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1743                                     AMDGPUOperand::ImmTy Type);
1744   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1745   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1746   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1747   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1748   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1749   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1750   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1751                uint64_t BasicInstType,
1752                bool SkipDstVcc = false,
1753                bool SkipSrcVcc = false);
1754 
1755   AMDGPUOperand::Ptr defaultBLGP() const;
1756   AMDGPUOperand::Ptr defaultCBSZ() const;
1757   AMDGPUOperand::Ptr defaultABID() const;
1758 
1759   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1760   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1761 };
1762 
1763 struct OptionalOperand {
1764   const char *Name;
1765   AMDGPUOperand::ImmTy Type;
1766   bool IsBit;
1767   bool (*ConvertResult)(int64_t&);
1768 };
1769 
1770 } // end anonymous namespace
1771 
1772 // May be called with an integer type of equivalent bitwidth.
1773 static const fltSemantics *getFltSemantics(unsigned Size) {
1774   switch (Size) {
1775   case 4:
1776     return &APFloat::IEEEsingle();
1777   case 8:
1778     return &APFloat::IEEEdouble();
1779   case 2:
1780     return &APFloat::IEEEhalf();
1781   default:
1782     llvm_unreachable("unsupported fp type");
1783   }
1784 }
1785 
1786 static const fltSemantics *getFltSemantics(MVT VT) {
1787   return getFltSemantics(VT.getSizeInBits() / 8);
1788 }
1789 
1790 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1791   switch (OperandType) {
1792   case AMDGPU::OPERAND_REG_IMM_INT32:
1793   case AMDGPU::OPERAND_REG_IMM_FP32:
1794   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1795   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1796   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1797   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1798   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1799   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1800   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1801   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1802   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1803   case AMDGPU::OPERAND_KIMM32:
1804     return &APFloat::IEEEsingle();
1805   case AMDGPU::OPERAND_REG_IMM_INT64:
1806   case AMDGPU::OPERAND_REG_IMM_FP64:
1807   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1808   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1809   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1810     return &APFloat::IEEEdouble();
1811   case AMDGPU::OPERAND_REG_IMM_INT16:
1812   case AMDGPU::OPERAND_REG_IMM_FP16:
1813   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1814   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1815   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1816   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1817   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1818   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1819   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1820   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1821   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1822   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1823   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1824   case AMDGPU::OPERAND_KIMM16:
1825     return &APFloat::IEEEhalf();
1826   default:
1827     llvm_unreachable("unsupported fp type");
1828   }
1829 }
1830 
1831 //===----------------------------------------------------------------------===//
1832 // Operand
1833 //===----------------------------------------------------------------------===//
1834 
1835 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1836   bool Lost;
1837 
1838   // Convert the literal to the target type's floating-point semantics.
1839   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1840                                                APFloat::rmNearestTiesToEven,
1841                                                &Lost);
1842   // Precision loss is allowed, but not overflow or underflow.
1843   if (Status != APFloat::opOK &&
1844       Lost &&
1845       ((Status & APFloat::opOverflow)  != 0 ||
1846        (Status & APFloat::opUnderflow) != 0)) {
1847     return false;
1848   }
1849 
1850   return true;
1851 }
1852 
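// Return true if Val fits into Size bits when interpreted either as an
// unsigned or as a signed integer, i.e. truncating to Size bits loses no
// information under at least one of the two interpretations.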
1853 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1854   return isUIntN(Size, Val) || isIntN(Size, Val);
1855 }
1856 
1857 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1858   if (VT.getScalarType() == MVT::i16) {
1859     // FP immediate values are broken; only accept inlinable integer literals.
1860     return isInlinableIntLiteral(Val);
1861   }
1862 
1863   // f16/v2f16 operands work correctly for all values.
1864   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1865 }
1866 
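// Check whether this parsed immediate can be encoded as an inline constant
// for an operand of the given type, as opposed to requiring a literal.
// For example (assuming the standard AMDGPU inline constants), -4 or 0.5
// can be inlined for a 32-bit operand, while 100 or 0.3 need a literal.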
1867 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1868 
1869   // This is a hack to enable named inline values like
1870   // shared_base with both 32-bit and 64-bit operands.
1871   // Note that these values are defined as
1872   // 32-bit operands only.
1873   if (isInlineValue()) {
1874     return true;
1875   }
1876 
1877   if (!isImmTy(ImmTyNone)) {
1878     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1879     return false;
1880   }
1881   // TODO: We should avoid using host float here. It would be better to
1882   // check the float bit values, which is what a few other places do.
1883   // We've had bot failures before due to weird NaN support on mips hosts.
1884 
1885   APInt Literal(64, Imm.Val);
1886 
1887   if (Imm.IsFPImm) { // We got an fp literal token
1888     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1889       return AMDGPU::isInlinableLiteral64(Imm.Val,
1890                                           AsmParser->hasInv2PiInlineImm());
1891     }
1892 
1893     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1894     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1895       return false;
1896 
1897     if (type.getScalarSizeInBits() == 16) {
1898       return isInlineableLiteralOp16(
1899         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1900         type, AsmParser->hasInv2PiInlineImm());
1901     }
1902 
1903     // Check if single precision literal is inlinable
1904     return AMDGPU::isInlinableLiteral32(
1905       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1906       AsmParser->hasInv2PiInlineImm());
1907   }
1908 
1909   // We got an int literal token.
1910   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1911     return AMDGPU::isInlinableLiteral64(Imm.Val,
1912                                         AsmParser->hasInv2PiInlineImm());
1913   }
1914 
1915   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1916     return false;
1917   }
1918 
1919   if (type.getScalarSizeInBits() == 16) {
1920     return isInlineableLiteralOp16(
1921       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1922       type, AsmParser->hasInv2PiInlineImm());
1923   }
1924 
1925   return AMDGPU::isInlinableLiteral32(
1926     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1927     AsmParser->hasInv2PiInlineImm());
1928 }
1929 
1930 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1931   // Check that this immediate can be added as a literal
1932   if (!isImmTy(ImmTyNone)) {
1933     return false;
1934   }
1935 
1936   if (!Imm.IsFPImm) {
1937     // We got an int literal token.
1938 
1939     if (type == MVT::f64 && hasFPModifiers()) {
1940       // FP modifiers cannot be applied to int literals while preserving the same
1941       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1942       // ambiguity, disable these cases.
1943       return false;
1944     }
1945 
1946     unsigned Size = type.getSizeInBits();
1947     if (Size == 64)
1948       Size = 32;
1949 
1950     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1951     // types.
1952     return isSafeTruncation(Imm.Val, Size);
1953   }
1954 
1955   // We got an fp literal token
1956   if (type == MVT::f64) { // Expected 64-bit fp operand
1957     // The low 32 bits of the literal will be set to zero, but such literals are accepted
1958     return true;
1959   }
1960 
1961   if (type == MVT::i64) { // Expected 64-bit int operand
1962     // We don't allow fp literals in 64-bit integer instructions. It is
1963     // unclear how we should encode them.
1964     return false;
1965   }
1966 
1967   // We allow fp literals with f16x2 operands assuming that the specified
1968   // literal goes into the lower half and the upper half is zero. We also
1969   // require that the literal may be losslessly converted to f16.
1970   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1971                      (type == MVT::v2i16)? MVT::i16 :
1972                      (type == MVT::v2f32)? MVT::f32 : type;
1973 
1974   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1975   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1976 }
1977 
1978 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1979   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1980 }
1981 
1982 bool AMDGPUOperand::isVRegWithInputMods() const {
1983   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1984          // GFX90A allows DPP on 64-bit operands.
1985          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1986           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1987 }
1988 
1989 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1990   if (AsmParser->isVI())
1991     return isVReg32();
1992   else if (AsmParser->isGFX9Plus())
1993     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1994   else
1995     return false;
1996 }
1997 
1998 bool AMDGPUOperand::isSDWAFP16Operand() const {
1999   return isSDWAOperand(MVT::f16);
2000 }
2001 
2002 bool AMDGPUOperand::isSDWAFP32Operand() const {
2003   return isSDWAOperand(MVT::f32);
2004 }
2005 
2006 bool AMDGPUOperand::isSDWAInt16Operand() const {
2007   return isSDWAOperand(MVT::i16);
2008 }
2009 
2010 bool AMDGPUOperand::isSDWAInt32Operand() const {
2011   return isSDWAOperand(MVT::i32);
2012 }
2013 
2014 bool AMDGPUOperand::isBoolReg() const {
2015   auto FB = AsmParser->getFeatureBits();
2016   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2017                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2018 }
2019 
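// Apply parsed FP source modifiers directly to the bit pattern of a
// floating-point literal: 'abs' clears the sign bit and 'neg' toggles it.
// Size is the operand size in bytes (2, 4 or 8).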
2020 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2021 {
2022   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2023   assert(Size == 2 || Size == 4 || Size == 8);
2024 
2025   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2026 
2027   if (Imm.Mods.Abs) {
2028     Val &= ~FpSignMask;
2029   }
2030   if (Imm.Mods.Neg) {
2031     Val ^= FpSignMask;
2032   }
2033 
2034   return Val;
2035 }
2036 
2037 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2038   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2039                              Inst.getNumOperands())) {
2040     addLiteralImmOperand(Inst, Imm.Val,
2041                          ApplyModifiers &
2042                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2043   } else {
2044     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2045     Inst.addOperand(MCOperand::createImm(Imm.Val));
2046     setImmKindNone();
2047   }
2048 }
2049 
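// Encode an immediate into a source operand that accepts literals.
// Inline-constant values are emitted as-is; everything else is emitted as a
// literal, truncated or rounded to the operand's size where necessary.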
2050 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2051   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2052   auto OpNum = Inst.getNumOperands();
2053   // Check that this operand accepts literals
2054   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2055 
2056   if (ApplyModifiers) {
2057     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2058     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2059     Val = applyInputFPModifiers(Val, Size);
2060   }
2061 
2062   APInt Literal(64, Val);
2063   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2064 
2065   if (Imm.IsFPImm) { // We got an fp literal token
2066     switch (OpTy) {
2067     case AMDGPU::OPERAND_REG_IMM_INT64:
2068     case AMDGPU::OPERAND_REG_IMM_FP64:
2069     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2070     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2071     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2072       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2073                                        AsmParser->hasInv2PiInlineImm())) {
2074         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2075         setImmKindConst();
2076         return;
2077       }
2078 
2079       // Non-inlineable
2080       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2081         // For fp operands we check if low 32 bits are zeros
2082         if (Literal.getLoBits(32) != 0) {
2083           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2084           "Can't encode literal as exact 64-bit floating-point operand. "
2085           "Low 32-bits will be set to zero");
2086         }
2087 
2088         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2089         setImmKindLiteral();
2090         return;
2091       }
2092 
2093       // We don't allow fp literals in 64-bit integer instructions. It is
2094       // unclear how we should encode them. This case should be checked earlier
2095       // in predicate methods (isLiteralImm())
2096       llvm_unreachable("fp literal in 64-bit integer instruction.");
2097 
2098     case AMDGPU::OPERAND_REG_IMM_INT32:
2099     case AMDGPU::OPERAND_REG_IMM_FP32:
2100     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2101     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2102     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2103     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2104     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2105     case AMDGPU::OPERAND_REG_IMM_INT16:
2106     case AMDGPU::OPERAND_REG_IMM_FP16:
2107     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2108     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2109     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2110     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2111     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2112     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2113     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2114     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2115     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2116     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2117     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2118     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2119     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2120     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2121     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2122     case AMDGPU::OPERAND_KIMM32:
2123     case AMDGPU::OPERAND_KIMM16: {
2124       bool lost;
2125       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2126       // Convert the literal to the operand's floating-point semantics
2127       FPLiteral.convert(*getOpFltSemantics(OpTy),
2128                         APFloat::rmNearestTiesToEven, &lost);
2129       // Precision loss is allowed, but not overflow or underflow. This should be
2130       // checked earlier in isLiteralImm()
2131 
2132       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2133       Inst.addOperand(MCOperand::createImm(ImmVal));
2134       setImmKindLiteral();
2135       return;
2136     }
2137     default:
2138       llvm_unreachable("invalid operand size");
2139     }
2140 
2141     return;
2142   }
2143 
2144   // We got an int literal token.
2145   // Only sign extend inline immediates.
2146   switch (OpTy) {
2147   case AMDGPU::OPERAND_REG_IMM_INT32:
2148   case AMDGPU::OPERAND_REG_IMM_FP32:
2149   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2150   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2151   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2152   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2153   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2154   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2155   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2156   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2157   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2158   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2159   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2160     if (isSafeTruncation(Val, 32) &&
2161         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2162                                      AsmParser->hasInv2PiInlineImm())) {
2163       Inst.addOperand(MCOperand::createImm(Val));
2164       setImmKindConst();
2165       return;
2166     }
2167 
2168     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2169     setImmKindLiteral();
2170     return;
2171 
2172   case AMDGPU::OPERAND_REG_IMM_INT64:
2173   case AMDGPU::OPERAND_REG_IMM_FP64:
2174   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2175   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2176   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2177     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2178       Inst.addOperand(MCOperand::createImm(Val));
2179       setImmKindConst();
2180       return;
2181     }
2182 
2183     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2184     setImmKindLiteral();
2185     return;
2186 
2187   case AMDGPU::OPERAND_REG_IMM_INT16:
2188   case AMDGPU::OPERAND_REG_IMM_FP16:
2189   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2190   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2191   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2192   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2193   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2194     if (isSafeTruncation(Val, 16) &&
2195         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2196                                      AsmParser->hasInv2PiInlineImm())) {
2197       Inst.addOperand(MCOperand::createImm(Val));
2198       setImmKindConst();
2199       return;
2200     }
2201 
2202     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2203     setImmKindLiteral();
2204     return;
2205 
2206   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2207   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2208   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2209   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2210     assert(isSafeTruncation(Val, 16));
2211     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2212                                         AsmParser->hasInv2PiInlineImm()));
2213 
2214     Inst.addOperand(MCOperand::createImm(Val));
2215     return;
2216   }
2217   case AMDGPU::OPERAND_KIMM32:
2218     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2219     setImmKindNone();
2220     return;
2221   case AMDGPU::OPERAND_KIMM16:
2222     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2223     setImmKindNone();
2224     return;
2225   default:
2226     llvm_unreachable("invalid operand size");
2227   }
2228 }
2229 
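// Emit the immediate for a k-immediate (KIMM) operand of the given bit width.
// Integer tokens are truncated to Bitwidth; fp tokens are first rounded to
// the matching floating-point format.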
2230 template <unsigned Bitwidth>
2231 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2232   APInt Literal(64, Imm.Val);
2233   setImmKindNone();
2234 
2235   if (!Imm.IsFPImm) {
2236     // We got an int literal token.
2237     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2238     return;
2239   }
2240 
2241   bool Lost;
2242   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2243   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2244                     APFloat::rmNearestTiesToEven, &Lost);
2245   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2246 }
2247 
2248 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2249   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2250 }
2251 
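// Named special registers that are treated as inline constant values rather
// than ordinary register operands (e.g. src_shared_base, src_scc, null).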
2252 static bool isInlineValue(unsigned Reg) {
2253   switch (Reg) {
2254   case AMDGPU::SRC_SHARED_BASE:
2255   case AMDGPU::SRC_SHARED_LIMIT:
2256   case AMDGPU::SRC_PRIVATE_BASE:
2257   case AMDGPU::SRC_PRIVATE_LIMIT:
2258   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2259     return true;
2260   case AMDGPU::SRC_VCCZ:
2261   case AMDGPU::SRC_EXECZ:
2262   case AMDGPU::SRC_SCC:
2263     return true;
2264   case AMDGPU::SGPR_NULL:
2265     return true;
2266   default:
2267     return false;
2268   }
2269 }
2270 
2271 bool AMDGPUOperand::isInlineValue() const {
2272   return isRegKind() && ::isInlineValue(getReg());
2273 }
2274 
2275 //===----------------------------------------------------------------------===//
2276 // AsmParser
2277 //===----------------------------------------------------------------------===//
2278 
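// Map a register kind and a width in bits to the corresponding register
// class ID, or -1 if the kind has no class of that width.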
2279 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2280   if (Is == IS_VGPR) {
2281     switch (RegWidth) {
2282       default: return -1;
2283       case 32:
2284         return AMDGPU::VGPR_32RegClassID;
2285       case 64:
2286         return AMDGPU::VReg_64RegClassID;
2287       case 96:
2288         return AMDGPU::VReg_96RegClassID;
2289       case 128:
2290         return AMDGPU::VReg_128RegClassID;
2291       case 160:
2292         return AMDGPU::VReg_160RegClassID;
2293       case 192:
2294         return AMDGPU::VReg_192RegClassID;
2295       case 224:
2296         return AMDGPU::VReg_224RegClassID;
2297       case 256:
2298         return AMDGPU::VReg_256RegClassID;
2299       case 512:
2300         return AMDGPU::VReg_512RegClassID;
2301       case 1024:
2302         return AMDGPU::VReg_1024RegClassID;
2303     }
2304   } else if (Is == IS_TTMP) {
2305     switch (RegWidth) {
2306       default: return -1;
2307       case 32:
2308         return AMDGPU::TTMP_32RegClassID;
2309       case 64:
2310         return AMDGPU::TTMP_64RegClassID;
2311       case 128:
2312         return AMDGPU::TTMP_128RegClassID;
2313       case 256:
2314         return AMDGPU::TTMP_256RegClassID;
2315       case 512:
2316         return AMDGPU::TTMP_512RegClassID;
2317     }
2318   } else if (Is == IS_SGPR) {
2319     switch (RegWidth) {
2320       default: return -1;
2321       case 32:
2322         return AMDGPU::SGPR_32RegClassID;
2323       case 64:
2324         return AMDGPU::SGPR_64RegClassID;
2325       case 96:
2326         return AMDGPU::SGPR_96RegClassID;
2327       case 128:
2328         return AMDGPU::SGPR_128RegClassID;
2329       case 160:
2330         return AMDGPU::SGPR_160RegClassID;
2331       case 192:
2332         return AMDGPU::SGPR_192RegClassID;
2333       case 224:
2334         return AMDGPU::SGPR_224RegClassID;
2335       case 256:
2336         return AMDGPU::SGPR_256RegClassID;
2337       case 512:
2338         return AMDGPU::SGPR_512RegClassID;
2339     }
2340   } else if (Is == IS_AGPR) {
2341     switch (RegWidth) {
2342       default: return -1;
2343       case 32:
2344         return AMDGPU::AGPR_32RegClassID;
2345       case 64:
2346         return AMDGPU::AReg_64RegClassID;
2347       case 96:
2348         return AMDGPU::AReg_96RegClassID;
2349       case 128:
2350         return AMDGPU::AReg_128RegClassID;
2351       case 160:
2352         return AMDGPU::AReg_160RegClassID;
2353       case 192:
2354         return AMDGPU::AReg_192RegClassID;
2355       case 224:
2356         return AMDGPU::AReg_224RegClassID;
2357       case 256:
2358         return AMDGPU::AReg_256RegClassID;
2359       case 512:
2360         return AMDGPU::AReg_512RegClassID;
2361       case 1024:
2362         return AMDGPU::AReg_1024RegClassID;
2363     }
2364   }
2365   return -1;
2366 }
2367 
2368 static unsigned getSpecialRegForName(StringRef RegName) {
2369   return StringSwitch<unsigned>(RegName)
2370     .Case("exec", AMDGPU::EXEC)
2371     .Case("vcc", AMDGPU::VCC)
2372     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2373     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2374     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2375     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2376     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2377     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2378     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2379     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2380     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2381     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2382     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2383     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2384     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2385     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2386     .Case("m0", AMDGPU::M0)
2387     .Case("vccz", AMDGPU::SRC_VCCZ)
2388     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2389     .Case("execz", AMDGPU::SRC_EXECZ)
2390     .Case("src_execz", AMDGPU::SRC_EXECZ)
2391     .Case("scc", AMDGPU::SRC_SCC)
2392     .Case("src_scc", AMDGPU::SRC_SCC)
2393     .Case("tba", AMDGPU::TBA)
2394     .Case("tma", AMDGPU::TMA)
2395     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2396     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2397     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2398     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2399     .Case("vcc_lo", AMDGPU::VCC_LO)
2400     .Case("vcc_hi", AMDGPU::VCC_HI)
2401     .Case("exec_lo", AMDGPU::EXEC_LO)
2402     .Case("exec_hi", AMDGPU::EXEC_HI)
2403     .Case("tma_lo", AMDGPU::TMA_LO)
2404     .Case("tma_hi", AMDGPU::TMA_HI)
2405     .Case("tba_lo", AMDGPU::TBA_LO)
2406     .Case("tba_hi", AMDGPU::TBA_HI)
2407     .Case("pc", AMDGPU::PC_REG)
2408     .Case("null", AMDGPU::SGPR_NULL)
2409     .Default(AMDGPU::NoRegister);
2410 }
2411 
2412 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2413                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2414   auto R = parseRegister();
2415   if (!R) return true;
2416   assert(R->isReg());
2417   RegNo = R->getReg();
2418   StartLoc = R->getStartLoc();
2419   EndLoc = R->getEndLoc();
2420   return false;
2421 }
2422 
2423 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2424                                     SMLoc &EndLoc) {
2425   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2426 }
2427 
2428 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2429                                                        SMLoc &StartLoc,
2430                                                        SMLoc &EndLoc) {
2431   bool Result =
2432       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2433   bool PendingErrors = getParser().hasPendingError();
2434   getParser().clearPendingErrors();
2435   if (PendingErrors)
2436     return MatchOperand_ParseFail;
2437   if (Result)
2438     return MatchOperand_NoMatch;
2439   return MatchOperand_Success;
2440 }
2441 
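// Fold one more register into a register list such as [s0,s1,s2,s3].
// Special registers may only be combined from their _LO/_HI halves; regular
// registers must have consecutive indices, and each new element widens the
// list by 32 bits.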
2442 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2443                                             RegisterKind RegKind, unsigned Reg1,
2444                                             SMLoc Loc) {
2445   switch (RegKind) {
2446   case IS_SPECIAL:
2447     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2448       Reg = AMDGPU::EXEC;
2449       RegWidth = 64;
2450       return true;
2451     }
2452     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2453       Reg = AMDGPU::FLAT_SCR;
2454       RegWidth = 64;
2455       return true;
2456     }
2457     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2458       Reg = AMDGPU::XNACK_MASK;
2459       RegWidth = 64;
2460       return true;
2461     }
2462     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2463       Reg = AMDGPU::VCC;
2464       RegWidth = 64;
2465       return true;
2466     }
2467     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2468       Reg = AMDGPU::TBA;
2469       RegWidth = 64;
2470       return true;
2471     }
2472     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2473       Reg = AMDGPU::TMA;
2474       RegWidth = 64;
2475       return true;
2476     }
2477     Error(Loc, "register does not fit in the list");
2478     return false;
2479   case IS_VGPR:
2480   case IS_SGPR:
2481   case IS_AGPR:
2482   case IS_TTMP:
2483     if (Reg1 != Reg + RegWidth / 32) {
2484       Error(Loc, "registers in a list must have consecutive indices");
2485       return false;
2486     }
2487     RegWidth += 32;
2488     return true;
2489   default:
2490     llvm_unreachable("unexpected register kind");
2491   }
2492 }
2493 
2494 struct RegInfo {
2495   StringLiteral Name;
2496   RegisterKind Kind;
2497 };
2498 
2499 static constexpr RegInfo RegularRegisters[] = {
2500   {{"v"},    IS_VGPR},
2501   {{"s"},    IS_SGPR},
2502   {{"ttmp"}, IS_TTMP},
2503   {{"acc"},  IS_AGPR},
2504   {{"a"},    IS_AGPR},
2505 };
2506 
2507 static bool isRegularReg(RegisterKind Kind) {
2508   return Kind == IS_VGPR ||
2509          Kind == IS_SGPR ||
2510          Kind == IS_TTMP ||
2511          Kind == IS_AGPR;
2512 }
2513 
2514 static const RegInfo* getRegularRegInfo(StringRef Str) {
2515   for (const RegInfo &Reg : RegularRegisters)
2516     if (Str.startswith(Reg.Name))
2517       return &Reg;
2518   return nullptr;
2519 }
2520 
2521 static bool getRegNum(StringRef Str, unsigned& Num) {
2522   return !Str.getAsInteger(10, Num);
2523 }
2524 
2525 bool
2526 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2527                             const AsmToken &NextToken) const {
2528 
2529   // A list of consecutive registers: [s0,s1,s2,s3]
2530   if (Token.is(AsmToken::LBrac))
2531     return true;
2532 
2533   if (!Token.is(AsmToken::Identifier))
2534     return false;
2535 
2536   // A single register like s0 or a range of registers like s[0:1]
2537 
2538   StringRef Str = Token.getString();
2539   const RegInfo *Reg = getRegularRegInfo(Str);
2540   if (Reg) {
2541     StringRef RegName = Reg->Name;
2542     StringRef RegSuffix = Str.substr(RegName.size());
2543     if (!RegSuffix.empty()) {
2544       unsigned Num;
2545       // A single register with an index: rXX
2546       if (getRegNum(RegSuffix, Num))
2547         return true;
2548     } else {
2549       // A range of registers: r[XX:YY].
2550       if (NextToken.is(AsmToken::LBrac))
2551         return true;
2552     }
2553   }
2554 
2555   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2556 }
2557 
2558 bool
2559 AMDGPUAsmParser::isRegister()
2560 {
2561   return isRegister(getToken(), peekToken());
2562 }
2563 
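// Convert a parsed register kind, first index and width into a concrete
// register, enforcing the SGPR/TTMP alignment rule. For example, a 128-bit
// SGPR tuple must start at an index that is a multiple of 4, so s[2:5] is
// rejected while s[4:7] is accepted.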
2564 unsigned
2565 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2566                                unsigned RegNum,
2567                                unsigned RegWidth,
2568                                SMLoc Loc) {
2569 
2570   assert(isRegularReg(RegKind));
2571 
2572   unsigned AlignSize = 1;
2573   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2574     // SGPR and TTMP registers must be aligned.
2575     // Max required alignment is 4 dwords.
2576     AlignSize = std::min(RegWidth / 32, 4u);
2577   }
2578 
2579   if (RegNum % AlignSize != 0) {
2580     Error(Loc, "invalid register alignment");
2581     return AMDGPU::NoRegister;
2582   }
2583 
2584   unsigned RegIdx = RegNum / AlignSize;
2585   int RCID = getRegClass(RegKind, RegWidth);
2586   if (RCID == -1) {
2587     Error(Loc, "invalid or unsupported register size");
2588     return AMDGPU::NoRegister;
2589   }
2590 
2591   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2592   const MCRegisterClass RC = TRI->getRegClass(RCID);
2593   if (RegIdx >= RC.getNumRegs()) {
2594     Error(Loc, "register index is out of range");
2595     return AMDGPU::NoRegister;
2596   }
2597 
2598   return RC.getRegister(RegIdx);
2599 }
2600 
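// Parse the bracketed index part of a register reference, either a single
// index like "[5]" or a range like "[0:3]", and return the first index and
// the total width in bits (32 bits per register).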
2601 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2602   int64_t RegLo, RegHi;
2603   if (!skipToken(AsmToken::LBrac, "missing register index"))
2604     return false;
2605 
2606   SMLoc FirstIdxLoc = getLoc();
2607   SMLoc SecondIdxLoc;
2608 
2609   if (!parseExpr(RegLo))
2610     return false;
2611 
2612   if (trySkipToken(AsmToken::Colon)) {
2613     SecondIdxLoc = getLoc();
2614     if (!parseExpr(RegHi))
2615       return false;
2616   } else {
2617     RegHi = RegLo;
2618   }
2619 
2620   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2621     return false;
2622 
2623   if (!isUInt<32>(RegLo)) {
2624     Error(FirstIdxLoc, "invalid register index");
2625     return false;
2626   }
2627 
2628   if (!isUInt<32>(RegHi)) {
2629     Error(SecondIdxLoc, "invalid register index");
2630     return false;
2631   }
2632 
2633   if (RegLo > RegHi) {
2634     Error(FirstIdxLoc, "first register index should not exceed second index");
2635     return false;
2636   }
2637 
2638   Num = static_cast<unsigned>(RegLo);
2639   RegWidth = 32 * ((RegHi - RegLo) + 1);
2640   return true;
2641 }
2642 
2643 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2644                                           unsigned &RegNum, unsigned &RegWidth,
2645                                           SmallVectorImpl<AsmToken> &Tokens) {
2646   assert(isToken(AsmToken::Identifier));
2647   unsigned Reg = getSpecialRegForName(getTokenStr());
2648   if (Reg) {
2649     RegNum = 0;
2650     RegWidth = 32;
2651     RegKind = IS_SPECIAL;
2652     Tokens.push_back(getToken());
2653     lex(); // skip register name
2654   }
2655   return Reg;
2656 }
2657 
2658 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2659                                           unsigned &RegNum, unsigned &RegWidth,
2660                                           SmallVectorImpl<AsmToken> &Tokens) {
2661   assert(isToken(AsmToken::Identifier));
2662   StringRef RegName = getTokenStr();
2663   auto Loc = getLoc();
2664 
2665   const RegInfo *RI = getRegularRegInfo(RegName);
2666   if (!RI) {
2667     Error(Loc, "invalid register name");
2668     return AMDGPU::NoRegister;
2669   }
2670 
2671   Tokens.push_back(getToken());
2672   lex(); // skip register name
2673 
2674   RegKind = RI->Kind;
2675   StringRef RegSuffix = RegName.substr(RI->Name.size());
2676   if (!RegSuffix.empty()) {
2677     // Single 32-bit register: vXX.
2678     if (!getRegNum(RegSuffix, RegNum)) {
2679       Error(Loc, "invalid register index");
2680       return AMDGPU::NoRegister;
2681     }
2682     RegWidth = 32;
2683   } else {
2684     // Range of registers: v[XX:YY]. ":YY" is optional.
2685     if (!ParseRegRange(RegNum, RegWidth))
2686       return AMDGPU::NoRegister;
2687   }
2688 
2689   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2690 }
2691 
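// Parse a bracketed list of same-kind 32-bit registers, e.g. [v0,v1,v2,v3],
// and fold it into a single register of the combined width.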
2692 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2693                                        unsigned &RegWidth,
2694                                        SmallVectorImpl<AsmToken> &Tokens) {
2695   unsigned Reg = AMDGPU::NoRegister;
2696   auto ListLoc = getLoc();
2697 
2698   if (!skipToken(AsmToken::LBrac,
2699                  "expected a register or a list of registers")) {
2700     return AMDGPU::NoRegister;
2701   }
2702 
2703   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2704 
2705   auto Loc = getLoc();
2706   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2707     return AMDGPU::NoRegister;
2708   if (RegWidth != 32) {
2709     Error(Loc, "expected a single 32-bit register");
2710     return AMDGPU::NoRegister;
2711   }
2712 
2713   for (; trySkipToken(AsmToken::Comma); ) {
2714     RegisterKind NextRegKind;
2715     unsigned NextReg, NextRegNum, NextRegWidth;
2716     Loc = getLoc();
2717 
2718     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2719                              NextRegNum, NextRegWidth,
2720                              Tokens)) {
2721       return AMDGPU::NoRegister;
2722     }
2723     if (NextRegWidth != 32) {
2724       Error(Loc, "expected a single 32-bit register");
2725       return AMDGPU::NoRegister;
2726     }
2727     if (NextRegKind != RegKind) {
2728       Error(Loc, "registers in a list must be of the same kind");
2729       return AMDGPU::NoRegister;
2730     }
2731     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2732       return AMDGPU::NoRegister;
2733   }
2734 
2735   if (!skipToken(AsmToken::RBrac,
2736                  "expected a comma or a closing square bracket")) {
2737     return AMDGPU::NoRegister;
2738   }
2739 
2740   if (isRegularReg(RegKind))
2741     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2742 
2743   return Reg;
2744 }
2745 
2746 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2747                                           unsigned &RegNum, unsigned &RegWidth,
2748                                           SmallVectorImpl<AsmToken> &Tokens) {
2749   auto Loc = getLoc();
2750   Reg = AMDGPU::NoRegister;
2751 
2752   if (isToken(AsmToken::Identifier)) {
2753     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2754     if (Reg == AMDGPU::NoRegister)
2755       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2756   } else {
2757     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2758   }
2759 
2760   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2761   if (Reg == AMDGPU::NoRegister) {
2762     assert(Parser.hasPendingError());
2763     return false;
2764   }
2765 
2766   if (!subtargetHasRegister(*TRI, Reg)) {
2767     if (Reg == AMDGPU::SGPR_NULL) {
2768       Error(Loc, "'null' operand is not supported on this GPU");
2769     } else {
2770       Error(Loc, "register not available on this GPU");
2771     }
2772     return false;
2773   }
2774 
2775   return true;
2776 }
2777 
2778 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2779                                           unsigned &RegNum, unsigned &RegWidth,
2780                                           bool RestoreOnFailure /*=false*/) {
2781   Reg = AMDGPU::NoRegister;
2782 
2783   SmallVector<AsmToken, 1> Tokens;
2784   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2785     if (RestoreOnFailure) {
2786       while (!Tokens.empty()) {
2787         getLexer().UnLex(Tokens.pop_back_val());
2788       }
2789     }
2790     return true;
2791   }
2792   return false;
2793 }
2794 
2795 Optional<StringRef>
2796 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2797   switch (RegKind) {
2798   case IS_VGPR:
2799     return StringRef(".amdgcn.next_free_vgpr");
2800   case IS_SGPR:
2801     return StringRef(".amdgcn.next_free_sgpr");
2802   default:
2803     return None;
2804   }
2805 }
2806 
2807 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2808   auto SymbolName = getGprCountSymbolName(RegKind);
2809   assert(SymbolName && "initializing invalid register kind");
2810   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2811   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2812 }
2813 
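// Keep the .amdgcn.next_free_{v,s}gpr symbol one past the highest register
// index referenced so far.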
2814 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2815                                             unsigned DwordRegIndex,
2816                                             unsigned RegWidth) {
2817   // Symbols are only defined for GCN targets
2818   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2819     return true;
2820 
2821   auto SymbolName = getGprCountSymbolName(RegKind);
2822   if (!SymbolName)
2823     return true;
2824   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2825 
2826   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2827   int64_t OldCount;
2828 
2829   if (!Sym->isVariable())
2830     return !Error(getLoc(),
2831                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2832   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2833     return !Error(
2834         getLoc(),
2835         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2836 
2837   if (OldCount <= NewMax)
2838     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2839 
2840   return true;
2841 }
2842 
2843 std::unique_ptr<AMDGPUOperand>
2844 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2845   const auto &Tok = getToken();
2846   SMLoc StartLoc = Tok.getLoc();
2847   SMLoc EndLoc = Tok.getEndLoc();
2848   RegisterKind RegKind;
2849   unsigned Reg, RegNum, RegWidth;
2850 
2851   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2852     return nullptr;
2853   }
2854   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2855     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2856       return nullptr;
2857   } else
2858     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2859   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2860 }
2861 
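// Parse a plain immediate operand: either a (possibly negated) floating-point
// literal, stored as the raw bit pattern of an IEEE double with IsFPImm set,
// or an integer/symbolic MC expression.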
2862 OperandMatchResultTy
2863 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2864   // TODO: add syntactic sugar for 1/(2*PI)
2865 
2866   assert(!isRegister());
2867   assert(!isModifier());
2868 
2869   const auto& Tok = getToken();
2870   const auto& NextTok = peekToken();
2871   bool IsReal = Tok.is(AsmToken::Real);
2872   SMLoc S = getLoc();
2873   bool Negate = false;
2874 
2875   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2876     lex();
2877     IsReal = true;
2878     Negate = true;
2879   }
2880 
2881   if (IsReal) {
2882     // Floating-point expressions are not supported.
2883     // Only floating-point literals with an optional
2884     // sign are allowed.
2885 
2886     StringRef Num = getTokenStr();
2887     lex();
2888 
2889     APFloat RealVal(APFloat::IEEEdouble());
2890     auto roundMode = APFloat::rmNearestTiesToEven;
2891     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2892       return MatchOperand_ParseFail;
2893     }
2894     if (Negate)
2895       RealVal.changeSign();
2896 
2897     Operands.push_back(
2898       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2899                                AMDGPUOperand::ImmTyNone, true));
2900 
2901     return MatchOperand_Success;
2902 
2903   } else {
2904     int64_t IntVal;
2905     const MCExpr *Expr;
2906     SMLoc S = getLoc();
2907 
2908     if (HasSP3AbsModifier) {
2909       // This is a workaround for handling expressions
2910       // as arguments of the SP3 'abs' modifier, for example:
2911       //     |1.0|
2912       //     |-1|
2913       //     |1+x|
2914       // This syntax is not compatible with the syntax of standard
2915       // MC expressions (due to the trailing '|').
2916       SMLoc EndLoc;
2917       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2918         return MatchOperand_ParseFail;
2919     } else {
2920       if (Parser.parseExpression(Expr))
2921         return MatchOperand_ParseFail;
2922     }
2923 
2924     if (Expr->evaluateAsAbsolute(IntVal)) {
2925       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2926     } else {
2927       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2928     }
2929 
2930     return MatchOperand_Success;
2931   }
2932 
2933   return MatchOperand_NoMatch;
2934 }
2935 
2936 OperandMatchResultTy
2937 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2938   if (!isRegister())
2939     return MatchOperand_NoMatch;
2940 
2941   if (auto R = parseRegister()) {
2942     assert(R->isReg());
2943     Operands.push_back(std::move(R));
2944     return MatchOperand_Success;
2945   }
2946   return MatchOperand_ParseFail;
2947 }
2948 
2949 OperandMatchResultTy
2950 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2951   auto res = parseReg(Operands);
2952   if (res != MatchOperand_NoMatch) {
2953     return res;
2954   } else if (isModifier()) {
2955     return MatchOperand_NoMatch;
2956   } else {
2957     return parseImm(Operands, HasSP3AbsMod);
2958   }
2959 }
2960 
2961 bool
2962 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2963   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2964     const auto &str = Token.getString();
2965     return str == "abs" || str == "neg" || str == "sext";
2966   }
2967   return false;
2968 }
2969 
2970 bool
2971 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2972   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2973 }
2974 
2975 bool
2976 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2977   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2978 }
2979 
2980 bool
2981 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2982   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2983 }
2984 
2985 // Check if this is an operand modifier or an opcode modifier
2986 // which may look like an expression but is not. We should
2987 // avoid parsing these modifiers as expressions. Currently
2988 // recognized sequences are:
2989 //   |...|
2990 //   abs(...)
2991 //   neg(...)
2992 //   sext(...)
2993 //   -reg
2994 //   -|...|
2995 //   -abs(...)
2996 //   name:...
2997 // Note that simple opcode modifiers like 'gds' may be parsed as
2998 // expressions; this is a special case. See getExpressionAsToken.
2999 //
3000 bool
3001 AMDGPUAsmParser::isModifier() {
3002 
3003   AsmToken Tok = getToken();
3004   AsmToken NextToken[2];
3005   peekTokens(NextToken);
3006 
3007   return isOperandModifier(Tok, NextToken[0]) ||
3008          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3009          isOpcodeModifierWithVal(Tok, NextToken[0]);
3010 }
3011 
3012 // Check if the current token is an SP3 'neg' modifier.
3013 // Currently this modifier is allowed in the following contexts:
3014 //
3015 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3016 // 2. Before an 'abs' modifier: -abs(...)
3017 // 3. Before an SP3 'abs' modifier: -|...|
3018 //
3019 // In all other cases "-" is handled as a part
3020 // of an expression that follows the sign.
3021 //
3022 // Note: When "-" is followed by an integer literal,
3023 // this is interpreted as integer negation rather
3024 // than a floating-point NEG modifier applied to the literal N.
3025 // Besides being counter-intuitive, such use of a floating-point
3026 // NEG modifier would result in different meanings
3027 // of integer literals used with VOP1/2/C and VOP3,
3028 // for example:
3029 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3030 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3031 // Negative fp literals with a preceding "-" are
3032 // handled likewise, for uniformity.
3033 //
3034 bool
3035 AMDGPUAsmParser::parseSP3NegModifier() {
3036 
3037   AsmToken NextToken[2];
3038   peekTokens(NextToken);
3039 
3040   if (isToken(AsmToken::Minus) &&
3041       (isRegister(NextToken[0], NextToken[1]) ||
3042        NextToken[0].is(AsmToken::Pipe) ||
3043        isId(NextToken[0], "abs"))) {
3044     lex();
3045     return true;
3046   }
3047 
3048   return false;
3049 }
3050 
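// Parse a source operand together with optional FP input modifiers. Both the
// named spelling (neg(...), abs(...)) and the SP3 spelling (a leading '-',
// |...|) are accepted, including combinations such as -|v0|; using both
// spellings of the same modifier at once is diagnosed as an error.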
3051 OperandMatchResultTy
3052 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3053                                               bool AllowImm) {
3054   bool Neg, SP3Neg;
3055   bool Abs, SP3Abs;
3056   SMLoc Loc;
3057 
3058   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3059   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3060     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3061     return MatchOperand_ParseFail;
3062   }
3063 
3064   SP3Neg = parseSP3NegModifier();
3065 
3066   Loc = getLoc();
3067   Neg = trySkipId("neg");
3068   if (Neg && SP3Neg) {
3069     Error(Loc, "expected register or immediate");
3070     return MatchOperand_ParseFail;
3071   }
3072   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3073     return MatchOperand_ParseFail;
3074 
3075   Abs = trySkipId("abs");
3076   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3077     return MatchOperand_ParseFail;
3078 
3079   Loc = getLoc();
3080   SP3Abs = trySkipToken(AsmToken::Pipe);
3081   if (Abs && SP3Abs) {
3082     Error(Loc, "expected register or immediate");
3083     return MatchOperand_ParseFail;
3084   }
3085 
3086   OperandMatchResultTy Res;
3087   if (AllowImm) {
3088     Res = parseRegOrImm(Operands, SP3Abs);
3089   } else {
3090     Res = parseReg(Operands);
3091   }
3092   if (Res != MatchOperand_Success) {
3093     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3094   }
3095 
3096   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3097     return MatchOperand_ParseFail;
3098   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3099     return MatchOperand_ParseFail;
3100   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3101     return MatchOperand_ParseFail;
3102 
3103   AMDGPUOperand::Modifiers Mods;
3104   Mods.Abs = Abs || SP3Abs;
3105   Mods.Neg = Neg || SP3Neg;
3106 
3107   if (Mods.hasFPModifiers()) {
3108     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3109     if (Op.isExpr()) {
3110       Error(Op.getStartLoc(), "expected an absolute expression");
3111       return MatchOperand_ParseFail;
3112     }
3113     Op.setModifiers(Mods);
3114   }
3115   return MatchOperand_Success;
3116 }
3117 
3118 OperandMatchResultTy
3119 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3120                                                bool AllowImm) {
3121   bool Sext = trySkipId("sext");
3122   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3123     return MatchOperand_ParseFail;
3124 
3125   OperandMatchResultTy Res;
3126   if (AllowImm) {
3127     Res = parseRegOrImm(Operands);
3128   } else {
3129     Res = parseReg(Operands);
3130   }
3131   if (Res != MatchOperand_Success) {
3132     return Sext? MatchOperand_ParseFail : Res;
3133   }
3134 
3135   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3136     return MatchOperand_ParseFail;
3137 
3138   AMDGPUOperand::Modifiers Mods;
3139   Mods.Sext = Sext;
3140 
3141   if (Mods.hasIntModifiers()) {
3142     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3143     if (Op.isExpr()) {
3144       Error(Op.getStartLoc(), "expected an absolute expression");
3145       return MatchOperand_ParseFail;
3146     }
3147     Op.setModifiers(Mods);
3148   }
3149 
3150   return MatchOperand_Success;
3151 }
3152 
3153 OperandMatchResultTy
3154 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3155   return parseRegOrImmWithFPInputMods(Operands, false);
3156 }
3157 
3158 OperandMatchResultTy
3159 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3160   return parseRegOrImmWithIntInputMods(Operands, false);
3161 }
3162 
3163 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3164   auto Loc = getLoc();
3165   if (trySkipId("off")) {
3166     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3167                                                 AMDGPUOperand::ImmTyOff, false));
3168     return MatchOperand_Success;
3169   }
3170 
3171   if (!isRegister())
3172     return MatchOperand_NoMatch;
3173 
3174   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3175   if (Reg) {
3176     Operands.push_back(std::move(Reg));
3177     return MatchOperand_Success;
3178   }
3179 
3180   return MatchOperand_ParseFail;
3182 }
3183 
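// Reject matches that contradict an encoding explicitly forced by the mnemonic
// suffix, e.g. a VOP3 match when "_e32" was written, a non-VOP3 match when
// "_e64" was written, or a non-DPP/SDWA match for a "_dpp"/"_sdwa" mnemonic.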
3184 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3185   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3186 
3187   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3188       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3189       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3190       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3191     return Match_InvalidOperand;
3192 
3193   if ((TSFlags & SIInstrFlags::VOP3) &&
3194       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3195       getForcedEncodingSize() != 64)
3196     return Match_PreferE32;
3197 
3198   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3199       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3200     // v_mac_f32/16 allow only dst_sel == DWORD;
3201     auto OpNum =
3202         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3203     const auto &Op = Inst.getOperand(OpNum);
3204     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3205       return Match_InvalidOperand;
3206     }
3207   }
3208 
3209   return Match_Success;
3210 }
3211 
3212 static ArrayRef<unsigned> getAllVariants() {
3213   static const unsigned Variants[] = {
3214     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3215     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3216   };
3217 
3218   return makeArrayRef(Variants);
3219 }
3220 
3221 // Return the asm variants that should be checked for the current mnemonic.
3222 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3223   if (getForcedEncodingSize() == 32) {
3224     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3225     return makeArrayRef(Variants);
3226   }
3227 
3228   if (isForcedVOP3()) {
3229     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3230     return makeArrayRef(Variants);
3231   }
3232 
3233   if (isForcedSDWA()) {
3234     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3235                                         AMDGPUAsmVariants::SDWA9};
3236     return makeArrayRef(Variants);
3237   }
3238 
3239   if (isForcedDPP()) {
3240     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3241     return makeArrayRef(Variants);
3242   }
3243 
3244   return getAllVariants();
3245 }
3246 
3247 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3248   if (getForcedEncodingSize() == 32)
3249     return "e32";
3250 
3251   if (isForcedVOP3())
3252     return "e64";
3253 
3254   if (isForcedSDWA())
3255     return "sdwa";
3256 
3257   if (isForcedDPP())
3258     return "dpp";
3259 
3260   return "";
3261 }
3262 
3263 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3264   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3265   const unsigned Num = Desc.getNumImplicitUses();
3266   for (unsigned i = 0; i < Num; ++i) {
3267     unsigned Reg = Desc.ImplicitUses[i];
3268     switch (Reg) {
3269     case AMDGPU::FLAT_SCR:
3270     case AMDGPU::VCC:
3271     case AMDGPU::VCC_LO:
3272     case AMDGPU::VCC_HI:
3273     case AMDGPU::M0:
3274       return Reg;
3275     default:
3276       break;
3277     }
3278   }
3279   return AMDGPU::NoRegister;
3280 }
3281 
3282 // NB: This code is correct only when used to check constant
3283 // bus limitations because GFX7 does not support f16 inline constants.
3284 // Note that there are no cases in which a GFX7 opcode violates
3285 // constant bus limitations due to the use of an f16 constant.
3286 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3287                                        unsigned OpIdx) const {
3288   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3289 
3290   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3291     return false;
3292   }
3293 
3294   const MCOperand &MO = Inst.getOperand(OpIdx);
3295 
3296   int64_t Val = MO.getImm();
3297   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3298 
3299   switch (OpSize) { // expected operand size
3300   case 8:
3301     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3302   case 4:
3303     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3304   case 2: {
3305     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3306     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3307         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3308         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3309       return AMDGPU::isInlinableIntLiteral(Val);
3310 
3311     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3312         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3313         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3314       return AMDGPU::isInlinableIntLiteralV216(Val);
3315 
3316     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3317         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3318         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3319       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3320 
3321     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3322   }
3323   default:
3324     llvm_unreachable("invalid operand size");
3325   }
3326 }
3327 
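// Return how many scalar values (SGPRs and literals) the constant bus may
// supply to a single instruction: one on pre-GFX10 targets and normally two
// on GFX10+, e.g. (illustrative only) "v_add_f32_e64 v0, s0, s1" is accepted
// on GFX10. 64-bit shifts remain limited to a single scalar input.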
3328 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3329   if (!isGFX10Plus())
3330     return 1;
3331 
3332   switch (Opcode) {
3333   // 64-bit shift instructions can use only one scalar value input
3334   case AMDGPU::V_LSHLREV_B64_e64:
3335   case AMDGPU::V_LSHLREV_B64_gfx10:
3336   case AMDGPU::V_LSHRREV_B64_e64:
3337   case AMDGPU::V_LSHRREV_B64_gfx10:
3338   case AMDGPU::V_ASHRREV_I64_e64:
3339   case AMDGPU::V_ASHRREV_I64_gfx10:
3340   case AMDGPU::V_LSHL_B64_e64:
3341   case AMDGPU::V_LSHR_B64_e64:
3342   case AMDGPU::V_ASHR_I64_e64:
3343     return 1;
3344   default:
3345     return 2;
3346   }
3347 }
3348 
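// An operand occupies the constant bus if it is an SGPR other than null,
// a literal that is not an inline constant, or an unresolved expression.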
3349 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3350   const MCOperand &MO = Inst.getOperand(OpIdx);
3351   if (MO.isImm()) {
3352     return !isInlineConstant(Inst, OpIdx);
3353   } else if (MO.isReg()) {
3354     auto Reg = MO.getReg();
3355     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3356     auto PReg = mc2PseudoReg(Reg);
3357     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3358   } else {
3359     return true;
3360   }
3361 }
3362 
3363 bool
3364 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3365                                                 const OperandVector &Operands) {
3366   const unsigned Opcode = Inst.getOpcode();
3367   const MCInstrDesc &Desc = MII.get(Opcode);
3368   unsigned LastSGPR = AMDGPU::NoRegister;
3369   unsigned ConstantBusUseCount = 0;
3370   unsigned NumLiterals = 0;
3371   unsigned LiteralSize;
3372 
3373   if (Desc.TSFlags &
3374       (SIInstrFlags::VOPC |
3375        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3376        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3377        SIInstrFlags::SDWA)) {
3378     // Check special imm operands (used by madmk, etc)
3379     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3380       ++NumLiterals;
3381       LiteralSize = 4;
3382     }
3383 
3384     SmallDenseSet<unsigned> SGPRsUsed;
3385     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3386     if (SGPRUsed != AMDGPU::NoRegister) {
3387       SGPRsUsed.insert(SGPRUsed);
3388       ++ConstantBusUseCount;
3389     }
3390 
3391     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3392     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3393     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3394 
3395     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3396 
3397     for (int OpIdx : OpIndices) {
3398       if (OpIdx == -1) break;
3399 
3400       const MCOperand &MO = Inst.getOperand(OpIdx);
3401       if (usesConstantBus(Inst, OpIdx)) {
3402         if (MO.isReg()) {
3403           LastSGPR = mc2PseudoReg(MO.getReg());
3404           // Pairs of registers with a partial intersection like these:
3405           //   s0, s[0:1]
3406           //   flat_scratch_lo, flat_scratch
3407           //   flat_scratch_lo, flat_scratch_hi
3408           // are theoretically valid but they are disabled anyway.
3409           // Note that this code mimics SIInstrInfo::verifyInstruction
3410           if (!SGPRsUsed.count(LastSGPR)) {
3411             SGPRsUsed.insert(LastSGPR);
3412             ++ConstantBusUseCount;
3413           }
3414         } else { // Expression or a literal
3415 
3416           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3417             continue; // special operand like VINTERP attr_chan
3418 
3419           // An instruction may use only one literal.
3420           // This has been validated on the previous step.
3421           // See validateVOPLiteral.
3422           // This literal may be used as more than one operand.
3423           // If all these operands are of the same size,
3424           // this literal counts as one scalar value.
3425           // Otherwise it counts as 2 scalar values.
3426           // See "GFX10 Shader Programming", section 3.6.2.3.
3427 
3428           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3429           if (Size < 4) Size = 4;
3430 
3431           if (NumLiterals == 0) {
3432             NumLiterals = 1;
3433             LiteralSize = Size;
3434           } else if (LiteralSize != Size) {
3435             NumLiterals = 2;
3436           }
3437         }
3438       }
3439     }
3440   }
3441   ConstantBusUseCount += NumLiterals;
3442 
3443   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3444     return true;
3445 
3446   SMLoc LitLoc = getLitLoc(Operands);
3447   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3448   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3449   Error(Loc, "invalid operand (violates constant bus restrictions)");
3450   return false;
3451 }
3452 
3453 bool
3454 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3455                                                  const OperandVector &Operands) {
3456   const unsigned Opcode = Inst.getOpcode();
3457   const MCInstrDesc &Desc = MII.get(Opcode);
3458 
3459   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3460   if (DstIdx == -1 ||
3461       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3462     return true;
3463   }
3464 
3465   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3466 
3467   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3468   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3469   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3470 
3471   assert(DstIdx != -1);
3472   const MCOperand &Dst = Inst.getOperand(DstIdx);
3473   assert(Dst.isReg());
3474 
3475   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3476 
3477   for (int SrcIdx : SrcIndices) {
3478     if (SrcIdx == -1) break;
3479     const MCOperand &Src = Inst.getOperand(SrcIdx);
3480     if (Src.isReg()) {
3481       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3482         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3483         Error(getRegLoc(SrcReg, Operands),
3484           "destination must be different than all sources");
3485         return false;
3486       }
3487     }
3488   }
3489 
3490   return true;
3491 }
3492 
3493 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3494 
3495   const unsigned Opc = Inst.getOpcode();
3496   const MCInstrDesc &Desc = MII.get(Opc);
3497 
3498   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3499     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3500     assert(ClampIdx != -1);
3501     return Inst.getOperand(ClampIdx).getImm() == 0;
3502   }
3503 
3504   return true;
3505 }
3506 
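// Check that the vdata operand size matches the number of enabled dmask
// channels plus one extra dword when tfe is set, e.g. (illustrative only)
// "image_load v[0:2], v[0:1], s[0:7] dmask:0x7" requires three data VGPRs.
// With packed d16 the required count is halved, rounded up.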
3507 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3508 
3509   const unsigned Opc = Inst.getOpcode();
3510   const MCInstrDesc &Desc = MII.get(Opc);
3511 
3512   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3513     return None;
3514 
3515   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3516   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3517   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3518 
3519   assert(VDataIdx != -1);
3520 
3521   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3522     return None;
3523 
3524   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3525   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3526   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3527   if (DMask == 0)
3528     DMask = 1;
3529 
3530   bool isPackedD16 = false;
3531   unsigned DataSize =
3532     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3533   if (hasPackedD16()) {
3534     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3535     isPackedD16 = D16Idx >= 0;
3536     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3537       DataSize = (DataSize + 1) / 2;
3538   }
3539 
3540   if ((VDataSize / 4) == DataSize + TFESize)
3541     return None;
3542 
3543   return StringRef(isPackedD16
3544                        ? "image data size does not match dmask, d16 and tfe"
3545                        : "image data size does not match dmask and tfe");
3546 }
3547 
3548 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3549   const unsigned Opc = Inst.getOpcode();
3550   const MCInstrDesc &Desc = MII.get(Opc);
3551 
3552   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3553     return true;
3554 
3555   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3556 
3557   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3558       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3559   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3560   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3561   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3562   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3563 
3564   assert(VAddr0Idx != -1);
3565   assert(SrsrcIdx != -1);
3566   assert(SrsrcIdx > VAddr0Idx);
3567 
3568   if (DimIdx == -1)
3569     return true; // intersect_ray
3570 
3571   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3572   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3573   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3574   unsigned ActualAddrSize =
3575       IsNSA ? SrsrcIdx - VAddr0Idx
3576             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3577   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3578 
3579   unsigned ExpectedAddrSize =
3580       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3581 
3582   if (!IsNSA) {
3583     if (ExpectedAddrSize > 8)
3584       ExpectedAddrSize = 16;
3585 
3586     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3587     // This provides backward compatibility for assembly created
3588     // before 160b/192b/224b types were directly supported.
3589     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3590       return true;
3591   }
3592 
3593   return ActualAddrSize == ExpectedAddrSize;
3594 }
3595 
3596 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3597 
3598   const unsigned Opc = Inst.getOpcode();
3599   const MCInstrDesc &Desc = MII.get(Opc);
3600 
3601   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3602     return true;
3603   if (!Desc.mayLoad() || !Desc.mayStore())
3604     return true; // Not atomic
3605 
3606   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3607   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3608 
3609   // This is an incomplete check because image_atomic_cmpswap
3610   // may only use 0x3 and 0xf while other atomic operations
3611   // may use 0x1 and 0x3. However these limitations are
3612   // verified when we check that dmask matches dst size.
3613   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3614 }
3615 
3616 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3617 
3618   const unsigned Opc = Inst.getOpcode();
3619   const MCInstrDesc &Desc = MII.get(Opc);
3620 
3621   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3622     return true;
3623 
3624   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3625   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3626 
3627   // GATHER4 instructions use dmask in a different fashion compared to
3628   // other MIMG instructions. The only useful DMASK values are
3629   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3630   // (red,red,red,red) etc.) The ISA document doesn't mention
3631   // this.
3632   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3633 }
3634 
3635 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3636   const unsigned Opc = Inst.getOpcode();
3637   const MCInstrDesc &Desc = MII.get(Opc);
3638 
3639   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3640     return true;
3641 
3642   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3643   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3644       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3645 
3646   if (!BaseOpcode->MSAA)
3647     return true;
3648 
3649   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3650   assert(DimIdx != -1);
3651 
3652   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3653   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3654 
3655   return DimInfo->MSAA;
3656 }
3657 
3658 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3659 {
3660   switch (Opcode) {
3661   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3662   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3663   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3664     return true;
3665   default:
3666     return false;
3667   }
3668 }
3669 
3670 // movrels* opcodes should only allow VGPRs as src0.
3671 // This is specified in .td description for vop1/vop3,
3672 // but sdwa is handled differently. See isSDWAOperand.
3673 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3674                                       const OperandVector &Operands) {
3675 
3676   const unsigned Opc = Inst.getOpcode();
3677   const MCInstrDesc &Desc = MII.get(Opc);
3678 
3679   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3680     return true;
3681 
3682   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3683   assert(Src0Idx != -1);
3684 
3685   SMLoc ErrLoc;
3686   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3687   if (Src0.isReg()) {
3688     auto Reg = mc2PseudoReg(Src0.getReg());
3689     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3690     if (!isSGPR(Reg, TRI))
3691       return true;
3692     ErrLoc = getRegLoc(Reg, Operands);
3693   } else {
3694     ErrLoc = getConstLoc(Operands);
3695   }
3696 
3697   Error(ErrLoc, "source operand must be a VGPR");
3698   return false;
3699 }
3700 
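// Before gfx90a, v_accvgpr_write_b32 only accepts a VGPR or an inline
// constant as src0; an SGPR source is rejected.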
3701 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3702                                           const OperandVector &Operands) {
3703 
3704   const unsigned Opc = Inst.getOpcode();
3705 
3706   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3707     return true;
3708 
3709   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3710   assert(Src0Idx != -1);
3711 
3712   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3713   if (!Src0.isReg())
3714     return true;
3715 
3716   auto Reg = mc2PseudoReg(Src0.getReg());
3717   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3718   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3719     Error(getRegLoc(Reg, Operands),
3720           "source operand must be either a VGPR or an inline constant");
3721     return false;
3722   }
3723 
3724   return true;
3725 }
3726 
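// For MFMA (MAI) instructions whose destination is wider than 128 bits, src2
// must either be exactly the same register tuple as the destination or must
// not overlap it at all; a partial overlap is rejected.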
3727 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3728                                    const OperandVector &Operands) {
3729   const unsigned Opc = Inst.getOpcode();
3730   const MCInstrDesc &Desc = MII.get(Opc);
3731 
3732   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3733     return true;
3734 
3735   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3736   if (Src2Idx == -1)
3737     return true;
3738 
3739   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3740   if (!Src2.isReg())
3741     return true;
3742 
3743   MCRegister Src2Reg = Src2.getReg();
3744   MCRegister DstReg = Inst.getOperand(0).getReg();
3745   if (Src2Reg == DstReg)
3746     return true;
3747 
3748   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3749   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3750     return true;
3751 
3752   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3753     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3754           "source 2 operand must not partially overlap with dst");
3755     return false;
3756   }
3757 
3758   return true;
3759 }
3760 
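// v_div_scale_f32/f64 do not accept the "abs" source modifier on any operand.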
3761 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3762   switch (Inst.getOpcode()) {
3763   default:
3764     return true;
3765   case V_DIV_SCALE_F32_gfx6_gfx7:
3766   case V_DIV_SCALE_F32_vi:
3767   case V_DIV_SCALE_F32_gfx10:
3768   case V_DIV_SCALE_F64_gfx6_gfx7:
3769   case V_DIV_SCALE_F64_vi:
3770   case V_DIV_SCALE_F64_gfx10:
3771     break;
3772   }
3773 
3774   // TODO: Check that src0 = src1 or src2.
3775 
3776   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3777                     AMDGPU::OpName::src1_modifiers,
3778                     AMDGPU::OpName::src2_modifiers}) {
3779     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3780             .getImm() &
3781         SISrcMods::ABS) {
3782       return false;
3783     }
3784   }
3785 
3786   return true;
3787 }
3788 
3789 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3790 
3791   const unsigned Opc = Inst.getOpcode();
3792   const MCInstrDesc &Desc = MII.get(Opc);
3793 
3794   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3795     return true;
3796 
3797   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3798   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3799     if (isCI() || isSI())
3800       return false;
3801   }
3802 
3803   return true;
3804 }
3805 
3806 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3807   const unsigned Opc = Inst.getOpcode();
3808   const MCInstrDesc &Desc = MII.get(Opc);
3809 
3810   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3811     return true;
3812 
3813   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3814   if (DimIdx < 0)
3815     return true;
3816 
3817   long Imm = Inst.getOperand(DimIdx).getImm();
3818   if (Imm < 0 || Imm >= 8)
3819     return false;
3820 
3821   return true;
3822 }
3823 
3824 static bool IsRevOpcode(const unsigned Opcode)
3825 {
3826   switch (Opcode) {
3827   case AMDGPU::V_SUBREV_F32_e32:
3828   case AMDGPU::V_SUBREV_F32_e64:
3829   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3830   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3831   case AMDGPU::V_SUBREV_F32_e32_vi:
3832   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3833   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3834   case AMDGPU::V_SUBREV_F32_e64_vi:
3835 
3836   case AMDGPU::V_SUBREV_CO_U32_e32:
3837   case AMDGPU::V_SUBREV_CO_U32_e64:
3838   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3839   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3840 
3841   case AMDGPU::V_SUBBREV_U32_e32:
3842   case AMDGPU::V_SUBBREV_U32_e64:
3843   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3844   case AMDGPU::V_SUBBREV_U32_e32_vi:
3845   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3846   case AMDGPU::V_SUBBREV_U32_e64_vi:
3847 
3848   case AMDGPU::V_SUBREV_U32_e32:
3849   case AMDGPU::V_SUBREV_U32_e64:
3850   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3851   case AMDGPU::V_SUBREV_U32_e32_vi:
3852   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3853   case AMDGPU::V_SUBREV_U32_e64_vi:
3854 
3855   case AMDGPU::V_SUBREV_F16_e32:
3856   case AMDGPU::V_SUBREV_F16_e64:
3857   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3858   case AMDGPU::V_SUBREV_F16_e32_vi:
3859   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3860   case AMDGPU::V_SUBREV_F16_e64_vi:
3861 
3862   case AMDGPU::V_SUBREV_U16_e32:
3863   case AMDGPU::V_SUBREV_U16_e64:
3864   case AMDGPU::V_SUBREV_U16_e32_vi:
3865   case AMDGPU::V_SUBREV_U16_e64_vi:
3866 
3867   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3868   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3869   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3870 
3871   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3872   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3873 
3874   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3875   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3876 
3877   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3878   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3879 
3880   case AMDGPU::V_LSHRREV_B32_e32:
3881   case AMDGPU::V_LSHRREV_B32_e64:
3882   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3883   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3884   case AMDGPU::V_LSHRREV_B32_e32_vi:
3885   case AMDGPU::V_LSHRREV_B32_e64_vi:
3886   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3887   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3888 
3889   case AMDGPU::V_ASHRREV_I32_e32:
3890   case AMDGPU::V_ASHRREV_I32_e64:
3891   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3892   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3893   case AMDGPU::V_ASHRREV_I32_e32_vi:
3894   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3895   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3896   case AMDGPU::V_ASHRREV_I32_e64_vi:
3897 
3898   case AMDGPU::V_LSHLREV_B32_e32:
3899   case AMDGPU::V_LSHLREV_B32_e64:
3900   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3901   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3902   case AMDGPU::V_LSHLREV_B32_e32_vi:
3903   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3904   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3905   case AMDGPU::V_LSHLREV_B32_e64_vi:
3906 
3907   case AMDGPU::V_LSHLREV_B16_e32:
3908   case AMDGPU::V_LSHLREV_B16_e64:
3909   case AMDGPU::V_LSHLREV_B16_e32_vi:
3910   case AMDGPU::V_LSHLREV_B16_e64_vi:
3911   case AMDGPU::V_LSHLREV_B16_gfx10:
3912 
3913   case AMDGPU::V_LSHRREV_B16_e32:
3914   case AMDGPU::V_LSHRREV_B16_e64:
3915   case AMDGPU::V_LSHRREV_B16_e32_vi:
3916   case AMDGPU::V_LSHRREV_B16_e64_vi:
3917   case AMDGPU::V_LSHRREV_B16_gfx10:
3918 
3919   case AMDGPU::V_ASHRREV_I16_e32:
3920   case AMDGPU::V_ASHRREV_I16_e64:
3921   case AMDGPU::V_ASHRREV_I16_e32_vi:
3922   case AMDGPU::V_ASHRREV_I16_e64_vi:
3923   case AMDGPU::V_ASHRREV_I16_gfx10:
3924 
3925   case AMDGPU::V_LSHLREV_B64_e64:
3926   case AMDGPU::V_LSHLREV_B64_gfx10:
3927   case AMDGPU::V_LSHLREV_B64_vi:
3928 
3929   case AMDGPU::V_LSHRREV_B64_e64:
3930   case AMDGPU::V_LSHRREV_B64_gfx10:
3931   case AMDGPU::V_LSHRREV_B64_vi:
3932 
3933   case AMDGPU::V_ASHRREV_I64_e64:
3934   case AMDGPU::V_ASHRREV_I64_gfx10:
3935   case AMDGPU::V_ASHRREV_I64_vi:
3936 
3937   case AMDGPU::V_PK_LSHLREV_B16:
3938   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3939   case AMDGPU::V_PK_LSHLREV_B16_vi:
3940 
3941   case AMDGPU::V_PK_LSHRREV_B16:
3942   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3943   case AMDGPU::V_PK_LSHRREV_B16_vi:
3944   case AMDGPU::V_PK_ASHRREV_I16:
3945   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3946   case AMDGPU::V_PK_ASHRREV_I16_vi:
3947     return true;
3948   default:
3949     return false;
3950   }
3951 }
3952 
3953 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3954 
3955   using namespace SIInstrFlags;
3956   const unsigned Opcode = Inst.getOpcode();
3957   const MCInstrDesc &Desc = MII.get(Opcode);
3958 
3959   // The lds_direct register is defined so that it can only be used
3960   // with 9-bit operands. Ignore encodings which do not accept these.
3961   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3962   if ((Desc.TSFlags & Enc) == 0)
3963     return None;
3964 
3965   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3966     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3967     if (SrcIdx == -1)
3968       break;
3969     const auto &Src = Inst.getOperand(SrcIdx);
3970     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3971 
3972       if (isGFX90A())
3973         return StringRef("lds_direct is not supported on this GPU");
3974 
3975       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3976         return StringRef("lds_direct cannot be used with this instruction");
3977 
3978       if (SrcName != OpName::src0)
3979         return StringRef("lds_direct may be used as src0 only");
3980     }
3981   }
3982 
3983   return None;
3984 }
3985 
3986 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3987   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3988     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3989     if (Op.isFlatOffset())
3990       return Op.getStartLoc();
3991   }
3992   return getLoc();
3993 }
3994 
3995 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3996                                          const OperandVector &Operands) {
3997   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3998   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3999     return true;
4000 
4001   auto Opcode = Inst.getOpcode();
4002   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4003   assert(OpNum != -1);
4004 
4005   const auto &Op = Inst.getOperand(OpNum);
4006   if (!hasFlatOffsets() && Op.getImm() != 0) {
4007     Error(getFlatOffsetLoc(Operands),
4008           "flat offset modifier is not supported on this GPU");
4009     return false;
4010   }
4011 
4012   // For FLAT segment the offset must be positive;
4013   // MSB is ignored and forced to zero.
4014   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4015     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4016     if (!isIntN(OffsetSize, Op.getImm())) {
4017       Error(getFlatOffsetLoc(Operands),
4018             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4019       return false;
4020     }
4021   } else {
4022     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4023     if (!isUIntN(OffsetSize, Op.getImm())) {
4024       Error(getFlatOffsetLoc(Operands),
4025             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4026       return false;
4027     }
4028   }
4029 
4030   return true;
4031 }
4032 
4033 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4034   // Start with second operand because SMEM Offset cannot be dst or src0.
4035   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4036     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4037     if (Op.isSMEMOffset())
4038       return Op.getStartLoc();
4039   }
4040   return getLoc();
4041 }
4042 
4043 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4044                                          const OperandVector &Operands) {
4045   if (isCI() || isSI())
4046     return true;
4047 
4048   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4049   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4050     return true;
4051 
4052   auto Opcode = Inst.getOpcode();
4053   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4054   if (OpNum == -1)
4055     return true;
4056 
4057   const auto &Op = Inst.getOperand(OpNum);
4058   if (!Op.isImm())
4059     return true;
4060 
4061   uint64_t Offset = Op.getImm();
4062   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4063   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4064       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4065     return true;
4066 
4067   Error(getSMEMOffsetLoc(Operands),
4068         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4069                                "expected a 21-bit signed offset");
4070 
4071   return false;
4072 }
4073 
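// SOP2/SOPC instructions can encode at most one literal. The same literal may
// be repeated, e.g. (illustrative only) "s_and_b32 s0, 0x12345678, 0x12345678",
// but two different literal values are rejected.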
4074 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4075   unsigned Opcode = Inst.getOpcode();
4076   const MCInstrDesc &Desc = MII.get(Opcode);
4077   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4078     return true;
4079 
4080   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4081   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4082 
4083   const int OpIndices[] = { Src0Idx, Src1Idx };
4084 
4085   unsigned NumExprs = 0;
4086   unsigned NumLiterals = 0;
4087   uint32_t LiteralValue;
4088 
4089   for (int OpIdx : OpIndices) {
4090     if (OpIdx == -1) break;
4091 
4092     const MCOperand &MO = Inst.getOperand(OpIdx);
4093     // Exclude special imm operands (like those used by s_set_gpr_idx_on)
4094     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4095       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4096         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4097         if (NumLiterals == 0 || LiteralValue != Value) {
4098           LiteralValue = Value;
4099           ++NumLiterals;
4100         }
4101       } else if (MO.isExpr()) {
4102         ++NumExprs;
4103       }
4104     }
4105   }
4106 
4107   return NumLiterals + NumExprs <= 1;
4108 }
4109 
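// Validate op_sel: v_permlane16/v_permlanex16 may only use the two low op_sel
// bits, and gfx940 DOT instructions require op_sel to be zero and op_sel_hi to
// keep its default (all bits set).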
4110 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4111   const unsigned Opc = Inst.getOpcode();
4112   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4113       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4114     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4115     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4116 
4117     if (OpSel & ~3)
4118       return false;
4119   }
4120 
4121   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4122     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4123     if (OpSelIdx != -1) {
4124       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4125         return false;
4126     }
4127     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4128     if (OpSelHiIdx != -1) {
4129       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4130         return false;
4131     }
4132   }
4133 
4134   return true;
4135 }
4136 
4137 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4138                                   const OperandVector &Operands) {
4139   const unsigned Opc = Inst.getOpcode();
4140   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4141   if (DppCtrlIdx < 0)
4142     return true;
4143   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4144 
4145   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4146     // DPP64 is supported for row_newbcast only.
4147     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4148     if (Src0Idx >= 0 &&
4149         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4150       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4151       Error(S, "64 bit dpp only supports row_newbcast");
4152       return false;
4153     }
4154   }
4155 
4156   return true;
4157 }
4158 
4159 // Check if VCC register matches wavefront size
4160 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4161   auto FB = getFeatureBits();
4162   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4163     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4164 }
4165 
4166 // Only one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
4167 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4168                                          const OperandVector &Operands) {
4169   unsigned Opcode = Inst.getOpcode();
4170   const MCInstrDesc &Desc = MII.get(Opcode);
4171   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4172   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4173       ImmIdx == -1)
4174     return true;
4175 
4176   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4177   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4178   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4179 
4180   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4181 
4182   unsigned NumExprs = 0;
4183   unsigned NumLiterals = 0;
4184   uint32_t LiteralValue;
4185 
4186   for (int OpIdx : OpIndices) {
4187     if (OpIdx == -1)
4188       continue;
4189 
4190     const MCOperand &MO = Inst.getOperand(OpIdx);
4191     if (!MO.isImm() && !MO.isExpr())
4192       continue;
4193     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4194       continue;
4195 
4196     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4197         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4198       Error(getConstLoc(Operands),
4199             "inline constants are not allowed for this operand");
4200       return false;
4201     }
4202 
4203     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4204       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4205       if (NumLiterals == 0 || LiteralValue != Value) {
4206         LiteralValue = Value;
4207         ++NumLiterals;
4208       }
4209     } else if (MO.isExpr()) {
4210       ++NumExprs;
4211     }
4212   }
4213   NumLiterals += NumExprs;
4214 
4215   if (!NumLiterals)
4216     return true;
4217 
4218   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4219     Error(getLitLoc(Operands), "literal operands are not supported");
4220     return false;
4221   }
4222 
4223   if (NumLiterals > 1) {
4224     Error(getLitLoc(Operands), "only one literal operand is allowed");
4225     return false;
4226   }
4227 
4228   return true;
4229 }
4230 
4231 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4232 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4233                          const MCRegisterInfo *MRI) {
4234   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4235   if (OpIdx < 0)
4236     return -1;
4237 
4238   const MCOperand &Op = Inst.getOperand(OpIdx);
4239   if (!Op.isReg())
4240     return -1;
4241 
4242   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4243   auto Reg = Sub ? Sub : Op.getReg();
4244   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4245   return AGPR32.contains(Reg) ? 1 : 0;
4246 }
4247 
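// For FLAT/MUBUF/MTBUF/MIMG/DS memory instructions, check the register class
// of the data and destination operands: gfx90a requires them to be either all
// VGPRs or all AGPRs, while other targets do not allow AGPR data at all.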
4248 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4249   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4250   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4251                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4252                   SIInstrFlags::DS)) == 0)
4253     return true;
4254 
4255   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4256                                                       : AMDGPU::OpName::vdata;
4257 
4258   const MCRegisterInfo *MRI = getMRI();
4259   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4260   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4261 
4262   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4263     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4264     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4265       return false;
4266   }
4267 
4268   auto FB = getFeatureBits();
4269   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4270     if (DataAreg < 0 || DstAreg < 0)
4271       return true;
4272     return DstAreg == DataAreg;
4273   }
4274 
4275   return DstAreg < 1 && DataAreg < 1;
4276 }
4277 
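// On gfx90a, VGPR and AGPR tuples must start at an even-numbered register,
// e.g. (illustrative only) v[0:1] is accepted while v[1:2] is rejected.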
4278 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4279   auto FB = getFeatureBits();
4280   if (!FB[AMDGPU::FeatureGFX90AInsts])
4281     return true;
4282 
4283   const MCRegisterInfo *MRI = getMRI();
4284   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4285   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4286   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4287     const MCOperand &Op = Inst.getOperand(I);
4288     if (!Op.isReg())
4289       continue;
4290 
4291     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4292     if (!Sub)
4293       continue;
4294 
4295     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4296       return false;
4297     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4298       return false;
4299   }
4300 
4301   return true;
4302 }
4303 
4304 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4305   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4306     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4307     if (Op.isBLGP())
4308       return Op.getStartLoc();
4309   }
4310   return SMLoc();
4311 }
4312 
4313 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4314                                    const OperandVector &Operands) {
4315   unsigned Opc = Inst.getOpcode();
4316   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4317   if (BlgpIdx == -1)
4318     return true;
4319   SMLoc BLGPLoc = getBLGPLoc(Operands);
4320   if (!BLGPLoc.isValid())
4321     return true;
4322   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4323   auto FB = getFeatureBits();
4324   bool UsesNeg = false;
4325   if (FB[AMDGPU::FeatureGFX940Insts]) {
4326     switch (Opc) {
4327     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4328     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4329     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4330     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4331       UsesNeg = true;
4332     }
4333   }
4334 
4335   if (IsNeg == UsesNeg)
4336     return true;
4337 
4338   Error(BLGPLoc,
4339         UsesNeg ? "invalid modifier: blgp is not supported"
4340                 : "invalid modifier: neg is not supported");
4341 
4342   return false;
4343 }
4344 
4345 // gfx90a has an undocumented limitation:
4346 // DS_GWS opcodes must use even aligned registers.
4347 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4348                                   const OperandVector &Operands) {
4349   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4350     return true;
4351 
4352   int Opc = Inst.getOpcode();
4353   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4354       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4355     return true;
4356 
4357   const MCRegisterInfo *MRI = getMRI();
4358   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4359   int Data0Pos =
4360       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4361   assert(Data0Pos != -1);
4362   auto Reg = Inst.getOperand(Data0Pos).getReg();
4363   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4364   if (RegIdx & 1) {
4365     SMLoc RegLoc = getRegLoc(Reg, Operands);
4366     Error(RegLoc, "vgpr must be even aligned");
4367     return false;
4368   }
4369 
4370   return true;
4371 }
4372 
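// Validate cache policy (cpol) bits: SMRD instructions accept only glc and
// dlc, scc is rejected on targets that do not support it, non-MIMG atomics
// that return a value must set glc (sc0 on gfx940), and atomics that do not
// return a value must not.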
4373 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4374                                             const OperandVector &Operands,
4375                                             const SMLoc &IDLoc) {
4376   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4377                                            AMDGPU::OpName::cpol);
4378   if (CPolPos == -1)
4379     return true;
4380 
4381   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4382 
4383   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4384   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4385       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4386     Error(IDLoc, "invalid cache policy for SMRD instruction");
4387     return false;
4388   }
4389 
4390   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4391     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4392     StringRef CStr(S.getPointer());
4393     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4394     Error(S, "scc is not supported on this GPU");
4395     return false;
4396   }
4397 
4398   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4399     return true;
4400 
4401   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4402     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4403       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4404                               : "instruction must use glc");
4405       return false;
4406     }
4407   } else {
4408     if (CPol & CPol::GLC) {
4409       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4410       StringRef CStr(S.getPointer());
4411       S = SMLoc::getFromPointer(
4412           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4413       Error(S, isGFX940() ? "instruction must not use sc0"
4414                           : "instruction must not use glc");
4415       return false;
4416     }
4417   }
4418 
4419   return true;
4420 }
4421 
4422 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4423                                          const OperandVector &Operands,
4424                                          const SMLoc &IDLoc) {
4425   if (isGFX940())
4426     return true;
4427 
4428   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4429   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4430       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4431     return true;
4432   // This is FLAT LDS DMA.
4433 
4434   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4435   StringRef CStr(S.getPointer());
4436   if (!CStr.startswith("lds")) {
4437     // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
4438     // The LDS version should have the 'lds' modifier, but it follows optional
4439     // operands, so its absence is ignored by the matcher.
4440     Error(IDLoc, "invalid operands for instruction");
4441     return false;
4442   }
4443 
4444   return true;
4445 }
4446 
4447 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4448                                           const SMLoc &IDLoc,
4449                                           const OperandVector &Operands) {
4450   if (auto ErrMsg = validateLdsDirect(Inst)) {
4451     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4452     return false;
4453   }
4454   if (!validateSOPLiteral(Inst)) {
4455     Error(getLitLoc(Operands),
4456       "only one literal operand is allowed");
4457     return false;
4458   }
4459   if (!validateVOPLiteral(Inst, Operands)) {
4460     return false;
4461   }
4462   if (!validateConstantBusLimitations(Inst, Operands)) {
4463     return false;
4464   }
4465   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4466     return false;
4467   }
4468   if (!validateIntClampSupported(Inst)) {
4469     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4470       "integer clamping is not supported on this GPU");
4471     return false;
4472   }
4473   if (!validateOpSel(Inst)) {
4474     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4475       "invalid op_sel operand");
4476     return false;
4477   }
4478   if (!validateDPP(Inst, Operands)) {
4479     return false;
4480   }
4481   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4482   if (!validateMIMGD16(Inst)) {
4483     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4484       "d16 modifier is not supported on this GPU");
4485     return false;
4486   }
4487   if (!validateMIMGDim(Inst)) {
4488     Error(IDLoc, "dim modifier is required on this GPU");
4489     return false;
4490   }
4491   if (!validateMIMGMSAA(Inst)) {
4492     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4493           "invalid dim; must be MSAA type");
4494     return false;
4495   }
4496   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4497     Error(IDLoc, *ErrMsg);
4498     return false;
4499   }
4500   if (!validateMIMGAddrSize(Inst)) {
4501     Error(IDLoc,
4502       "image address size does not match dim and a16");
4503     return false;
4504   }
4505   if (!validateMIMGAtomicDMask(Inst)) {
4506     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4507       "invalid atomic image dmask");
4508     return false;
4509   }
4510   if (!validateMIMGGatherDMask(Inst)) {
4511     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4512       "invalid image_gather dmask: only one bit must be set");
4513     return false;
4514   }
4515   if (!validateMovrels(Inst, Operands)) {
4516     return false;
4517   }
4518   if (!validateFlatOffset(Inst, Operands)) {
4519     return false;
4520   }
4521   if (!validateSMEMOffset(Inst, Operands)) {
4522     return false;
4523   }
4524   if (!validateMAIAccWrite(Inst, Operands)) {
4525     return false;
4526   }
4527   if (!validateMFMA(Inst, Operands)) {
4528     return false;
4529   }
4530   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4531     return false;
4532   }
4533 
4534   if (!validateAGPRLdSt(Inst)) {
4535     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4536     ? "invalid register class: data and dst should be all VGPR or AGPR"
4537     : "invalid register class: agpr loads and stores not supported on this GPU"
4538     );
4539     return false;
4540   }
4541   if (!validateVGPRAlign(Inst)) {
4542     Error(IDLoc,
4543       "invalid register class: vgpr tuples must be 64 bit aligned");
4544     return false;
4545   }
4546   if (!validateGWS(Inst, Operands)) {
4547     return false;
4548   }
4549 
4550   if (!validateBLGP(Inst, Operands)) {
4551     return false;
4552   }
4553 
4554   if (!validateDivScale(Inst)) {
4555     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4556     return false;
4557   }
4561 
4562   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4563     return false;
4564   }
4565 
4566   return true;
4567 }
4568 
4569 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4570                                             const FeatureBitset &FBS,
4571                                             unsigned VariantID = 0);
4572 
4573 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4574                                 const FeatureBitset &AvailableFeatures,
4575                                 unsigned VariantID);
4576 
4577 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4578                                        const FeatureBitset &FBS) {
4579   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4580 }
4581 
4582 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4583                                        const FeatureBitset &FBS,
4584                                        ArrayRef<unsigned> Variants) {
4585   for (auto Variant : Variants) {
4586     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4587       return true;
4588   }
4589 
4590   return false;
4591 }
4592 
4593 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4594                                                   const SMLoc &IDLoc) {
4595   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4596 
4597   // Check if requested instruction variant is supported.
4598   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4599     return false;
4600 
4601   // This instruction is not supported.
4602   // Clear any other pending errors because they are no longer relevant.
4603   getParser().clearPendingErrors();
4604 
4605   // Requested instruction variant is not supported.
4606   // Check if any other variants are supported.
4607   StringRef VariantName = getMatchedVariantName();
4608   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4609     return Error(IDLoc,
4610                  Twine(VariantName,
4611                        " variant of this instruction is not supported"));
4612   }
4613 
4614   // Finally check if this instruction is supported on any other GPU.
4615   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4616     return Error(IDLoc, "instruction not supported on this GPU");
4617   }
4618 
4619   // Instruction not supported on any GPU. Probably a typo.
4620   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4621   return Error(IDLoc, "invalid instruction" + Suggestion);
4622 }
4623 
4624 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4625                                               OperandVector &Operands,
4626                                               MCStreamer &Out,
4627                                               uint64_t &ErrorInfo,
4628                                               bool MatchingInlineAsm) {
4629   MCInst Inst;
4630   unsigned Result = Match_Success;
4631   for (auto Variant : getMatchedVariants()) {
4632     uint64_t EI;
4633     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4634                                   Variant);
4635     // We order match statuses from least to most specific and use the most
4636     // specific status as the result:
4637     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4638     if ((R == Match_Success) ||
4639         (R == Match_PreferE32) ||
4640         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4641         (R == Match_InvalidOperand && Result != Match_MissingFeature
4642                                    && Result != Match_PreferE32) ||
4643         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4644                                    && Result != Match_MissingFeature
4645                                    && Result != Match_PreferE32)) {
4646       Result = R;
4647       ErrorInfo = EI;
4648     }
4649     if (R == Match_Success)
4650       break;
4651   }
4652 
4653   if (Result == Match_Success) {
4654     if (!validateInstruction(Inst, IDLoc, Operands)) {
4655       return true;
4656     }
4657     Inst.setLoc(IDLoc);
4658     Out.emitInstruction(Inst, getSTI());
4659     return false;
4660   }
4661 
4662   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4663   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4664     return true;
4665   }
4666 
4667   switch (Result) {
4668   default: break;
4669   case Match_MissingFeature:
4670     // It has been verified that the specified instruction
4671     // mnemonic is valid. A match was found but it requires
4672     // features which are not supported on this GPU.
4673     return Error(IDLoc, "operands are not valid for this GPU or mode");
4674 
4675   case Match_InvalidOperand: {
4676     SMLoc ErrorLoc = IDLoc;
4677     if (ErrorInfo != ~0ULL) {
4678       if (ErrorInfo >= Operands.size()) {
4679         return Error(IDLoc, "too few operands for instruction");
4680       }
4681       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4682       if (ErrorLoc == SMLoc())
4683         ErrorLoc = IDLoc;
4684     }
4685     return Error(ErrorLoc, "invalid operand for instruction");
4686   }
4687 
4688   case Match_PreferE32:
4689     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4690                         "should be encoded as e32");
4691   case Match_MnemonicFail:
4692     llvm_unreachable("Invalid instructions should have been handled already");
4693   }
4694   llvm_unreachable("Implement any new match types added!");
4695 }
4696 
4697 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4698   int64_t Tmp = -1;
4699   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4700     return true;
4701   }
4702   if (getParser().parseAbsoluteExpression(Tmp)) {
4703     return true;
4704   }
4705   Ret = static_cast<uint32_t>(Tmp);
4706   return false;
4707 }
4708 
4709 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4710                                                uint32_t &Minor) {
4711   if (ParseAsAbsoluteExpression(Major))
4712     return TokError("invalid major version");
4713 
4714   if (!trySkipToken(AsmToken::Comma))
4715     return TokError("minor version number required, comma expected");
4716 
4717   if (ParseAsAbsoluteExpression(Minor))
4718     return TokError("invalid minor version");
4719 
4720   return false;
4721 }
4722 
4723 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4724   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4725     return TokError("directive only supported for amdgcn architecture");
4726 
4727   std::string TargetIDDirective;
4728   SMLoc TargetStart = getTok().getLoc();
4729   if (getParser().parseEscapedString(TargetIDDirective))
4730     return true;
4731 
4732   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4733   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4734     return getParser().Error(TargetRange.Start,
4735         (Twine(".amdgcn_target directive's target id ") +
4736          Twine(TargetIDDirective) +
4737          Twine(" does not match the specified target id ") +
4738          Twine(getTargetStreamer().getTargetID()->toString())).str());
4739 
4740   return false;
4741 }
4742 
4743 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4744   return Error(Range.Start, "value out of range", Range);
4745 }
4746 
4747 bool AMDGPUAsmParser::calculateGPRBlocks(
4748     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4749     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4750     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4751     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4752   // TODO(scott.linder): These calculations are duplicated from
4753   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4754   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4755 
4756   unsigned NumVGPRs = NextFreeVGPR;
4757   unsigned NumSGPRs = NextFreeSGPR;
4758 
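  // GFX10+ hardware allocates a fixed SGPR pool per wave, so the granulated
  // SGPR count in the kernel descriptor is left at zero.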
4759   if (Version.Major >= 10)
4760     NumSGPRs = 0;
4761   else {
4762     unsigned MaxAddressableNumSGPRs =
4763         IsaInfo::getAddressableNumSGPRs(&getSTI());
4764 
4765     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4766         NumSGPRs > MaxAddressableNumSGPRs)
4767       return OutOfRangeError(SGPRRange);
4768 
4769     NumSGPRs +=
4770         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4771 
4772     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4773         NumSGPRs > MaxAddressableNumSGPRs)
4774       return OutOfRangeError(SGPRRange);
4775 
4776     if (Features.test(FeatureSGPRInitBug))
4777       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4778   }
4779 
4780   VGPRBlocks =
4781       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4782   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4783 
4784   return false;
4785 }
4786 
4787 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4788   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4789     return TokError("directive only supported for amdgcn architecture");
4790 
4791   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4792     return TokError("directive only supported for amdhsa OS");
4793 
4794   StringRef KernelName;
4795   if (getParser().parseIdentifier(KernelName))
4796     return true;
4797 
4798   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4799 
4800   StringSet<> Seen;
4801 
4802   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4803 
4804   SMRange VGPRRange;
4805   uint64_t NextFreeVGPR = 0;
4806   uint64_t AccumOffset = 0;
4807   uint64_t SharedVGPRCount = 0;
4808   SMRange SGPRRange;
4809   uint64_t NextFreeSGPR = 0;
4810 
4811   // Count the number of user SGPRs implied from the enabled feature bits.
4812   unsigned ImpliedUserSGPRCount = 0;
4813 
4814   // Track if the asm explicitly contains the directive for the user SGPR
4815   // count.
4816   Optional<unsigned> ExplicitUserSGPRCount;
4817   bool ReserveVCC = true;
4818   bool ReserveFlatScr = true;
4819   Optional<bool> EnableWavefrontSize32;
4820 
4821   while (true) {
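    // Lex EndOfStatement. This is in a loop because lexing a comment also
    // sets the current token to EndOfStatement.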
4822     while (trySkipToken(AsmToken::EndOfStatement));
4823 
4824     StringRef ID;
4825     SMRange IDRange = getTok().getLocRange();
4826     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4827       return true;
4828 
4829     if (ID == ".end_amdhsa_kernel")
4830       break;
4831 
4832     if (Seen.find(ID) != Seen.end())
4833       return TokError(".amdhsa_ directives cannot be repeated");
4834     Seen.insert(ID);
4835 
4836     SMLoc ValStart = getLoc();
4837     int64_t IVal;
4838     if (getParser().parseAbsoluteExpression(IVal))
4839       return true;
4840     SMLoc ValEnd = getLoc();
4841     SMRange ValRange = SMRange(ValStart, ValEnd);
4842 
4843     if (IVal < 0)
4844       return OutOfRangeError(ValRange);
4845 
4846     uint64_t Val = IVal;
4847 
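// Range-check VALUE against the width of the descriptor field ENTRY, then set
// that field in FIELD; reports an out-of-range error against RANGE on failure.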
4848 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4849   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4850     return OutOfRangeError(RANGE);                                             \
4851   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4852 
4853     if (ID == ".amdhsa_group_segment_fixed_size") {
4854       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4855         return OutOfRangeError(ValRange);
4856       KD.group_segment_fixed_size = Val;
4857     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4858       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4859         return OutOfRangeError(ValRange);
4860       KD.private_segment_fixed_size = Val;
4861     } else if (ID == ".amdhsa_kernarg_size") {
4862       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4863         return OutOfRangeError(ValRange);
4864       KD.kernarg_size = Val;
4865     } else if (ID == ".amdhsa_user_sgpr_count") {
4866       ExplicitUserSGPRCount = Val;
4867     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4868       if (hasArchitectedFlatScratch())
4869         return Error(IDRange.Start,
4870                      "directive is not supported with architected flat scratch",
4871                      IDRange);
4872       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4873                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4874                        Val, ValRange);
4875       if (Val)
4876         ImpliedUserSGPRCount += 4;
4877     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4878       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4879                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4880                        ValRange);
4881       if (Val)
4882         ImpliedUserSGPRCount += 2;
4883     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4884       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4885                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4886                        ValRange);
4887       if (Val)
4888         ImpliedUserSGPRCount += 2;
4889     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4890       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4891                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4892                        Val, ValRange);
4893       if (Val)
4894         ImpliedUserSGPRCount += 2;
4895     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4896       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4897                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4898                        ValRange);
4899       if (Val)
4900         ImpliedUserSGPRCount += 2;
4901     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4902       if (hasArchitectedFlatScratch())
4903         return Error(IDRange.Start,
4904                      "directive is not supported with architected flat scratch",
4905                      IDRange);
4906       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4907                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4908                        ValRange);
4909       if (Val)
4910         ImpliedUserSGPRCount += 2;
4911     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4912       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4913                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4914                        Val, ValRange);
4915       if (Val)
4916         ImpliedUserSGPRCount += 1;
4917     } else if (ID == ".amdhsa_wavefront_size32") {
4918       if (IVersion.Major < 10)
4919         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4920       EnableWavefrontSize32 = Val;
4921       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4922                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4923                        Val, ValRange);
4924     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4925       if (hasArchitectedFlatScratch())
4926         return Error(IDRange.Start,
4927                      "directive is not supported with architected flat scratch",
4928                      IDRange);
4929       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4930                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4931     } else if (ID == ".amdhsa_enable_private_segment") {
4932       if (!hasArchitectedFlatScratch())
4933         return Error(
4934             IDRange.Start,
4935             "directive is not supported without architected flat scratch",
4936             IDRange);
4937       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4938                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4939     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4940       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4941                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4942                        ValRange);
4943     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4944       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4945                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4946                        ValRange);
4947     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4948       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4949                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4950                        ValRange);
4951     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4952       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4953                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4954                        ValRange);
4955     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4956       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4957                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4958                        ValRange);
4959     } else if (ID == ".amdhsa_next_free_vgpr") {
4960       VGPRRange = ValRange;
4961       NextFreeVGPR = Val;
4962     } else if (ID == ".amdhsa_next_free_sgpr") {
4963       SGPRRange = ValRange;
4964       NextFreeSGPR = Val;
4965     } else if (ID == ".amdhsa_accum_offset") {
4966       if (!isGFX90A())
4967         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4968       AccumOffset = Val;
4969     } else if (ID == ".amdhsa_reserve_vcc") {
4970       if (!isUInt<1>(Val))
4971         return OutOfRangeError(ValRange);
4972       ReserveVCC = Val;
4973     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4974       if (IVersion.Major < 7)
4975         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4976       if (hasArchitectedFlatScratch())
4977         return Error(IDRange.Start,
4978                      "directive is not supported with architected flat scratch",
4979                      IDRange);
4980       if (!isUInt<1>(Val))
4981         return OutOfRangeError(ValRange);
4982       ReserveFlatScr = Val;
4983     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4984       if (IVersion.Major < 8)
4985         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4986       if (!isUInt<1>(Val))
4987         return OutOfRangeError(ValRange);
4988       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4989         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4990                                  IDRange);
4991     } else if (ID == ".amdhsa_float_round_mode_32") {
4992       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4993                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4994     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4995       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4996                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4997     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4998       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4999                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5000     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5001       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5002                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5003                        ValRange);
5004     } else if (ID == ".amdhsa_dx10_clamp") {
5005       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5006                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5007     } else if (ID == ".amdhsa_ieee_mode") {
5008       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5009                        Val, ValRange);
5010     } else if (ID == ".amdhsa_fp16_overflow") {
5011       if (IVersion.Major < 9)
5012         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5013       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5014                        ValRange);
5015     } else if (ID == ".amdhsa_tg_split") {
5016       if (!isGFX90A())
5017         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5018       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5019                        ValRange);
5020     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5021       if (IVersion.Major < 10)
5022         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5023       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5024                        ValRange);
5025     } else if (ID == ".amdhsa_memory_ordered") {
5026       if (IVersion.Major < 10)
5027         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5028       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5029                        ValRange);
5030     } else if (ID == ".amdhsa_forward_progress") {
5031       if (IVersion.Major < 10)
5032         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5033       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5034                        ValRange);
5035     } else if (ID == ".amdhsa_shared_vgpr_count") {
5036       if (IVersion.Major < 10)
5037         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5038       SharedVGPRCount = Val;
5039       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5040                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
5041                        ValRange);
5042     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5043       PARSE_BITS_ENTRY(
5044           KD.compute_pgm_rsrc2,
5045           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5046           ValRange);
5047     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5048       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5049                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5050                        Val, ValRange);
5051     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5052       PARSE_BITS_ENTRY(
5053           KD.compute_pgm_rsrc2,
5054           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5055           ValRange);
5056     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5057       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5058                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5059                        Val, ValRange);
5060     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5061       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5062                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5063                        Val, ValRange);
5064     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5065       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5066                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5067                        Val, ValRange);
5068     } else if (ID == ".amdhsa_exception_int_div_zero") {
5069       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5070                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5071                        Val, ValRange);
5072     } else {
5073       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5074     }
5075 
5076 #undef PARSE_BITS_ENTRY
5077   }
5078 
5079   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5080     return TokError(".amdhsa_next_free_vgpr directive is required");
5081 
5082   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5083     return TokError(".amdhsa_next_free_sgpr directive is required");
5084 
5085   unsigned VGPRBlocks;
5086   unsigned SGPRBlocks;
5087   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5088                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5089                          EnableWavefrontSize32, NextFreeVGPR,
5090                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5091                          SGPRBlocks))
5092     return true;
5093 
5094   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5095           VGPRBlocks))
5096     return OutOfRangeError(VGPRRange);
5097   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5098                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5099 
5100   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5101           SGPRBlocks))
5102     return OutOfRangeError(SGPRRange);
5103   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5104                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5105                   SGPRBlocks);
5106 
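  // An explicit .amdhsa_user_sgpr_count overrides the implied count, but it
  // must be at least as large as the count implied by the enabled user SGPRs.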
5107   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5108     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5109                     "enabled user SGPRs");
5110 
5111   unsigned UserSGPRCount =
5112       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5113 
5114   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5115     return TokError("too many user SGPRs enabled");
5116   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5117                   UserSGPRCount);
5118 
5119   if (isGFX90A()) {
5120     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5121       return TokError(".amdhsa_accum_offset directive is required");
5122     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5123       return TokError("accum_offset should be in range [4..256] in "
5124                       "increments of 4");
5125     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5126       return TokError("accum_offset exceeds total VGPR allocation");
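    // The hardware field holds (accum_offset / 4) - 1.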
5127     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5128                     (AccumOffset / 4 - 1));
5129   }
5130 
5131   if (IVersion.Major == 10) {
5132     // SharedVGPRCount < 16 is checked by PARSE_BITS_ENTRY above.
5133     if (SharedVGPRCount && EnableWavefrontSize32) {
5134       return TokError("shared_vgpr_count directive not valid on "
5135                       "wavefront size 32");
5136     }
5137     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5138       return TokError("shared_vgpr_count*2 + "
5139                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5140                       "exceed 63");
5141     }
5142   }
5143 
5144   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5145       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5146       ReserveFlatScr);
5147   return false;
5148 }
5149 
5150 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5151   uint32_t Major;
5152   uint32_t Minor;
5153 
5154   if (ParseDirectiveMajorMinor(Major, Minor))
5155     return true;
5156 
5157   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5158   return false;
5159 }
5160 
5161 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5162   uint32_t Major;
5163   uint32_t Minor;
5164   uint32_t Stepping;
5165   StringRef VendorName;
5166   StringRef ArchName;
5167 
5168   // If this directive has no arguments, then use the ISA version for the
5169   // targeted GPU.
5170   if (isToken(AsmToken::EndOfStatement)) {
5171     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5172     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5173                                                         ISA.Stepping,
5174                                                         "AMD", "AMDGPU");
5175     return false;
5176   }
5177 
5178   if (ParseDirectiveMajorMinor(Major, Minor))
5179     return true;
5180 
5181   if (!trySkipToken(AsmToken::Comma))
5182     return TokError("stepping version number required, comma expected");
5183 
5184   if (ParseAsAbsoluteExpression(Stepping))
5185     return TokError("invalid stepping version");
5186 
5187   if (!trySkipToken(AsmToken::Comma))
5188     return TokError("vendor name required, comma expected");
5189 
5190   if (!parseString(VendorName, "invalid vendor name"))
5191     return true;
5192 
5193   if (!trySkipToken(AsmToken::Comma))
5194     return TokError("arch name required, comma expected");
5195 
5196   if (!parseString(ArchName, "invalid arch name"))
5197     return true;
5198 
5199   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5200                                                       VendorName, ArchName);
5201   return false;
5202 }
5203 
5204 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5205                                                amd_kernel_code_t &Header) {
5206   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5207   // assembly for backwards compatibility.
5208   if (ID == "max_scratch_backing_memory_byte_size") {
5209     Parser.eatToEndOfStatement();
5210     return false;
5211   }
5212 
5213   SmallString<40> ErrStr;
5214   raw_svector_ostream Err(ErrStr);
5215   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5216     return TokError(Err.str());
5217   }
5218   Lex();
5219 
5220   if (ID == "enable_wavefront_size32") {
5221     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5222       if (!isGFX10Plus())
5223         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5224       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5225         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5226     } else {
5227       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5228         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5229     }
5230   }
5231 
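  // wavefront_size holds log2 of the wave size: 5 means wave32, 6 means wave64.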
5232   if (ID == "wavefront_size") {
5233     if (Header.wavefront_size == 5) {
5234       if (!isGFX10Plus())
5235         return TokError("wavefront_size=5 is only allowed on GFX10+");
5236       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5237         return TokError("wavefront_size=5 requires +WavefrontSize32");
5238     } else if (Header.wavefront_size == 6) {
5239       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5240         return TokError("wavefront_size=6 requires +WavefrontSize64");
5241     }
5242   }
5243 
5244   if (ID == "enable_wgp_mode") {
5245     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5246         !isGFX10Plus())
5247       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5248   }
5249 
5250   if (ID == "enable_mem_ordered") {
5251     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5252         !isGFX10Plus())
5253       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5254   }
5255 
5256   if (ID == "enable_fwd_progress") {
5257     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5258         !isGFX10Plus())
5259       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5260   }
5261 
5262   return false;
5263 }
5264 
5265 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5266   amd_kernel_code_t Header;
5267   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5268 
5269   while (true) {
5270     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5271     // will set the current token to EndOfStatement.
5272     while(trySkipToken(AsmToken::EndOfStatement));
5273 
5274     StringRef ID;
5275     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5276       return true;
5277 
5278     if (ID == ".end_amd_kernel_code_t")
5279       break;
5280 
5281     if (ParseAMDKernelCodeTValue(ID, Header))
5282       return true;
5283   }
5284 
5285   getTargetStreamer().EmitAMDKernelCodeT(Header);
5286 
5287   return false;
5288 }
5289 
5290 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5291   StringRef KernelName;
5292   if (!parseId(KernelName, "expected symbol name"))
5293     return true;
5294 
5295   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5296                                            ELF::STT_AMDGPU_HSA_KERNEL);
5297 
5298   KernelScope.initialize(getContext());
5299   return false;
5300 }
5301 
5302 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5303   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5304     return Error(getLoc(),
5305                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5306                  "architectures");
5307   }
5308 
5309   auto TargetIDDirective = getLexer().getTok().getStringContents();
5310   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5311     return Error(getParser().getTok().getLoc(), "target id must match options");
5312 
5313   getTargetStreamer().EmitISAVersion();
5314   Lex();
5315 
5316   return false;
5317 }
5318 
5319 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5320   const char *AssemblerDirectiveBegin;
5321   const char *AssemblerDirectiveEnd;
5322   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5323       isHsaAbiVersion3AndAbove(&getSTI())
5324           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5325                             HSAMD::V3::AssemblerDirectiveEnd)
5326           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5327                             HSAMD::AssemblerDirectiveEnd);
5328 
5329   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5330     return Error(getLoc(),
5331                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5332                  "not available on non-amdhsa OSes")).str());
5333   }
5334 
5335   std::string HSAMetadataString;
5336   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5337                           HSAMetadataString))
5338     return true;
5339 
5340   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5341     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5342       return Error(getLoc(), "invalid HSA metadata");
5343   } else {
5344     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5345       return Error(getLoc(), "invalid HSA metadata");
5346   }
5347 
5348   return false;
5349 }
5350 
5351 /// Common code to parse out a block of text (typically YAML) between start and
5352 /// end directives.
5353 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5354                                           const char *AssemblerDirectiveEnd,
5355                                           std::string &CollectString) {
5356 
5357   raw_string_ostream CollectStream(CollectString);
5358 
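  // Preserve whitespace while collecting the raw text so the emitted block
  // matches the input; skipping is restored below.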
5359   getLexer().setSkipSpace(false);
5360 
5361   bool FoundEnd = false;
5362   while (!isToken(AsmToken::Eof)) {
5363     while (isToken(AsmToken::Space)) {
5364       CollectStream << getTokenStr();
5365       Lex();
5366     }
5367 
5368     if (trySkipId(AssemblerDirectiveEnd)) {
5369       FoundEnd = true;
5370       break;
5371     }
5372 
5373     CollectStream << Parser.parseStringToEndOfStatement()
5374                   << getContext().getAsmInfo()->getSeparatorString();
5375 
5376     Parser.eatToEndOfStatement();
5377   }
5378 
5379   getLexer().setSkipSpace(true);
5380 
5381   if (isToken(AsmToken::Eof) && !FoundEnd) {
5382     return TokError(Twine("expected directive ") +
5383                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5384   }
5385 
5386   CollectStream.flush();
5387   return false;
5388 }
5389 
5390 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5391 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5392   std::string String;
5393   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5394                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5395     return true;
5396 
5397   auto PALMetadata = getTargetStreamer().getPALMetadata();
5398   if (!PALMetadata->setFromString(String))
5399     return Error(getLoc(), "invalid PAL metadata");
5400   return false;
5401 }
5402 
5403 /// Parse the assembler directive for old linear-format PAL metadata.
5404 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5405   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5406     return Error(getLoc(),
5407                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5408                  "not available on non-amdpal OSes")).str());
5409   }
5410 
5411   auto PALMetadata = getTargetStreamer().getPALMetadata();
5412   PALMetadata->setLegacy();
5413   for (;;) {
5414     uint32_t Key, Value;
5415     if (ParseAsAbsoluteExpression(Key)) {
5416       return TokError(Twine("invalid value in ") +
5417                       Twine(PALMD::AssemblerDirective));
5418     }
5419     if (!trySkipToken(AsmToken::Comma)) {
5420       return TokError(Twine("expected an even number of values in ") +
5421                       Twine(PALMD::AssemblerDirective));
5422     }
5423     if (ParseAsAbsoluteExpression(Value)) {
5424       return TokError(Twine("invalid value in ") +
5425                       Twine(PALMD::AssemblerDirective));
5426     }
5427     PALMetadata->setRegister(Key, Value);
5428     if (!trySkipToken(AsmToken::Comma))
5429       break;
5430   }
5431   return false;
5432 }
5433 
5434 /// ParseDirectiveAMDGPULDS
5435 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5436 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5437   if (getParser().checkForValidSection())
5438     return true;
5439 
5440   StringRef Name;
5441   SMLoc NameLoc = getLoc();
5442   if (getParser().parseIdentifier(Name))
5443     return TokError("expected identifier in directive");
5444 
5445   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5446   if (parseToken(AsmToken::Comma, "expected ','"))
5447     return true;
5448 
5449   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5450 
5451   int64_t Size;
5452   SMLoc SizeLoc = getLoc();
5453   if (getParser().parseAbsoluteExpression(Size))
5454     return true;
5455   if (Size < 0)
5456     return Error(SizeLoc, "size must be non-negative");
5457   if (Size > LocalMemorySize)
5458     return Error(SizeLoc, "size is too large");
5459 
5460   int64_t Alignment = 4;
5461   if (trySkipToken(AsmToken::Comma)) {
5462     SMLoc AlignLoc = getLoc();
5463     if (getParser().parseAbsoluteExpression(Alignment))
5464       return true;
5465     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5466       return Error(AlignLoc, "alignment must be a power of two");
5467 
5468     // Alignment larger than the size of LDS is possible in theory, as long
5469     // as the linker manages to place the symbol at address 0, but we do want
5470     // to make sure the alignment fits nicely into a 32-bit integer.
5471     if (Alignment >= 1u << 31)
5472       return Error(AlignLoc, "alignment is too large");
5473   }
5474 
5475   if (parseToken(AsmToken::EndOfStatement,
5476                  "unexpected token in '.amdgpu_lds' directive"))
5477     return true;
5478 
5479   Symbol->redefineIfPossible();
5480   if (!Symbol->isUndefined())
5481     return Error(NameLoc, "invalid symbol redefinition");
5482 
5483   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5484   return false;
5485 }
5486 
5487 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5488   StringRef IDVal = DirectiveID.getString();
5489 
5490   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5491     if (IDVal == ".amdhsa_kernel")
5492      return ParseDirectiveAMDHSAKernel();
5493 
5494     // TODO: Restructure/combine with PAL metadata directive.
5495     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5496       return ParseDirectiveHSAMetadata();
5497   } else {
5498     if (IDVal == ".hsa_code_object_version")
5499       return ParseDirectiveHSACodeObjectVersion();
5500 
5501     if (IDVal == ".hsa_code_object_isa")
5502       return ParseDirectiveHSACodeObjectISA();
5503 
5504     if (IDVal == ".amd_kernel_code_t")
5505       return ParseDirectiveAMDKernelCodeT();
5506 
5507     if (IDVal == ".amdgpu_hsa_kernel")
5508       return ParseDirectiveAMDGPUHsaKernel();
5509 
5510     if (IDVal == ".amd_amdgpu_isa")
5511       return ParseDirectiveISAVersion();
5512 
5513     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5514       return ParseDirectiveHSAMetadata();
5515   }
5516 
5517   if (IDVal == ".amdgcn_target")
5518     return ParseDirectiveAMDGCNTarget();
5519 
5520   if (IDVal == ".amdgpu_lds")
5521     return ParseDirectiveAMDGPULDS();
5522 
5523   if (IDVal == PALMD::AssemblerDirectiveBegin)
5524     return ParseDirectivePALMetadataBegin();
5525 
5526   if (IDVal == PALMD::AssemblerDirective)
5527     return ParseDirectivePALMetadata();
5528 
5529   return true;
5530 }
5531 
5532 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5533                                            unsigned RegNo) {
5534 
5535   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5536     return isGFX9Plus();
5537 
5538   // GFX10 has 2 more SGPRs 104 and 105.
5539   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5540     return hasSGPR104_SGPR105();
5541 
5542   switch (RegNo) {
5543   case AMDGPU::SRC_SHARED_BASE:
5544   case AMDGPU::SRC_SHARED_LIMIT:
5545   case AMDGPU::SRC_PRIVATE_BASE:
5546   case AMDGPU::SRC_PRIVATE_LIMIT:
5547   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5548     return isGFX9Plus();
5549   case AMDGPU::TBA:
5550   case AMDGPU::TBA_LO:
5551   case AMDGPU::TBA_HI:
5552   case AMDGPU::TMA:
5553   case AMDGPU::TMA_LO:
5554   case AMDGPU::TMA_HI:
5555     return !isGFX9Plus();
5556   case AMDGPU::XNACK_MASK:
5557   case AMDGPU::XNACK_MASK_LO:
5558   case AMDGPU::XNACK_MASK_HI:
5559     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5560   case AMDGPU::SGPR_NULL:
5561     return isGFX10Plus();
5562   default:
5563     break;
5564   }
5565 
5566   if (isCI())
5567     return true;
5568 
5569   if (isSI() || isGFX10Plus()) {
5570     // No flat_scr on SI.
5571     // On GFX10 flat scratch is not a valid register operand and can only be
5572     // accessed with s_setreg/s_getreg.
5573     switch (RegNo) {
5574     case AMDGPU::FLAT_SCR:
5575     case AMDGPU::FLAT_SCR_LO:
5576     case AMDGPU::FLAT_SCR_HI:
5577       return false;
5578     default:
5579       return true;
5580     }
5581   }
5582 
5583   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5584   // SI/CI have.
5585   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5586     return hasSGPR102_SGPR103();
5587 
5588   return true;
5589 }
5590 
5591 OperandMatchResultTy
5592 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5593                               OperandMode Mode) {
5594   // Try to parse with a custom parser
5595   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5596 
5597   // If we successfully parsed the operand or if there was an error parsing,
5598   // we are done.
5599   //
5600   // If we are parsing after we reach EndOfStatement then this means we
5601   // are appending default values to the Operands list.  This is only done
5602   // by a custom parser, so we shouldn't continue on to the generic parsing.
5603   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5604       isToken(AsmToken::EndOfStatement))
5605     return ResTy;
5606 
5607   SMLoc RBraceLoc;
5608   SMLoc LBraceLoc = getLoc();
5609   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5610     unsigned Prefix = Operands.size();
5611 
5612     for (;;) {
5613       auto Loc = getLoc();
5614       ResTy = parseReg(Operands);
5615       if (ResTy == MatchOperand_NoMatch)
5616         Error(Loc, "expected a register");
5617       if (ResTy != MatchOperand_Success)
5618         return MatchOperand_ParseFail;
5619 
5620       RBraceLoc = getLoc();
5621       if (trySkipToken(AsmToken::RBrac))
5622         break;
5623 
5624       if (!skipToken(AsmToken::Comma,
5625                      "expected a comma or a closing square bracket")) {
5626         return MatchOperand_ParseFail;
5627       }
5628     }
5629 
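    // Keep the bracket tokens only if more than one register was parsed;
    // a single register degenerates to the non-NSA form.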
5630     if (Operands.size() - Prefix > 1) {
5631       Operands.insert(Operands.begin() + Prefix,
5632                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5633       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5634     }
5635 
5636     return MatchOperand_Success;
5637   }
5638 
5639   return parseRegOrImm(Operands);
5640 }
5641 
5642 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5643   // Clear any forced encodings from the previous instruction.
5644   setForcedEncodingSize(0);
5645   setForcedDPP(false);
5646   setForcedSDWA(false);
5647 
5648   if (Name.endswith("_e64")) {
5649     setForcedEncodingSize(64);
5650     return Name.substr(0, Name.size() - 4);
5651   } else if (Name.endswith("_e32")) {
5652     setForcedEncodingSize(32);
5653     return Name.substr(0, Name.size() - 4);
5654   } else if (Name.endswith("_dpp")) {
5655     setForcedDPP(true);
5656     return Name.substr(0, Name.size() - 4);
5657   } else if (Name.endswith("_sdwa")) {
5658     setForcedSDWA(true);
5659     return Name.substr(0, Name.size() - 5);
5660   }
5661   return Name;
5662 }
5663 
5664 static void applyMnemonicAliases(StringRef &Mnemonic,
5665                                  const FeatureBitset &Features,
5666                                  unsigned VariantID);
5667 
5668 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5669                                        StringRef Name,
5670                                        SMLoc NameLoc, OperandVector &Operands) {
5671   // Add the instruction mnemonic
5672   Name = parseMnemonicSuffix(Name);
5673 
5674   // If the target architecture uses MnemonicAlias, call it here to parse
5675   // operands correctly.
5676   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5677 
5678   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5679 
5680   bool IsMIMG = Name.startswith("image_");
5681 
5682   while (!trySkipToken(AsmToken::EndOfStatement)) {
5683     OperandMode Mode = OperandMode_Default;
5684     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5685       Mode = OperandMode_NSA;
5686     CPolSeen = 0;
5687     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5688 
5689     if (Res != MatchOperand_Success) {
5690       checkUnsupportedInstruction(Name, NameLoc);
5691       if (!Parser.hasPendingError()) {
5692         // FIXME: use real operand location rather than the current location.
5693         StringRef Msg =
5694           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5695                                             "not a valid operand.";
5696         Error(getLoc(), Msg);
5697       }
5698       while (!trySkipToken(AsmToken::EndOfStatement)) {
5699         lex();
5700       }
5701       return true;
5702     }
5703 
5704     // Eat the comma or space if there is one.
5705     trySkipToken(AsmToken::Comma);
5706   }
5707 
5708   return false;
5709 }
5710 
5711 //===----------------------------------------------------------------------===//
5712 // Utility functions
5713 //===----------------------------------------------------------------------===//
5714 
5715 OperandMatchResultTy
5716 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5717 
5718   if (!trySkipId(Prefix, AsmToken::Colon))
5719     return MatchOperand_NoMatch;
5720 
5721   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5722 }
5723 
5724 OperandMatchResultTy
5725 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5726                                     AMDGPUOperand::ImmTy ImmTy,
5727                                     bool (*ConvertResult)(int64_t&)) {
5728   SMLoc S = getLoc();
5729   int64_t Value = 0;
5730 
5731   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5732   if (Res != MatchOperand_Success)
5733     return Res;
5734 
5735   if (ConvertResult && !ConvertResult(Value)) {
5736     Error(S, "invalid " + StringRef(Prefix) + " value.");
5737   }
5738 
5739   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5740   return MatchOperand_Success;
5741 }
5742 
5743 OperandMatchResultTy
5744 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5745                                              OperandVector &Operands,
5746                                              AMDGPUOperand::ImmTy ImmTy,
5747                                              bool (*ConvertResult)(int64_t&)) {
5748   SMLoc S = getLoc();
5749   if (!trySkipId(Prefix, AsmToken::Colon))
5750     return MatchOperand_NoMatch;
5751 
5752   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5753     return MatchOperand_ParseFail;
5754 
5755   unsigned Val = 0;
5756   const unsigned MaxSize = 4;
5757 
5758   // FIXME: How to verify the number of elements matches the number of src
5759   // operands?
5760   for (int I = 0; ; ++I) {
5761     int64_t Op;
5762     SMLoc Loc = getLoc();
5763     if (!parseExpr(Op))
5764       return MatchOperand_ParseFail;
5765 
5766     if (Op != 0 && Op != 1) {
5767       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5768       return MatchOperand_ParseFail;
5769     }
5770 
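    // Pack each 0/1 element into the corresponding bit of the immediate.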
5771     Val |= (Op << I);
5772 
5773     if (trySkipToken(AsmToken::RBrac))
5774       break;
5775 
5776     if (I + 1 == MaxSize) {
5777       Error(getLoc(), "expected a closing square bracket");
5778       return MatchOperand_ParseFail;
5779     }
5780 
5781     if (!skipToken(AsmToken::Comma, "expected a comma"))
5782       return MatchOperand_ParseFail;
5783   }
5784 
5785   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5786   return MatchOperand_Success;
5787 }
5788 
5789 OperandMatchResultTy
5790 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5791                                AMDGPUOperand::ImmTy ImmTy) {
5792   int64_t Bit;
5793   SMLoc S = getLoc();
5794 
5795   if (trySkipId(Name)) {
5796     Bit = 1;
5797   } else if (trySkipId("no", Name)) {
5798     Bit = 0;
5799   } else {
5800     return MatchOperand_NoMatch;
5801   }
5802 
5803   if (Name == "r128" && !hasMIMG_R128()) {
5804     Error(S, "r128 modifier is not supported on this GPU");
5805     return MatchOperand_ParseFail;
5806   }
5807   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5808     Error(S, "a16 modifier is not supported on this GPU");
5809     return MatchOperand_ParseFail;
5810   }
5811 
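  // GFX9 encodes a16 with the shared r128/a16 operand, so switch the
  // immediate type accordingly.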
5812   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5813     ImmTy = AMDGPUOperand::ImmTyR128A16;
5814 
5815   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5816   return MatchOperand_Success;
5817 }
5818 
5819 OperandMatchResultTy
5820 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5821   unsigned CPolOn = 0;
5822   unsigned CPolOff = 0;
5823   SMLoc S = getLoc();
5824 
5825   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5826   if (isGFX940() && !Mnemo.startswith("s_")) {
5827     if (trySkipId("sc0"))
5828       CPolOn = AMDGPU::CPol::SC0;
5829     else if (trySkipId("nosc0"))
5830       CPolOff = AMDGPU::CPol::SC0;
5831     else if (trySkipId("nt"))
5832       CPolOn = AMDGPU::CPol::NT;
5833     else if (trySkipId("nont"))
5834       CPolOff = AMDGPU::CPol::NT;
5835     else if (trySkipId("sc1"))
5836       CPolOn = AMDGPU::CPol::SC1;
5837     else if (trySkipId("nosc1"))
5838       CPolOff = AMDGPU::CPol::SC1;
5839     else
5840       return MatchOperand_NoMatch;
5841   }
5842   else if (trySkipId("glc"))
5843     CPolOn = AMDGPU::CPol::GLC;
5844   else if (trySkipId("noglc"))
5845     CPolOff = AMDGPU::CPol::GLC;
5846   else if (trySkipId("slc"))
5847     CPolOn = AMDGPU::CPol::SLC;
5848   else if (trySkipId("noslc"))
5849     CPolOff = AMDGPU::CPol::SLC;
5850   else if (trySkipId("dlc"))
5851     CPolOn = AMDGPU::CPol::DLC;
5852   else if (trySkipId("nodlc"))
5853     CPolOff = AMDGPU::CPol::DLC;
5854   else if (trySkipId("scc"))
5855     CPolOn = AMDGPU::CPol::SCC;
5856   else if (trySkipId("noscc"))
5857     CPolOff = AMDGPU::CPol::SCC;
5858   else
5859     return MatchOperand_NoMatch;
5860 
5861   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5862     Error(S, "dlc modifier is not supported on this GPU");
5863     return MatchOperand_ParseFail;
5864   }
5865 
5866   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5867     Error(S, "scc modifier is not supported on this GPU");
5868     return MatchOperand_ParseFail;
5869   }
5870 
5871   if (CPolSeen & (CPolOn | CPolOff)) {
5872     Error(S, "duplicate cache policy modifier");
5873     return MatchOperand_ParseFail;
5874   }
5875 
5876   CPolSeen |= (CPolOn | CPolOff);
5877 
5878   for (unsigned I = 1; I != Operands.size(); ++I) {
5879     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5880     if (Op.isCPol()) {
5881       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5882       return MatchOperand_Success;
5883     }
5884   }
5885 
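  // No cpol operand exists yet for this instruction; create one.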
5886   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5887                                               AMDGPUOperand::ImmTyCPol));
5888 
5889   return MatchOperand_Success;
5890 }
5891 
5892 static void addOptionalImmOperand(
5893   MCInst& Inst, const OperandVector& Operands,
5894   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5895   AMDGPUOperand::ImmTy ImmT,
5896   int64_t Default = 0) {
5897   auto i = OptionalIdx.find(ImmT);
5898   if (i != OptionalIdx.end()) {
5899     unsigned Idx = i->second;
5900     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5901   } else {
5902     Inst.addOperand(MCOperand::createImm(Default));
5903   }
5904 }
5905 
5906 OperandMatchResultTy
5907 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5908                                        StringRef &Value,
5909                                        SMLoc &StringLoc) {
5910   if (!trySkipId(Prefix, AsmToken::Colon))
5911     return MatchOperand_NoMatch;
5912 
5913   StringLoc = getLoc();
5914   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5915                                                   : MatchOperand_ParseFail;
5916 }
5917 
5918 //===----------------------------------------------------------------------===//
5919 // MTBUF format
5920 //===----------------------------------------------------------------------===//
5921 
5922 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5923                                   int64_t MaxVal,
5924                                   int64_t &Fmt) {
5925   int64_t Val;
5926   SMLoc Loc = getLoc();
5927 
5928   auto Res = parseIntWithPrefix(Pref, Val);
5929   if (Res == MatchOperand_ParseFail)
5930     return false;
5931   if (Res == MatchOperand_NoMatch)
5932     return true;
5933 
5934   if (Val < 0 || Val > MaxVal) {
5935     Error(Loc, Twine("out of range ", StringRef(Pref)));
5936     return false;
5937   }
5938 
5939   Fmt = Val;
5940   return true;
5941 }
5942 
5943 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5944 // values to live in a joint format operand in the MCInst encoding.
5945 OperandMatchResultTy
5946 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5947   using namespace llvm::AMDGPU::MTBUFFormat;
5948 
5949   int64_t Dfmt = DFMT_UNDEF;
5950   int64_t Nfmt = NFMT_UNDEF;
5951 
5952   // dfmt and nfmt can appear in either order, and each is optional.
5953   for (int I = 0; I < 2; ++I) {
5954     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5955       return MatchOperand_ParseFail;
5956 
5957     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5958       return MatchOperand_ParseFail;
5959     }
5960     // Skip optional comma between dfmt/nfmt
5961     // but guard against 2 commas following each other.
5962     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5963         !peekToken().is(AsmToken::Comma)) {
5964       trySkipToken(AsmToken::Comma);
5965     }
5966   }
5967 
5968   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5969     return MatchOperand_NoMatch;
5970 
5971   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5972   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5973 
5974   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5975   return MatchOperand_Success;
5976 }
5977 
5978 OperandMatchResultTy
5979 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5980   using namespace llvm::AMDGPU::MTBUFFormat;
5981 
5982   int64_t Fmt = UFMT_UNDEF;
5983 
5984   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5985     return MatchOperand_ParseFail;
5986 
5987   if (Fmt == UFMT_UNDEF)
5988     return MatchOperand_NoMatch;
5989 
5990   Format = Fmt;
5991   return MatchOperand_Success;
5992 }
5993 
5994 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5995                                     int64_t &Nfmt,
5996                                     StringRef FormatStr,
5997                                     SMLoc Loc) {
5998   using namespace llvm::AMDGPU::MTBUFFormat;
5999   int64_t Format;
6000 
6001   Format = getDfmt(FormatStr);
6002   if (Format != DFMT_UNDEF) {
6003     Dfmt = Format;
6004     return true;
6005   }
6006 
6007   Format = getNfmt(FormatStr, getSTI());
6008   if (Format != NFMT_UNDEF) {
6009     Nfmt = Format;
6010     return true;
6011   }
6012 
6013   Error(Loc, "unsupported format");
6014   return false;
6015 }
6016 
6017 OperandMatchResultTy
6018 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6019                                           SMLoc FormatLoc,
6020                                           int64_t &Format) {
6021   using namespace llvm::AMDGPU::MTBUFFormat;
6022 
6023   int64_t Dfmt = DFMT_UNDEF;
6024   int64_t Nfmt = NFMT_UNDEF;
6025   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6026     return MatchOperand_ParseFail;
6027 
6028   if (trySkipToken(AsmToken::Comma)) {
6029     StringRef Str;
6030     SMLoc Loc = getLoc();
6031     if (!parseId(Str, "expected a format string") ||
6032         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6033       return MatchOperand_ParseFail;
6034     }
6035     if (Dfmt == DFMT_UNDEF) {
6036       Error(Loc, "duplicate numeric format");
6037       return MatchOperand_ParseFail;
6038     } else if (Nfmt == NFMT_UNDEF) {
6039       Error(Loc, "duplicate data format");
6040       return MatchOperand_ParseFail;
6041     }
6042   }
6043 
6044   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6045   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6046 
6047   if (isGFX10Plus()) {
6048     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6049     if (Ufmt == UFMT_UNDEF) {
6050       Error(FormatLoc, "unsupported format");
6051       return MatchOperand_ParseFail;
6052     }
6053     Format = Ufmt;
6054   } else {
6055     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6056   }
6057 
6058   return MatchOperand_Success;
6059 }
6060 
6061 OperandMatchResultTy
6062 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6063                                             SMLoc Loc,
6064                                             int64_t &Format) {
6065   using namespace llvm::AMDGPU::MTBUFFormat;
6066 
6067   auto Id = getUnifiedFormat(FormatStr, getSTI());
6068   if (Id == UFMT_UNDEF)
6069     return MatchOperand_NoMatch;
6070 
6071   if (!isGFX10Plus()) {
6072     Error(Loc, "unified format is not supported on this GPU");
6073     return MatchOperand_ParseFail;
6074   }
6075 
6076   Format = Id;
6077   return MatchOperand_Success;
6078 }
6079 
6080 OperandMatchResultTy
6081 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6082   using namespace llvm::AMDGPU::MTBUFFormat;
6083   SMLoc Loc = getLoc();
6084 
6085   if (!parseExpr(Format))
6086     return MatchOperand_ParseFail;
6087   if (!isValidFormatEncoding(Format, getSTI())) {
6088     Error(Loc, "out of range format");
6089     return MatchOperand_ParseFail;
6090   }
6091 
6092   return MatchOperand_Success;
6093 }
6094 
6095 OperandMatchResultTy
6096 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6097   using namespace llvm::AMDGPU::MTBUFFormat;
6098 
6099   if (!trySkipId("format", AsmToken::Colon))
6100     return MatchOperand_NoMatch;
6101 
6102   if (trySkipToken(AsmToken::LBrac)) {
6103     StringRef FormatStr;
6104     SMLoc Loc = getLoc();
6105     if (!parseId(FormatStr, "expected a format string"))
6106       return MatchOperand_ParseFail;
6107 
6108     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6109     if (Res == MatchOperand_NoMatch)
6110       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6111     if (Res != MatchOperand_Success)
6112       return Res;
6113 
6114     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6115       return MatchOperand_ParseFail;
6116 
6117     return MatchOperand_Success;
6118   }
6119 
6120   return parseNumericFormat(Format);
6121 }
6122 
6123 OperandMatchResultTy
6124 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6125   using namespace llvm::AMDGPU::MTBUFFormat;
6126 
6127   int64_t Format = getDefaultFormatEncoding(getSTI());
6128   OperandMatchResultTy Res;
6129   SMLoc Loc = getLoc();
6130 
6131   // Parse legacy format syntax.
6132   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6133   if (Res == MatchOperand_ParseFail)
6134     return Res;
6135 
6136   bool FormatFound = (Res == MatchOperand_Success);
6137 
6138   Operands.push_back(
6139     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6140 
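  // If a format was parsed, an optional comma may separate it from soffset.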
6141   if (FormatFound)
6142     trySkipToken(AsmToken::Comma);
6143 
6144   if (isToken(AsmToken::EndOfStatement)) {
6145     // We are expecting an soffset operand,
6146     // but let the matcher handle the error.
6147     return MatchOperand_Success;
6148   }
6149 
6150   // Parse soffset.
6151   Res = parseRegOrImm(Operands);
6152   if (Res != MatchOperand_Success)
6153     return Res;
6154 
6155   trySkipToken(AsmToken::Comma);
6156 
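  // If no format preceded soffset, it may follow it; parse it here and
  // update the placeholder FORMAT operand that was pushed above.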
6157   if (!FormatFound) {
6158     Res = parseSymbolicOrNumericFormat(Format);
6159     if (Res == MatchOperand_ParseFail)
6160       return Res;
6161     if (Res == MatchOperand_Success) {
6162       auto Size = Operands.size();
6163       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6164       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6165       Op.setImm(Format);
6166     }
6167     return MatchOperand_Success;
6168   }
6169 
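  // Reject a second 'format' specifier when one has already been parsed.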
6170   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6171     Error(getLoc(), "duplicate format");
6172     return MatchOperand_ParseFail;
6173   }
6174   return MatchOperand_Success;
6175 }
6176 
6177 //===----------------------------------------------------------------------===//
6178 // ds
6179 //===----------------------------------------------------------------------===//
6180 
6181 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6182                                     const OperandVector &Operands) {
6183   OptionalImmIndexMap OptionalIdx;
6184 
6185   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6186     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6187 
6188     // Add the register arguments
6189     if (Op.isReg()) {
6190       Op.addRegOperands(Inst, 1);
6191       continue;
6192     }
6193 
6194     // Handle optional arguments
6195     OptionalIdx[Op.getImmTy()] = i;
6196   }
6197 
6198   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6199   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6200   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6201 
6202   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6203 }
6204 
6205 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6206                                 bool IsGdsHardcoded) {
6207   OptionalImmIndexMap OptionalIdx;
6208 
6209   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6210     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6211 
6212     // Add the register arguments
6213     if (Op.isReg()) {
6214       Op.addRegOperands(Inst, 1);
6215       continue;
6216     }
6217 
6218     if (Op.isToken() && Op.getToken() == "gds") {
6219       IsGdsHardcoded = true;
6220       continue;
6221     }
6222 
6223     // Handle optional arguments
6224     OptionalIdx[Op.getImmTy()] = i;
6225   }
6226 
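  // ds_swizzle_b32 encodes its swizzle pattern in the offset field, so use
  // the dedicated swizzle operand type for these opcodes.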
6227   AMDGPUOperand::ImmTy OffsetType =
6228     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6229      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6230      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6231                                                       AMDGPUOperand::ImmTyOffset;
6232 
6233   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6234 
6235   if (!IsGdsHardcoded) {
6236     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6237   }
6238   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6239 }
6240 
6241 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6242   OptionalImmIndexMap OptionalIdx;
6243 
6244   unsigned OperandIdx[4];
6245   unsigned EnMask = 0;
6246   int SrcIdx = 0;
6247 
6248   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6249     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6250 
6251     // Add the register arguments
6252     if (Op.isReg()) {
6253       assert(SrcIdx < 4);
6254       OperandIdx[SrcIdx] = Inst.size();
6255       Op.addRegOperands(Inst, 1);
6256       ++SrcIdx;
6257       continue;
6258     }
6259 
6260     if (Op.isOff()) {
6261       assert(SrcIdx < 4);
6262       OperandIdx[SrcIdx] = Inst.size();
6263       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6264       ++SrcIdx;
6265       continue;
6266     }
6267 
6268     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6269       Op.addImmOperands(Inst, 1);
6270       continue;
6271     }
6272 
6273     if (Op.isToken() && Op.getToken() == "done")
6274       continue;
6275 
6276     // Handle optional arguments
6277     OptionalIdx[Op.getImmTy()] = i;
6278   }
6279 
6280   assert(SrcIdx == 4);
6281 
6282   bool Compr = false;
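  // For compressed exports the sources are packed: move the third source into
  // the second slot and mark the two remaining slots as unused.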
6283   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6284     Compr = true;
6285     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6286     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6287     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6288   }
6289 
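  // Build the 'en' mask: one bit per enabled source, or two bits per enabled
  // pair when the export is compressed.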
6290   for (auto i = 0; i < SrcIdx; ++i) {
6291     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6292       EnMask |= Compr ? (0x3 << (i * 2)) : (0x1 << i);
6293     }
6294   }
6295 
6296   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6297   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6298 
6299   Inst.addOperand(MCOperand::createImm(EnMask));
6300 }
6301 
6302 //===----------------------------------------------------------------------===//
6303 // s_waitcnt
6304 //===----------------------------------------------------------------------===//
6305 
6306 static bool
6307 encodeCnt(
6308   const AMDGPU::IsaVersion ISA,
6309   int64_t &IntVal,
6310   int64_t CntVal,
6311   bool Saturate,
6312   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6313   unsigned (*decode)(const IsaVersion &Version, unsigned))
6314 {
6315   bool Failed = false;
6316 
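  // If the value does not survive an encode/decode round trip, it does not
  // fit in the counter field: either saturate to the field maximum or fail.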
6317   IntVal = encode(ISA, IntVal, CntVal);
6318   if (CntVal != decode(ISA, IntVal)) {
6319     if (Saturate) {
6320       IntVal = encode(ISA, IntVal, -1);
6321     } else {
6322       Failed = true;
6323     }
6324   }
6325   return Failed;
6326 }
6327 
6328 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6329 
6330   SMLoc CntLoc = getLoc();
6331   StringRef CntName = getTokenStr();
6332 
6333   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6334       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6335     return false;
6336 
6337   int64_t CntVal;
6338   SMLoc ValLoc = getLoc();
6339   if (!parseExpr(CntVal))
6340     return false;
6341 
6342   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6343 
6344   bool Failed = true;
6345   bool Sat = CntName.endswith("_sat");
6346 
6347   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6348     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6349   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6350     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6351   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6352     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6353   } else {
6354     Error(CntLoc, "invalid counter name " + CntName);
6355     return false;
6356   }
6357 
6358   if (Failed) {
6359     Error(ValLoc, "too large value for " + CntName);
6360     return false;
6361   }
6362 
6363   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6364     return false;
6365 
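  // Counters may be separated by '&' or ','; a trailing separator must be
  // followed by another counter.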
6366   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6367     if (isToken(AsmToken::EndOfStatement)) {
6368       Error(getLoc(), "expected a counter name");
6369       return false;
6370     }
6371   }
6372 
6373   return true;
6374 }
6375 
6376 OperandMatchResultTy
6377 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6378   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6379   int64_t Waitcnt = getWaitcntBitMask(ISA);
6380   SMLoc S = getLoc();
6381 
6382   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6383     while (!isToken(AsmToken::EndOfStatement)) {
6384       if (!parseCnt(Waitcnt))
6385         return MatchOperand_ParseFail;
6386     }
6387   } else {
6388     if (!parseExpr(Waitcnt))
6389       return MatchOperand_ParseFail;
6390   }
6391 
6392   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6393   return MatchOperand_Success;
6394 }
6395 
6396 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6397   SMLoc FieldLoc = getLoc();
6398   StringRef FieldName = getTokenStr();
6399   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6400       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6401     return false;
6402 
6403   SMLoc ValueLoc = getLoc();
6404   StringRef ValueName = getTokenStr();
6405   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6406       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6407     return false;
6408 
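  // Map the field name to its bit position within the s_delay_alu encoding.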
6409   unsigned Shift;
6410   if (FieldName == "instid0") {
6411     Shift = 0;
6412   } else if (FieldName == "instskip") {
6413     Shift = 4;
6414   } else if (FieldName == "instid1") {
6415     Shift = 7;
6416   } else {
6417     Error(FieldLoc, "invalid field name " + FieldName);
6418     return false;
6419   }
6420 
6421   int Value;
6422   if (Shift == 4) {
6423     // Parse values for instskip.
6424     Value = StringSwitch<int>(ValueName)
6425                 .Case("SAME", 0)
6426                 .Case("NEXT", 1)
6427                 .Case("SKIP_1", 2)
6428                 .Case("SKIP_2", 3)
6429                 .Case("SKIP_3", 4)
6430                 .Case("SKIP_4", 5)
6431                 .Default(-1);
6432   } else {
6433     // Parse values for instid0 and instid1.
6434     Value = StringSwitch<int>(ValueName)
6435                 .Case("NO_DEP", 0)
6436                 .Case("VALU_DEP_1", 1)
6437                 .Case("VALU_DEP_2", 2)
6438                 .Case("VALU_DEP_3", 3)
6439                 .Case("VALU_DEP_4", 4)
6440                 .Case("TRANS32_DEP_1", 5)
6441                 .Case("TRANS32_DEP_2", 6)
6442                 .Case("TRANS32_DEP_3", 7)
6443                 .Case("FMA_ACCUM_CYCLE_1", 8)
6444                 .Case("SALU_CYCLE_1", 9)
6445                 .Case("SALU_CYCLE_2", 10)
6446                 .Case("SALU_CYCLE_3", 11)
6447                 .Default(-1);
6448   }
6449   if (Value < 0) {
6450     Error(ValueLoc, "invalid value name " + ValueName);
6451     return false;
6452   }
6453 
6454   Delay |= Value << Shift;
6455   return true;
6456 }
6457 
6458 OperandMatchResultTy
6459 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6460   int64_t Delay = 0;
6461   SMLoc S = getLoc();
6462 
6463   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6464     do {
6465       if (!parseDelay(Delay))
6466         return MatchOperand_ParseFail;
6467     } while (trySkipToken(AsmToken::Pipe));
6468   } else {
6469     if (!parseExpr(Delay))
6470       return MatchOperand_ParseFail;
6471   }
6472 
6473   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6474   return MatchOperand_Success;
6475 }
6476 
6477 bool
6478 AMDGPUOperand::isSWaitCnt() const {
6479   return isImm();
6480 }
6481 
6482 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6483 
6484 //===----------------------------------------------------------------------===//
6485 // DepCtr
6486 //===----------------------------------------------------------------------===//
6487 
6488 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6489                                   StringRef DepCtrName) {
6490   switch (ErrorId) {
6491   case OPR_ID_UNKNOWN:
6492     Error(Loc, Twine("invalid counter name ", DepCtrName));
6493     return;
6494   case OPR_ID_UNSUPPORTED:
6495     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6496     return;
6497   case OPR_ID_DUPLICATE:
6498     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6499     return;
6500   case OPR_VAL_INVALID:
6501     Error(Loc, Twine("invalid value for ", DepCtrName));
6502     return;
6503   default:
6504     assert(false);
6505   }
6506 }
6507 
6508 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6509 
6510   using namespace llvm::AMDGPU::DepCtr;
6511 
6512   SMLoc DepCtrLoc = getLoc();
6513   StringRef DepCtrName = getTokenStr();
6514 
6515   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6516       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6517     return false;
6518 
6519   int64_t ExprVal;
6520   if (!parseExpr(ExprVal))
6521     return false;
6522 
6523   unsigned PrevOprMask = UsedOprMask;
6524   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6525 
6526   if (CntVal < 0) {
6527     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6528     return false;
6529   }
6530 
6531   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6532     return false;
6533 
6534   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6535     if (isToken(AsmToken::EndOfStatement)) {
6536       Error(getLoc(), "expected a counter name");
6537       return false;
6538     }
6539   }
6540 
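  // The bits newly marked in UsedOprMask belong to this counter; clear them
  // in DepCtr before merging in the encoded value.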
6541   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6542   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6543   return true;
6544 }
6545 
6546 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6547   using namespace llvm::AMDGPU::DepCtr;
6548 
6549   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6550   SMLoc Loc = getLoc();
6551 
6552   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6553     unsigned UsedOprMask = 0;
6554     while (!isToken(AsmToken::EndOfStatement)) {
6555       if (!parseDepCtr(DepCtr, UsedOprMask))
6556         return MatchOperand_ParseFail;
6557     }
6558   } else {
6559     if (!parseExpr(DepCtr))
6560       return MatchOperand_ParseFail;
6561   }
6562 
6563   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6564   return MatchOperand_Success;
6565 }
6566 
6567 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6568 
6569 //===----------------------------------------------------------------------===//
6570 // hwreg
6571 //===----------------------------------------------------------------------===//
6572 
6573 bool
6574 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6575                                 OperandInfoTy &Offset,
6576                                 OperandInfoTy &Width) {
6577   using namespace llvm::AMDGPU::Hwreg;
6578 
6579   // The register may be specified by name or using a numeric code
6580   HwReg.Loc = getLoc();
6581   if (isToken(AsmToken::Identifier) &&
6582       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6583     HwReg.IsSymbolic = true;
6584     lex(); // skip register name
6585   } else if (!parseExpr(HwReg.Id, "a register name")) {
6586     return false;
6587   }
6588 
6589   if (trySkipToken(AsmToken::RParen))
6590     return true;
6591 
6592   // parse optional params
6593   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6594     return false;
6595 
6596   Offset.Loc = getLoc();
6597   if (!parseExpr(Offset.Id))
6598     return false;
6599 
6600   if (!skipToken(AsmToken::Comma, "expected a comma"))
6601     return false;
6602 
6603   Width.Loc = getLoc();
6604   return parseExpr(Width.Id) &&
6605          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6606 }
6607 
6608 bool
6609 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6610                                const OperandInfoTy &Offset,
6611                                const OperandInfoTy &Width) {
6612 
6613   using namespace llvm::AMDGPU::Hwreg;
6614 
6615   if (HwReg.IsSymbolic) {
6616     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6617       Error(HwReg.Loc,
6618             "specified hardware register is not supported on this GPU");
6619       return false;
6620     }
6621   } else {
6622     if (!isValidHwreg(HwReg.Id)) {
6623       Error(HwReg.Loc,
6624             "invalid code of hardware register: only 6-bit values are legal");
6625       return false;
6626     }
6627   }
6628   if (!isValidHwregOffset(Offset.Id)) {
6629     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6630     return false;
6631   }
6632   if (!isValidHwregWidth(Width.Id)) {
6633     Error(Width.Loc,
6634           "invalid bitfield width: only values from 1 to 32 are legal");
6635     return false;
6636   }
6637   return true;
6638 }
6639 
6640 OperandMatchResultTy
6641 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6642   using namespace llvm::AMDGPU::Hwreg;
6643 
6644   int64_t ImmVal = 0;
6645   SMLoc Loc = getLoc();
6646 
6647   if (trySkipId("hwreg", AsmToken::LParen)) {
6648     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6649     OperandInfoTy Offset(OFFSET_DEFAULT_);
6650     OperandInfoTy Width(WIDTH_DEFAULT_);
6651     if (parseHwregBody(HwReg, Offset, Width) &&
6652         validateHwreg(HwReg, Offset, Width)) {
6653       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6654     } else {
6655       return MatchOperand_ParseFail;
6656     }
6657   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6658     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6659       Error(Loc, "invalid immediate: only 16-bit values are legal");
6660       return MatchOperand_ParseFail;
6661     }
6662   } else {
6663     return MatchOperand_ParseFail;
6664   }
6665 
6666   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6667   return MatchOperand_Success;
6668 }
6669 
6670 bool AMDGPUOperand::isHwreg() const {
6671   return isImmTy(ImmTyHwreg);
6672 }
6673 
6674 //===----------------------------------------------------------------------===//
6675 // sendmsg
6676 //===----------------------------------------------------------------------===//
6677 
6678 bool
6679 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6680                                   OperandInfoTy &Op,
6681                                   OperandInfoTy &Stream) {
6682   using namespace llvm::AMDGPU::SendMsg;
6683 
6684   Msg.Loc = getLoc();
6685   if (isToken(AsmToken::Identifier) &&
6686       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6687     Msg.IsSymbolic = true;
6688     lex(); // skip message name
6689   } else if (!parseExpr(Msg.Id, "a message name")) {
6690     return false;
6691   }
6692 
6693   if (trySkipToken(AsmToken::Comma)) {
6694     Op.IsDefined = true;
6695     Op.Loc = getLoc();
6696     if (isToken(AsmToken::Identifier) &&
6697         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6698       lex(); // skip operation name
6699     } else if (!parseExpr(Op.Id, "an operation name")) {
6700       return false;
6701     }
6702 
6703     if (trySkipToken(AsmToken::Comma)) {
6704       Stream.IsDefined = true;
6705       Stream.Loc = getLoc();
6706       if (!parseExpr(Stream.Id))
6707         return false;
6708     }
6709   }
6710 
6711   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6712 }
6713 
6714 bool
6715 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6716                                  const OperandInfoTy &Op,
6717                                  const OperandInfoTy &Stream) {
6718   using namespace llvm::AMDGPU::SendMsg;
6719 
6720   // Validation strictness depends on whether the message is specified
6721   // in a symbolic or in a numeric form. In the latter case
6722   // only the possibility of encoding the value is checked.
6723   bool Strict = Msg.IsSymbolic;
6724 
6725   if (Strict) {
6726     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6727       Error(Msg.Loc, "specified message id is not supported on this GPU");
6728       return false;
6729     }
6730   } else {
6731     if (!isValidMsgId(Msg.Id, getSTI())) {
6732       Error(Msg.Loc, "invalid message id");
6733       return false;
6734     }
6735   }
6736   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6737     if (Op.IsDefined) {
6738       Error(Op.Loc, "message does not support operations");
6739     } else {
6740       Error(Msg.Loc, "missing message operation");
6741     }
6742     return false;
6743   }
6744   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6745     Error(Op.Loc, "invalid operation id");
6746     return false;
6747   }
6748   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6749       Stream.IsDefined) {
6750     Error(Stream.Loc, "message operation does not support streams");
6751     return false;
6752   }
6753   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6754     Error(Stream.Loc, "invalid message stream id");
6755     return false;
6756   }
6757   return true;
6758 }
6759 
6760 OperandMatchResultTy
6761 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6762   using namespace llvm::AMDGPU::SendMsg;
6763 
6764   int64_t ImmVal = 0;
6765   SMLoc Loc = getLoc();
6766 
6767   if (trySkipId("sendmsg", AsmToken::LParen)) {
6768     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6769     OperandInfoTy Op(OP_NONE_);
6770     OperandInfoTy Stream(STREAM_ID_NONE_);
6771     if (parseSendMsgBody(Msg, Op, Stream) &&
6772         validateSendMsg(Msg, Op, Stream)) {
6773       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6774     } else {
6775       return MatchOperand_ParseFail;
6776     }
6777   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6778     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6779       Error(Loc, "invalid immediate: only 16-bit values are legal");
6780       return MatchOperand_ParseFail;
6781     }
6782   } else {
6783     return MatchOperand_ParseFail;
6784   }
6785 
6786   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6787   return MatchOperand_Success;
6788 }
6789 
6790 bool AMDGPUOperand::isSendMsg() const {
6791   return isImmTy(ImmTySendMsg);
6792 }
6793 
6794 //===----------------------------------------------------------------------===//
6795 // v_interp
6796 //===----------------------------------------------------------------------===//
6797 
6798 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6799   StringRef Str;
6800   SMLoc S = getLoc();
6801 
6802   if (!parseId(Str))
6803     return MatchOperand_NoMatch;
6804 
6805   int Slot = StringSwitch<int>(Str)
6806     .Case("p10", 0)
6807     .Case("p20", 1)
6808     .Case("p0", 2)
6809     .Default(-1);
6810 
6811   if (Slot == -1) {
6812     Error(S, "invalid interpolation slot");
6813     return MatchOperand_ParseFail;
6814   }
6815 
6816   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6817                                               AMDGPUOperand::ImmTyInterpSlot));
6818   return MatchOperand_Success;
6819 }
6820 
6821 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6822   StringRef Str;
6823   SMLoc S = getLoc();
6824 
6825   if (!parseId(Str))
6826     return MatchOperand_NoMatch;
6827 
6828   if (!Str.startswith("attr")) {
6829     Error(S, "invalid interpolation attribute");
6830     return MatchOperand_ParseFail;
6831   }
6832 
6833   StringRef Chan = Str.take_back(2);
6834   int AttrChan = StringSwitch<int>(Chan)
6835     .Case(".x", 0)
6836     .Case(".y", 1)
6837     .Case(".z", 2)
6838     .Case(".w", 3)
6839     .Default(-1);
6840   if (AttrChan == -1) {
6841     Error(S, "invalid or missing interpolation attribute channel");
6842     return MatchOperand_ParseFail;
6843   }
6844 
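  // Strip the "attr" prefix and the channel suffix, leaving the attribute number.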
6845   Str = Str.drop_back(2).drop_front(4);
6846 
6847   uint8_t Attr;
6848   if (Str.getAsInteger(10, Attr)) {
6849     Error(S, "invalid or missing interpolation attribute number");
6850     return MatchOperand_ParseFail;
6851   }
6852 
6853   if (Attr > 63) {
6854     Error(S, "out of bounds interpolation attribute number");
6855     return MatchOperand_ParseFail;
6856   }
6857 
6858   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6859 
6860   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6861                                               AMDGPUOperand::ImmTyInterpAttr));
6862   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6863                                               AMDGPUOperand::ImmTyAttrChan));
6864   return MatchOperand_Success;
6865 }
6866 
6867 //===----------------------------------------------------------------------===//
6868 // exp
6869 //===----------------------------------------------------------------------===//
6870 
6871 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6872   using namespace llvm::AMDGPU::Exp;
6873 
6874   StringRef Str;
6875   SMLoc S = getLoc();
6876 
6877   if (!parseId(Str))
6878     return MatchOperand_NoMatch;
6879 
6880   unsigned Id = getTgtId(Str);
6881   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6882     Error(S, (Id == ET_INVALID) ?
6883                 "invalid exp target" :
6884                 "exp target is not supported on this GPU");
6885     return MatchOperand_ParseFail;
6886   }
6887 
6888   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6889                                               AMDGPUOperand::ImmTyExpTgt));
6890   return MatchOperand_Success;
6891 }
6892 
6893 //===----------------------------------------------------------------------===//
6894 // parser helpers
6895 //===----------------------------------------------------------------------===//
6896 
6897 bool
6898 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6899   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6900 }
6901 
6902 bool
6903 AMDGPUAsmParser::isId(const StringRef Id) const {
6904   return isId(getToken(), Id);
6905 }
6906 
6907 bool
6908 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6909   return getTokenKind() == Kind;
6910 }
6911 
6912 bool
6913 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6914   if (isId(Id)) {
6915     lex();
6916     return true;
6917   }
6918   return false;
6919 }
6920 
6921 bool
6922 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6923   if (isToken(AsmToken::Identifier)) {
6924     StringRef Tok = getTokenStr();
6925     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6926       lex();
6927       return true;
6928     }
6929   }
6930   return false;
6931 }
6932 
6933 bool
6934 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6935   if (isId(Id) && peekToken().is(Kind)) {
6936     lex();
6937     lex();
6938     return true;
6939   }
6940   return false;
6941 }
6942 
6943 bool
6944 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6945   if (isToken(Kind)) {
6946     lex();
6947     return true;
6948   }
6949   return false;
6950 }
6951 
6952 bool
6953 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6954                            const StringRef ErrMsg) {
6955   if (!trySkipToken(Kind)) {
6956     Error(getLoc(), ErrMsg);
6957     return false;
6958   }
6959   return true;
6960 }
6961 
6962 bool
6963 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6964   SMLoc S = getLoc();
6965 
6966   const MCExpr *Expr;
6967   if (Parser.parseExpression(Expr))
6968     return false;
6969 
6970   if (Expr->evaluateAsAbsolute(Imm))
6971     return true;
6972 
6973   if (Expected.empty()) {
6974     Error(S, "expected absolute expression");
6975   } else {
6976     Error(S, Twine("expected ", Expected) +
6977              Twine(" or an absolute expression"));
6978   }
6979   return false;
6980 }
6981 
6982 bool
6983 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6984   SMLoc S = getLoc();
6985 
6986   const MCExpr *Expr;
6987   if (Parser.parseExpression(Expr))
6988     return false;
6989 
6990   int64_t IntVal;
6991   if (Expr->evaluateAsAbsolute(IntVal)) {
6992     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6993   } else {
6994     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6995   }
6996   return true;
6997 }
6998 
6999 bool
7000 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7001   if (isToken(AsmToken::String)) {
7002     Val = getToken().getStringContents();
7003     lex();
7004     return true;
7005   } else {
7006     Error(getLoc(), ErrMsg);
7007     return false;
7008   }
7009 }
7010 
7011 bool
7012 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7013   if (isToken(AsmToken::Identifier)) {
7014     Val = getTokenStr();
7015     lex();
7016     return true;
7017   } else {
7018     if (!ErrMsg.empty())
7019       Error(getLoc(), ErrMsg);
7020     return false;
7021   }
7022 }
7023 
7024 AsmToken
7025 AMDGPUAsmParser::getToken() const {
7026   return Parser.getTok();
7027 }
7028 
7029 AsmToken
7030 AMDGPUAsmParser::peekToken() {
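  // Do not peek past the end of the current statement.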
7031   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
7032 }
7033 
7034 void
7035 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7036   auto TokCount = getLexer().peekTokens(Tokens);
7037 
7038   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7039     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7040 }
7041 
7042 AsmToken::TokenKind
7043 AMDGPUAsmParser::getTokenKind() const {
7044   return getLexer().getKind();
7045 }
7046 
7047 SMLoc
7048 AMDGPUAsmParser::getLoc() const {
7049   return getToken().getLoc();
7050 }
7051 
7052 StringRef
7053 AMDGPUAsmParser::getTokenStr() const {
7054   return getToken().getString();
7055 }
7056 
7057 void
7058 AMDGPUAsmParser::lex() {
7059   Parser.Lex();
7060 }
7061 
7062 SMLoc
7063 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7064                                const OperandVector &Operands) const {
7065   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7066     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7067     if (Test(Op))
7068       return Op.getStartLoc();
7069   }
7070   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7071 }
7072 
7073 SMLoc
7074 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7075                            const OperandVector &Operands) const {
7076   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7077   return getOperandLoc(Test, Operands);
7078 }
7079 
7080 SMLoc
7081 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7082                            const OperandVector &Operands) const {
7083   auto Test = [=](const AMDGPUOperand& Op) {
7084     return Op.isRegKind() && Op.getReg() == Reg;
7085   };
7086   return getOperandLoc(Test, Operands);
7087 }
7088 
7089 SMLoc
7090 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7091   auto Test = [](const AMDGPUOperand& Op) {
7092     return Op.IsImmKindLiteral() || Op.isExpr();
7093   };
7094   return getOperandLoc(Test, Operands);
7095 }
7096 
7097 SMLoc
7098 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7099   auto Test = [](const AMDGPUOperand& Op) {
7100     return Op.isImmKindConst();
7101   };
7102   return getOperandLoc(Test, Operands);
7103 }
7104 
7105 //===----------------------------------------------------------------------===//
7106 // swizzle
7107 //===----------------------------------------------------------------------===//
7108 
7109 LLVM_READNONE
7110 static unsigned
7111 encodeBitmaskPerm(const unsigned AndMask,
7112                   const unsigned OrMask,
7113                   const unsigned XorMask) {
7114   using namespace llvm::AMDGPU::Swizzle;
7115 
7116   return BITMASK_PERM_ENC |
7117          (AndMask << BITMASK_AND_SHIFT) |
7118          (OrMask  << BITMASK_OR_SHIFT)  |
7119          (XorMask << BITMASK_XOR_SHIFT);
7120 }
7121 
7122 bool
7123 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7124                                      const unsigned MinVal,
7125                                      const unsigned MaxVal,
7126                                      const StringRef ErrMsg,
7127                                      SMLoc &Loc) {
7128   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7129     return false;
7130   }
7131   Loc = getLoc();
7132   if (!parseExpr(Op)) {
7133     return false;
7134   }
7135   if (Op < MinVal || Op > MaxVal) {
7136     Error(Loc, ErrMsg);
7137     return false;
7138   }
7139 
7140   return true;
7141 }
7142 
7143 bool
7144 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7145                                       const unsigned MinVal,
7146                                       const unsigned MaxVal,
7147                                       const StringRef ErrMsg) {
7148   SMLoc Loc;
7149   for (unsigned i = 0; i < OpNum; ++i) {
7150     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7151       return false;
7152   }
7153 
7154   return true;
7155 }
7156 
7157 bool
7158 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7159   using namespace llvm::AMDGPU::Swizzle;
7160 
7161   int64_t Lane[LANE_NUM];
7162   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7163                            "expected a 2-bit lane id")) {
7164     Imm = QUAD_PERM_ENC;
7165     for (unsigned I = 0; I < LANE_NUM; ++I) {
7166       Imm |= Lane[I] << (LANE_SHIFT * I);
7167     }
7168     return true;
7169   }
7170   return false;
7171 }
7172 
7173 bool
7174 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7175   using namespace llvm::AMDGPU::Swizzle;
7176 
7177   SMLoc Loc;
7178   int64_t GroupSize;
7179   int64_t LaneIdx;
7180 
7181   if (!parseSwizzleOperand(GroupSize,
7182                            2, 32,
7183                            "group size must be in the interval [2,32]",
7184                            Loc)) {
7185     return false;
7186   }
7187   if (!isPowerOf2_64(GroupSize)) {
7188     Error(Loc, "group size must be a power of two");
7189     return false;
7190   }
7191   if (parseSwizzleOperand(LaneIdx,
7192                           0, GroupSize - 1,
7193                           "lane id must be in the interval [0,group size - 1]",
7194                           Loc)) {
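    // The AND mask keeps the group base lane (it clears the low log2(GroupSize)
    // bits); the OR mask then selects LaneIdx within each group.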
7195     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7196     return true;
7197   }
7198   return false;
7199 }
7200 
7201 bool
7202 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7203   using namespace llvm::AMDGPU::Swizzle;
7204 
7205   SMLoc Loc;
7206   int64_t GroupSize;
7207 
7208   if (!parseSwizzleOperand(GroupSize,
7209                            2, 32,
7210                            "group size must be in the interval [2,32]",
7211                            Loc)) {
7212     return false;
7213   }
7214   if (!isPowerOf2_64(GroupSize)) {
7215     Error(Loc, "group size must be a power of two");
7216     return false;
7217   }
7218 
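  // XOR with (GroupSize - 1) reverses the lane order within each group.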
7219   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7220   return true;
7221 }
7222 
7223 bool
7224 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7225   using namespace llvm::AMDGPU::Swizzle;
7226 
7227   SMLoc Loc;
7228   int64_t GroupSize;
7229 
7230   if (!parseSwizzleOperand(GroupSize,
7231                            1, 16,
7232                            "group size must be in the interval [1,16]",
7233                            Loc)) {
7234     return false;
7235   }
7236   if (!isPowerOf2_64(GroupSize)) {
7237     Error(Loc, "group size must be a power of two");
7238     return false;
7239   }
7240 
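  // XOR with GroupSize swaps adjacent groups of GroupSize lanes.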
7241   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7242   return true;
7243 }
7244 
7245 bool
7246 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7247   using namespace llvm::AMDGPU::Swizzle;
7248 
7249   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7250     return false;
7251   }
7252 
7253   StringRef Ctl;
7254   SMLoc StrLoc = getLoc();
7255   if (!parseString(Ctl)) {
7256     return false;
7257   }
7258   if (Ctl.size() != BITMASK_WIDTH) {
7259     Error(StrLoc, "expected a 5-character mask");
7260     return false;
7261   }
7262 
7263   unsigned AndMask = 0;
7264   unsigned OrMask = 0;
7265   unsigned XorMask = 0;
7266 
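  // Each mask character controls one bit of the lane id: '0' forces it to 0,
  // '1' forces it to 1, 'p' preserves it, and 'i' inverts it.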
7267   for (size_t i = 0; i < Ctl.size(); ++i) {
7268     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7269     switch(Ctl[i]) {
7270     default:
7271       Error(StrLoc, "invalid mask");
7272       return false;
7273     case '0':
7274       break;
7275     case '1':
7276       OrMask |= Mask;
7277       break;
7278     case 'p':
7279       AndMask |= Mask;
7280       break;
7281     case 'i':
7282       AndMask |= Mask;
7283       XorMask |= Mask;
7284       break;
7285     }
7286   }
7287 
7288   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7289   return true;
7290 }
7291 
7292 bool
7293 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7294 
7295   SMLoc OffsetLoc = getLoc();
7296 
7297   if (!parseExpr(Imm, "a swizzle macro")) {
7298     return false;
7299   }
7300   if (!isUInt<16>(Imm)) {
7301     Error(OffsetLoc, "expected a 16-bit offset");
7302     return false;
7303   }
7304   return true;
7305 }
7306 
7307 bool
7308 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7309   using namespace llvm::AMDGPU::Swizzle;
7310 
7311   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7312 
7313     SMLoc ModeLoc = getLoc();
7314     bool Ok = false;
7315 
7316     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7317       Ok = parseSwizzleQuadPerm(Imm);
7318     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7319       Ok = parseSwizzleBitmaskPerm(Imm);
7320     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7321       Ok = parseSwizzleBroadcast(Imm);
7322     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7323       Ok = parseSwizzleSwap(Imm);
7324     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7325       Ok = parseSwizzleReverse(Imm);
7326     } else {
7327       Error(ModeLoc, "expected a swizzle mode");
7328     }
7329 
7330     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7331   }
7332 
7333   return false;
7334 }
7335 
7336 OperandMatchResultTy
7337 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7338   SMLoc S = getLoc();
7339   int64_t Imm = 0;
7340 
7341   if (trySkipId("offset")) {
7342 
7343     bool Ok = false;
7344     if (skipToken(AsmToken::Colon, "expected a colon")) {
7345       if (trySkipId("swizzle")) {
7346         Ok = parseSwizzleMacro(Imm);
7347       } else {
7348         Ok = parseSwizzleOffset(Imm);
7349       }
7350     }
7351 
7352     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7353 
7354     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7355   } else {
7356     // Swizzle "offset" operand is optional.
7357     // If it is omitted, try parsing other optional operands.
7358     return parseOptionalOpr(Operands);
7359   }
7360 }
7361 
7362 bool
7363 AMDGPUOperand::isSwizzle() const {
7364   return isImmTy(ImmTySwizzle);
7365 }
7366 
7367 //===----------------------------------------------------------------------===//
7368 // VGPR Index Mode
7369 //===----------------------------------------------------------------------===//
7370 
7371 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7372 
7373   using namespace llvm::AMDGPU::VGPRIndexMode;
7374 
7375   if (trySkipToken(AsmToken::RParen)) {
7376     return OFF;
7377   }
7378 
7379   int64_t Imm = 0;
7380 
7381   while (true) {
7382     unsigned Mode = 0;
7383     SMLoc S = getLoc();
7384 
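    // Each symbolic mode name sets one bit in the gpr_idx mode mask.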
7385     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7386       if (trySkipId(IdSymbolic[ModeId])) {
7387         Mode = 1 << ModeId;
7388         break;
7389       }
7390     }
7391 
7392     if (Mode == 0) {
7393       Error(S, (Imm == 0)?
7394                "expected a VGPR index mode or a closing parenthesis" :
7395                "expected a VGPR index mode");
7396       return UNDEF;
7397     }
7398 
7399     if (Imm & Mode) {
7400       Error(S, "duplicate VGPR index mode");
7401       return UNDEF;
7402     }
7403     Imm |= Mode;
7404 
7405     if (trySkipToken(AsmToken::RParen))
7406       break;
7407     if (!skipToken(AsmToken::Comma,
7408                    "expected a comma or a closing parenthesis"))
7409       return UNDEF;
7410   }
7411 
7412   return Imm;
7413 }
7414 
7415 OperandMatchResultTy
7416 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7417 
7418   using namespace llvm::AMDGPU::VGPRIndexMode;
7419 
7420   int64_t Imm = 0;
7421   SMLoc S = getLoc();
7422 
7423   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7424     Imm = parseGPRIdxMacro();
7425     if (Imm == UNDEF)
7426       return MatchOperand_ParseFail;
7427   } else {
7428     if (getParser().parseAbsoluteExpression(Imm))
7429       return MatchOperand_ParseFail;
7430     if (Imm < 0 || !isUInt<4>(Imm)) {
7431       Error(S, "invalid immediate: only 4-bit values are legal");
7432       return MatchOperand_ParseFail;
7433     }
7434   }
7435 
7436   Operands.push_back(
7437       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7438   return MatchOperand_Success;
7439 }
7440 
7441 bool AMDGPUOperand::isGPRIdxMode() const {
7442   return isImmTy(ImmTyGprIdxMode);
7443 }
7444 
7445 //===----------------------------------------------------------------------===//
7446 // sopp branch targets
7447 //===----------------------------------------------------------------------===//
7448 
7449 OperandMatchResultTy
7450 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7451 
7452   // Make sure we are not parsing something
7453   // that looks like a label or an expression but is not.
7454   // This will improve error messages.
7455   if (isRegister() || isModifier())
7456     return MatchOperand_NoMatch;
7457 
7458   if (!parseExpr(Operands))
7459     return MatchOperand_ParseFail;
7460 
7461   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7462   assert(Opr.isImm() || Opr.isExpr());
7463   SMLoc Loc = Opr.getStartLoc();
7464 
7465   // Currently we do not support arbitrary expressions as branch targets.
7466   // Only labels and absolute expressions are accepted.
7467   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7468     Error(Loc, "expected an absolute expression or a label");
7469   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7470     Error(Loc, "expected a 16-bit signed jump offset");
7471   }
7472 
7473   return MatchOperand_Success;
7474 }
7475 
7476 //===----------------------------------------------------------------------===//
7477 // Boolean holding registers
7478 //===----------------------------------------------------------------------===//
7479 
7480 OperandMatchResultTy
7481 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7482   return parseReg(Operands);
7483 }
7484 
7485 //===----------------------------------------------------------------------===//
7486 // mubuf
7487 //===----------------------------------------------------------------------===//
7488 
7489 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7490   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7491 }
7492 
7493 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7494                                    const OperandVector &Operands,
7495                                    bool IsAtomic,
7496                                    bool IsLds) {
7497   OptionalImmIndexMap OptionalIdx;
7498   unsigned FirstOperandIdx = 1;
7499   bool IsAtomicReturn = false;
7500 
7501   if (IsAtomic) {
7502     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7503       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7504       if (!Op.isCPol())
7505         continue;
7506       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7507       break;
7508     }
7509 
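    // Without the glc bit this is the no-return form of the atomic; switch to
    // the corresponding opcode if one exists.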
7510     if (!IsAtomicReturn) {
7511       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7512       if (NewOpc != -1)
7513         Inst.setOpcode(NewOpc);
7514     }
7515 
7516     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7517                       SIInstrFlags::IsAtomicRet;
7518   }
7519 
7520   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7521     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7522 
7523     // Add the register arguments
7524     if (Op.isReg()) {
7525       Op.addRegOperands(Inst, 1);
7526       // Insert a tied src for atomic return dst.
7527       // This cannot be postponed because subsequent calls to
7528       // addImmOperands rely on the correct number of MC operands.
7529       if (IsAtomicReturn && i == FirstOperandIdx)
7530         Op.addRegOperands(Inst, 1);
7531       continue;
7532     }
7533 
7534     // Handle the case where soffset is an immediate
7535     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7536       Op.addImmOperands(Inst, 1);
7537       continue;
7538     }
7539 
7540     // Handle tokens like 'offen' which are sometimes hard-coded into the
7541     // asm string.  There are no MCInst operands for these.
7542     if (Op.isToken()) {
7543       continue;
7544     }
7545     assert(Op.isImm());
7546 
7547     // Handle optional arguments
7548     OptionalIdx[Op.getImmTy()] = i;
7549   }
7550 
7551   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7552   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7553 
7554   if (!IsLds) { // tfe is not legal with lds opcodes
7555     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7556   }
7557   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7558 }
7559 
7560 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7561   OptionalImmIndexMap OptionalIdx;
7562 
7563   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7564     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7565 
7566     // Add the register arguments
7567     if (Op.isReg()) {
7568       Op.addRegOperands(Inst, 1);
7569       continue;
7570     }
7571 
7572     // Handle the case where soffset is an immediate
7573     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7574       Op.addImmOperands(Inst, 1);
7575       continue;
7576     }
7577 
7578     // Handle tokens like 'offen' which are sometimes hard-coded into the
7579     // asm string.  There are no MCInst operands for these.
7580     if (Op.isToken()) {
7581       continue;
7582     }
7583     assert(Op.isImm());
7584 
7585     // Handle optional arguments
7586     OptionalIdx[Op.getImmTy()] = i;
7587   }
7588 
7589   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7590                         AMDGPUOperand::ImmTyOffset);
7591   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7592   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7593   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7594   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7595 }
7596 
7597 //===----------------------------------------------------------------------===//
7598 // mimg
7599 //===----------------------------------------------------------------------===//
7600 
7601 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7602                               bool IsAtomic) {
7603   unsigned I = 1;
7604   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7605   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7606     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7607   }
7608 
7609   if (IsAtomic) {
7610     // Add src, same as dst
7611     assert(Desc.getNumDefs() == 1);
7612     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7613   }
7614 
7615   OptionalImmIndexMap OptionalIdx;
7616 
7617   for (unsigned E = Operands.size(); I != E; ++I) {
7618     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7619 
7620     // Add the register arguments
7621     if (Op.isReg()) {
7622       Op.addRegOperands(Inst, 1);
7623     } else if (Op.isImmModifier()) {
7624       OptionalIdx[Op.getImmTy()] = I;
7625     } else if (!Op.isToken()) {
7626       llvm_unreachable("unexpected operand type");
7627     }
7628   }
7629 
7630   bool IsGFX10Plus = isGFX10Plus();
7631 
7632   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7633   if (IsGFX10Plus)
7634     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7635   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7636   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7637   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7638   if (IsGFX10Plus)
7639     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7640   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7641     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7642   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7643   if (!IsGFX10Plus)
7644     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7645   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7646 }
7647 
7648 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7649   cvtMIMG(Inst, Operands, true);
7650 }
7651 
7652 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7653   OptionalImmIndexMap OptionalIdx;
7654   bool IsAtomicReturn = false;
7655 
7656   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7657     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7658     if (!Op.isCPol())
7659       continue;
7660     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7661     break;
7662   }
7663 
7664   if (!IsAtomicReturn) {
7665     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7666     if (NewOpc != -1)
7667       Inst.setOpcode(NewOpc);
7668   }
7669 
7670   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7671                     SIInstrFlags::IsAtomicRet;
7672 
7673   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7674     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7675 
7676     // Add the register arguments
7677     if (Op.isReg()) {
7678       Op.addRegOperands(Inst, 1);
7679       if (IsAtomicReturn && i == 1)
7680         Op.addRegOperands(Inst, 1);
7681       continue;
7682     }
7683 
7684     // Handle the case where soffset is an immediate
7685     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7686       Op.addImmOperands(Inst, 1);
7687       continue;
7688     }
7689 
7690     // Handle tokens like 'offen' which are sometimes hard-coded into the
7691     // asm string.  There are no MCInst operands for these.
7692     if (Op.isToken()) {
7693       continue;
7694     }
7695     assert(Op.isImm());
7696 
7697     // Handle optional arguments
7698     OptionalIdx[Op.getImmTy()] = i;
7699   }
7700 
7701   if ((int)Inst.getNumOperands() <=
7702       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7703     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7704   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7705 }
7706 
7707 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7708                                       const OperandVector &Operands) {
7709   for (unsigned I = 1; I < Operands.size(); ++I) {
7710     auto &Operand = (AMDGPUOperand &)*Operands[I];
7711     if (Operand.isReg())
7712       Operand.addRegOperands(Inst, 1);
7713   }
7714 
7715   Inst.addOperand(MCOperand::createImm(1)); // a16
7716 }
7717 
7718 //===----------------------------------------------------------------------===//
7719 // smrd
7720 //===----------------------------------------------------------------------===//
7721 
7722 bool AMDGPUOperand::isSMRDOffset8() const {
7723   return isImm() && isUInt<8>(getImm());
7724 }
7725 
7726 bool AMDGPUOperand::isSMEMOffset() const {
7727   return isImm(); // Offset range is checked later by validator.
7728 }
7729 
7730 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7731   // 32-bit literals are only supported on CI and we only want to use them
7732   // when the offset does not fit in 8 bits.
7733   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7734 }
7735 
7736 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7737   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7738 }
7739 
7740 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7741   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7742 }
7743 
7744 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7745   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7746 }
7747 
7748 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7749   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7750 }
7751 
7752 //===----------------------------------------------------------------------===//
7753 // vop3
7754 //===----------------------------------------------------------------------===//
7755 
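// The omod operand encodes an output modifier. ConvertOmodMul maps mul:1 -> 0
// (none), mul:2 -> 1 and mul:4 -> 2; ConvertOmodDiv maps div:1 -> 0 and
// div:2 -> 3.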
7756 static bool ConvertOmodMul(int64_t &Mul) {
7757   if (Mul != 1 && Mul != 2 && Mul != 4)
7758     return false;
7759 
7760   Mul >>= 1;
7761   return true;
7762 }
7763 
7764 static bool ConvertOmodDiv(int64_t &Div) {
7765   if (Div == 1) {
7766     Div = 0;
7767     return true;
7768   }
7769 
7770   if (Div == 2) {
7771     Div = 3;
7772     return true;
7773   }
7774 
7775   return false;
7776 }
7777 
7778 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7779 // This is intentional and ensures compatibility with sp3.
7780 // See bug 35397 for details.
7781 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7782   if (BoundCtrl == 0 || BoundCtrl == 1) {
7783     BoundCtrl = 1;
7784     return true;
7785   }
7786   return false;
7787 }
7788 
7789 // Note: the order in this table matches the order of operands in AsmString.
7790 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7791   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7792   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7793   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7794   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7795   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7796   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7797   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7798   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7799   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7800   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7801   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7802   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7803   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7804   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7805   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7806   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7807   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7808   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7809   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7810   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7811   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7812   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7813   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7814   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7815   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7816   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7817   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7818   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7819   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7820   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7821   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7822   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7823   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7824   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7825   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7826   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7827   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7828   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7829   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7830   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7831   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7832 };
7833 
7834 void AMDGPUAsmParser::onBeginOfFile() {
7835   if (!getParser().getStreamer().getTargetStreamer() ||
7836       getSTI().getTargetTriple().getArch() == Triple::r600)
7837     return;
7838 
7839   if (!getTargetStreamer().getTargetID())
7840     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7841 
7842   if (isHsaAbiVersion3AndAbove(&getSTI()))
7843     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7844 }
7845 
7846 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7847 
7848   OperandMatchResultTy res = parseOptionalOpr(Operands);
7849 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which have
  // a hardcoded 'glc' operand).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits a
  // hardcoded mandatory operand.
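  //
  // For example, a flat/global atomic with a hardcoded 'glc' in its AsmString
  // relies on the bounded lookahead below: we keep calling parseOptionalOpr()
  // (skipping commas) for up to MAX_OPR_LOOKAHEAD operands so that the
  // trailing token is consumed here instead of tripping up the generated
  // matcher.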
7860 
7861   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7862     if (res != MatchOperand_Success ||
7863         isToken(AsmToken::EndOfStatement))
7864       break;
7865 
7866     trySkipToken(AsmToken::Comma);
7867     res = parseOptionalOpr(Operands);
7868   }
7869 
7870   return res;
7871 }
7872 
7873 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7874   OperandMatchResultTy res;
7875   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7876     // try to parse any optional operand here
7877     if (Op.IsBit) {
7878       res = parseNamedBit(Op.Name, Operands, Op.Type);
7879     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7880       res = parseOModOperand(Operands);
7881     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7882                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7883                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7884       res = parseSDWASel(Operands, Op.Name, Op.Type);
7885     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7886       res = parseSDWADstUnused(Operands);
7887     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7888                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7889                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7890                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7891       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7892                                         Op.ConvertResult);
7893     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7894       res = parseDim(Operands);
7895     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7896       res = parseCPol(Operands);
7897     } else {
7898       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7899       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7900         res = parseOperandArrayWithPrefix("neg", Operands,
7901                                           AMDGPUOperand::ImmTyBLGP,
7902                                           nullptr);
7903       }
7904     }
7905     if (res != MatchOperand_NoMatch) {
7906       return res;
7907     }
7908   }
7909   return MatchOperand_NoMatch;
7910 }
7911 
7912 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7913   StringRef Name = getTokenStr();
7914   if (Name == "mul") {
7915     return parseIntWithPrefix("mul", Operands,
7916                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7917   }
7918 
7919   if (Name == "div") {
7920     return parseIntWithPrefix("div", Operands,
7921                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7922   }
7923 
7924   return MatchOperand_NoMatch;
7925 }
7926 
7927 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7928   cvtVOP3P(Inst, Operands);
7929 
7930   int Opc = Inst.getOpcode();
7931 
7932   int SrcNum;
7933   const int Ops[] = { AMDGPU::OpName::src0,
7934                       AMDGPU::OpName::src1,
7935                       AMDGPU::OpName::src2 };
7936   for (SrcNum = 0;
7937        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7938        ++SrcNum);
7939   assert(SrcNum > 0);
7940 
7941   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7942   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
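
  // The op_sel bit just past the last source applies to the destination
  // (it selects the high half of the result); in the MC layer it is carried
  // in src0_modifiers as DST_OP_SEL.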
7943 
7944   if ((OpSel & (1 << SrcNum)) != 0) {
7945     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7946     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7947     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7948   }
7949 }
7950 
7951 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7960 }
7961 
7962 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7963 {
7964   OptionalImmIndexMap OptionalIdx;
7965   unsigned Opc = Inst.getOpcode();
7966 
7967   unsigned I = 1;
7968   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7969   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7970     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7971   }
7972 
7973   for (unsigned E = Operands.size(); I != E; ++I) {
7974     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7975     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7976       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7977     } else if (Op.isInterpSlot() ||
7978                Op.isInterpAttr() ||
7979                Op.isAttrChan()) {
7980       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7981     } else if (Op.isImmModifier()) {
7982       OptionalIdx[Op.getImmTy()] = I;
7983     } else {
7984       llvm_unreachable("unhandled operand type");
7985     }
7986   }
7987 
7988   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7989     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7990   }
7991 
7992   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7993     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7994   }
7995 
7996   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7997     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7998   }
7999 }
8000 
8001 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8002                               OptionalImmIndexMap &OptionalIdx) {
8003   unsigned Opc = Inst.getOpcode();
8004 
8005   unsigned I = 1;
8006   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8007   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8008     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8009   }
8010 
8011   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8012     // This instruction has src modifiers
8013     for (unsigned E = Operands.size(); I != E; ++I) {
8014       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8015       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8016         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8017       } else if (Op.isImmModifier()) {
8018         OptionalIdx[Op.getImmTy()] = I;
8019       } else if (Op.isRegOrImm()) {
8020         Op.addRegOrImmOperands(Inst, 1);
8021       } else {
8022         llvm_unreachable("unhandled operand type");
8023       }
8024     }
8025   } else {
8026     // No src modifiers
8027     for (unsigned E = Operands.size(); I != E; ++I) {
8028       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8029       if (Op.isMod()) {
8030         OptionalIdx[Op.getImmTy()] = I;
8031       } else {
8032         Op.addRegOrImmOperands(Inst, 1);
8033       }
8034     }
8035   }
8036 
8037   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8038     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8039   }
8040 
8041   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8042     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8043   }
8044 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // these have a src2 register operand that is tied to the dst operand.
  // Modifiers are not allowed for this operand in the assembler, so
  // src2_modifiers should be 0.
8049   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8050       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8051       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8052       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8053       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8054       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8055       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8056       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8057       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8058       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8059       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
8060     auto it = Inst.begin();
8061     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8062     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8063     ++it;
8064     // Copy the operand to ensure it's not invalidated when Inst grows.
8065     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8066   }
8067 }
8068 
8069 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8070   OptionalImmIndexMap OptionalIdx;
8071   cvtVOP3(Inst, Operands, OptionalIdx);
8072 }
8073 
8074 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8075                                OptionalImmIndexMap &OptIdx) {
8076   const int Opc = Inst.getOpcode();
8077   const MCInstrDesc &Desc = MII.get(Opc);
8078 
8079   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8080 
8081   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8082     assert(!IsPacked);
8083     Inst.addOperand(Inst.getOperand(0));
8084   }
8085 
  // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
8088 
8089   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8090   if (OpSelIdx != -1) {
8091     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8092   }
8093 
8094   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8095   if (OpSelHiIdx != -1) {
8096     int DefaultVal = IsPacked ? -1 : 0;
8097     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8098                           DefaultVal);
8099   }
8100 
8101   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8102   if (NegLoIdx != -1) {
8103     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8104     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8105   }
8106 
8107   const int Ops[] = { AMDGPU::OpName::src0,
8108                       AMDGPU::OpName::src1,
8109                       AMDGPU::OpName::src2 };
8110   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8111                          AMDGPU::OpName::src1_modifiers,
8112                          AMDGPU::OpName::src2_modifiers };
8113 
8114   unsigned OpSel = 0;
8115   unsigned OpSelHi = 0;
8116   unsigned NegLo = 0;
8117   unsigned NegHi = 0;
8118 
8119   if (OpSelIdx != -1)
8120     OpSel = Inst.getOperand(OpSelIdx).getImm();
8121 
8122   if (OpSelHiIdx != -1)
8123     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8124 
8125   if (NegLoIdx != -1) {
8126     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8127     NegLo = Inst.getOperand(NegLoIdx).getImm();
8128     NegHi = Inst.getOperand(NegHiIdx).getImm();
8129   }
8130 
8131   for (int J = 0; J < 3; ++J) {
8132     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8133     if (OpIdx == -1)
8134       break;
8135 
8136     uint32_t ModVal = 0;
8137 
8138     if ((OpSel & (1 << J)) != 0)
8139       ModVal |= SISrcMods::OP_SEL_0;
8140 
8141     if ((OpSelHi & (1 << J)) != 0)
8142       ModVal |= SISrcMods::OP_SEL_1;
8143 
8144     if ((NegLo & (1 << J)) != 0)
8145       ModVal |= SISrcMods::NEG;
8146 
8147     if ((NegHi & (1 << J)) != 0)
8148       ModVal |= SISrcMods::NEG_HI;
8149 
8150     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8151 
8152     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8153   }
8154 }
8155 
8156 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8157   OptionalImmIndexMap OptIdx;
8158   cvtVOP3(Inst, Operands, OptIdx);
8159   cvtVOP3P(Inst, Operands, OptIdx);
8160 }
8161 
8162 //===----------------------------------------------------------------------===//
8163 // dpp
8164 //===----------------------------------------------------------------------===//
8165 
8166 bool AMDGPUOperand::isDPP8() const {
8167   return isImmTy(ImmTyDPP8);
8168 }
8169 
8170 bool AMDGPUOperand::isDPPCtrl() const {
8171   using namespace AMDGPU::DPP;
8172 
8173   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8174   if (result) {
8175     int64_t Imm = getImm();
8176     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8177            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8178            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8179            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8180            (Imm == DppCtrl::WAVE_SHL1) ||
8181            (Imm == DppCtrl::WAVE_ROL1) ||
8182            (Imm == DppCtrl::WAVE_SHR1) ||
8183            (Imm == DppCtrl::WAVE_ROR1) ||
8184            (Imm == DppCtrl::ROW_MIRROR) ||
8185            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8186            (Imm == DppCtrl::BCAST15) ||
8187            (Imm == DppCtrl::BCAST31) ||
8188            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8189            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8190   }
8191   return false;
8192 }
8193 
8194 //===----------------------------------------------------------------------===//
8195 // mAI
8196 //===----------------------------------------------------------------------===//
8197 
8198 bool AMDGPUOperand::isBLGP() const {
8199   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8200 }
8201 
8202 bool AMDGPUOperand::isCBSZ() const {
8203   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8204 }
8205 
8206 bool AMDGPUOperand::isABID() const {
8207   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8208 }
8209 
8210 bool AMDGPUOperand::isS16Imm() const {
8211   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8212 }
8213 
8214 bool AMDGPUOperand::isU16Imm() const {
8215   return isImm() && isUInt<16>(getImm());
8216 }
8217 
8218 //===----------------------------------------------------------------------===//
8219 // dim
8220 //===----------------------------------------------------------------------===//
8221 
8222 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8223   // We want to allow "dim:1D" etc.,
8224   // but the initial 1 is tokenized as an integer.
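  // Both forms, e.g. "dim:2D" and "dim:SQ_RSRC_IMG_2D", resolve to the same
  // encoding; the "SQ_RSRC_IMG_" prefix is stripped below.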
8225   std::string Token;
8226   if (isToken(AsmToken::Integer)) {
8227     SMLoc Loc = getToken().getEndLoc();
8228     Token = std::string(getTokenStr());
8229     lex();
8230     if (getLoc() != Loc)
8231       return false;
8232   }
8233 
8234   StringRef Suffix;
8235   if (!parseId(Suffix))
8236     return false;
8237   Token += Suffix;
8238 
8239   StringRef DimId = Token;
8240   if (DimId.startswith("SQ_RSRC_IMG_"))
8241     DimId = DimId.drop_front(12);
8242 
8243   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8244   if (!DimInfo)
8245     return false;
8246 
8247   Encoding = DimInfo->Encoding;
8248   return true;
8249 }
8250 
8251 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8252   if (!isGFX10Plus())
8253     return MatchOperand_NoMatch;
8254 
8255   SMLoc S = getLoc();
8256 
8257   if (!trySkipId("dim", AsmToken::Colon))
8258     return MatchOperand_NoMatch;
8259 
8260   unsigned Encoding;
8261   SMLoc Loc = getLoc();
8262   if (!parseDimId(Encoding)) {
8263     Error(Loc, "invalid dim value");
8264     return MatchOperand_ParseFail;
8265   }
8266 
8267   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8268                                               AMDGPUOperand::ImmTyDim));
8269   return MatchOperand_Success;
8270 }
8271 
8272 //===----------------------------------------------------------------------===//
8273 // dpp
8274 //===----------------------------------------------------------------------===//
8275 
8276 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8277   SMLoc S = getLoc();
8278 
8279   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8280     return MatchOperand_NoMatch;
8281 
8282   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8283 
8284   int64_t Sels[8];
8285 
8286   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8287     return MatchOperand_ParseFail;
8288 
8289   for (size_t i = 0; i < 8; ++i) {
8290     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8291       return MatchOperand_ParseFail;
8292 
8293     SMLoc Loc = getLoc();
8294     if (getParser().parseAbsoluteExpression(Sels[i]))
8295       return MatchOperand_ParseFail;
8296     if (0 > Sels[i] || 7 < Sels[i]) {
8297       Error(Loc, "expected a 3-bit value");
8298       return MatchOperand_ParseFail;
8299     }
8300   }
8301 
8302   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8303     return MatchOperand_ParseFail;
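
  // Pack the eight 3-bit lane selectors into a single 24-bit immediate,
  // lane 0 in the lowest bits (e.g. dpp8:[7,6,5,4,3,2,1,0] reverses the
  // lanes within each group of eight).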
8304 
8305   unsigned DPP8 = 0;
8306   for (size_t i = 0; i < 8; ++i)
8307     DPP8 |= (Sels[i] << (i * 3));
8308 
8309   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8310   return MatchOperand_Success;
8311 }
8312 
8313 bool
8314 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8315                                     const OperandVector &Operands) {
8316   if (Ctrl == "row_newbcast")
8317     return isGFX90A();
8318 
8319   if (Ctrl == "row_share" ||
8320       Ctrl == "row_xmask")
8321     return isGFX10Plus();
8322 
8323   if (Ctrl == "wave_shl" ||
8324       Ctrl == "wave_shr" ||
8325       Ctrl == "wave_rol" ||
8326       Ctrl == "wave_ror" ||
8327       Ctrl == "row_bcast")
8328     return isVI() || isGFX9();
8329 
8330   return Ctrl == "row_mirror" ||
8331          Ctrl == "row_half_mirror" ||
8332          Ctrl == "quad_perm" ||
8333          Ctrl == "row_shl" ||
8334          Ctrl == "row_shr" ||
8335          Ctrl == "row_ror";
8336 }
8337 
8338 int64_t
8339 AMDGPUAsmParser::parseDPPCtrlPerm() {
8340   // quad_perm:[%d,%d,%d,%d]
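  // Each value is a 2-bit lane select within a quad; the four selects are
  // packed into an 8-bit control, value i in bits [2*i+1 : 2*i]. For example,
  // quad_perm:[0,1,2,3] is the identity and quad_perm:[3,2,1,0] reverses the
  // lanes of each quad.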
8341 
8342   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8343     return -1;
8344 
8345   int64_t Val = 0;
8346   for (int i = 0; i < 4; ++i) {
8347     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8348       return -1;
8349 
8350     int64_t Temp;
8351     SMLoc Loc = getLoc();
8352     if (getParser().parseAbsoluteExpression(Temp))
8353       return -1;
8354     if (Temp < 0 || Temp > 3) {
8355       Error(Loc, "expected a 2-bit value");
8356       return -1;
8357     }
8358 
8359     Val += (Temp << i * 2);
8360   }
8361 
8362   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8363     return -1;
8364 
8365   return Val;
8366 }
8367 
8368 int64_t
8369 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8370   using namespace AMDGPU::DPP;
8371 
8372   // sel:%d
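  // The accepted range depends on the control, e.g. row_shl:1 .. row_shl:15,
  // wave_shl:1, row_share:0 .. row_share:15, and row_bcast:15 or row_bcast:31;
  // see the table below.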
8373 
8374   int64_t Val;
8375   SMLoc Loc = getLoc();
8376 
8377   if (getParser().parseAbsoluteExpression(Val))
8378     return -1;
8379 
8380   struct DppCtrlCheck {
8381     int64_t Ctrl;
8382     int Lo;
8383     int Hi;
8384   };
8385 
8386   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8387     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8388     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8389     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8390     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8391     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8392     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8393     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8394     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8395     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8396     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8397     .Default({-1, 0, 0});
8398 
8399   bool Valid;
8400   if (Check.Ctrl == -1) {
8401     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8402     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8403   } else {
8404     Valid = Check.Lo <= Val && Val <= Check.Hi;
8405     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8406   }
8407 
8408   if (!Valid) {
8409     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8410     return -1;
8411   }
8412 
8413   return Val;
8414 }
8415 
8416 OperandMatchResultTy
8417 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8418   using namespace AMDGPU::DPP;
8419 
8420   if (!isToken(AsmToken::Identifier) ||
8421       !isSupportedDPPCtrl(getTokenStr(), Operands))
8422     return MatchOperand_NoMatch;
8423 
8424   SMLoc S = getLoc();
8425   int64_t Val = -1;
8426   StringRef Ctrl;
8427 
8428   parseId(Ctrl);
8429 
8430   if (Ctrl == "row_mirror") {
8431     Val = DppCtrl::ROW_MIRROR;
8432   } else if (Ctrl == "row_half_mirror") {
8433     Val = DppCtrl::ROW_HALF_MIRROR;
8434   } else {
8435     if (skipToken(AsmToken::Colon, "expected a colon")) {
8436       if (Ctrl == "quad_perm") {
8437         Val = parseDPPCtrlPerm();
8438       } else {
8439         Val = parseDPPCtrlSel(Ctrl);
8440       }
8441     }
8442   }
8443 
8444   if (Val == -1)
8445     return MatchOperand_ParseFail;
8446 
8447   Operands.push_back(
8448     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8449   return MatchOperand_Success;
8450 }
8451 
8452 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8453   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8454 }
8455 
8456 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8457   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8458 }
8459 
8460 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8461   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8462 }
8463 
8464 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8465   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8466 }
8467 
8468 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8469   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8470 }
8471 
8472 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8473   OptionalImmIndexMap OptionalIdx;
8474 
8475   unsigned Opc = Inst.getOpcode();
8476   bool HasModifiers =
8477       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8478   unsigned I = 1;
8479   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8480   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8481     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8482   }
8483 
8484   int Fi = 0;
8485   for (unsigned E = Operands.size(); I != E; ++I) {
8486     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8487                                             MCOI::TIED_TO);
8488     if (TiedTo != -1) {
8489       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' operand, or the tied src2 of MAC instructions.
8491       Inst.addOperand(Inst.getOperand(TiedTo));
8492     }
8493     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8494     // Add the register arguments
8495     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
      // Skip it.
8498       continue;
8499     }
8500 
8501     if (IsDPP8) {
8502       if (Op.isDPP8()) {
8503         Op.addImmOperands(Inst, 1);
8504       } else if (HasModifiers &&
8505                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8506         Op.addRegWithFPInputModsOperands(Inst, 2);
8507       } else if (Op.isFI()) {
8508         Fi = Op.getImm();
8509       } else if (Op.isReg()) {
8510         Op.addRegOperands(Inst, 1);
8511       } else {
8512         llvm_unreachable("Invalid operand type");
8513       }
8514     } else {
8515       if (HasModifiers &&
8516           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8517         Op.addRegWithFPInputModsOperands(Inst, 2);
8518       } else if (Op.isReg()) {
8519         Op.addRegOperands(Inst, 1);
8520       } else if (Op.isDPPCtrl()) {
8521         Op.addImmOperands(Inst, 1);
8522       } else if (Op.isImm()) {
8523         // Handle optional arguments
8524         OptionalIdx[Op.getImmTy()] = I;
8525       } else {
8526         llvm_unreachable("Invalid operand type");
8527       }
8528     }
8529   }
8530 
8531   if (IsDPP8) {
8532     using namespace llvm::AMDGPU::DPP;
8533     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8534   } else {
8535     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8536     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8537     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8538     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8539       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8540     }
8541   }
8542 }
8543 
8544 //===----------------------------------------------------------------------===//
8545 // sdwa
8546 //===----------------------------------------------------------------------===//
8547 
8548 OperandMatchResultTy
8549 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8550                               AMDGPUOperand::ImmTy Type) {
8551   using namespace llvm::AMDGPU::SDWA;
8552 
8553   SMLoc S = getLoc();
8554   StringRef Value;
8555   OperandMatchResultTy res;
8556 
8557   SMLoc StringLoc;
8558   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8559   if (res != MatchOperand_Success) {
8560     return res;
8561   }
8562 
8563   int64_t Int;
8564   Int = StringSwitch<int64_t>(Value)
8565         .Case("BYTE_0", SdwaSel::BYTE_0)
8566         .Case("BYTE_1", SdwaSel::BYTE_1)
8567         .Case("BYTE_2", SdwaSel::BYTE_2)
8568         .Case("BYTE_3", SdwaSel::BYTE_3)
8569         .Case("WORD_0", SdwaSel::WORD_0)
8570         .Case("WORD_1", SdwaSel::WORD_1)
8571         .Case("DWORD", SdwaSel::DWORD)
8572         .Default(0xffffffff);
8573 
8574   if (Int == 0xffffffff) {
8575     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8576     return MatchOperand_ParseFail;
8577   }
8578 
8579   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8580   return MatchOperand_Success;
8581 }
8582 
8583 OperandMatchResultTy
8584 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8585   using namespace llvm::AMDGPU::SDWA;
8586 
8587   SMLoc S = getLoc();
8588   StringRef Value;
8589   OperandMatchResultTy res;
8590 
8591   SMLoc StringLoc;
8592   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8593   if (res != MatchOperand_Success) {
8594     return res;
8595   }
8596 
8597   int64_t Int;
8598   Int = StringSwitch<int64_t>(Value)
8599         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8600         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8601         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8602         .Default(0xffffffff);
8603 
8604   if (Int == 0xffffffff) {
8605     Error(StringLoc, "invalid dst_unused value");
8606     return MatchOperand_ParseFail;
8607   }
8608 
8609   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8610   return MatchOperand_Success;
8611 }
8612 
8613 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8614   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8615 }
8616 
8617 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8618   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8619 }
8620 
8621 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8622   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8623 }
8624 
8625 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8626   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8627 }
8628 
8629 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8630   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8631 }
8632 
8633 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8634                               uint64_t BasicInstType,
8635                               bool SkipDstVcc,
8636                               bool SkipSrcVcc) {
8637   using namespace llvm::AMDGPU::SDWA;
8638 
8639   OptionalImmIndexMap OptionalIdx;
8640   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8641   bool SkippedVcc = false;
8642 
8643   unsigned I = 1;
8644   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8645   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8646     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8647   }
8648 
8649   for (unsigned E = Operands.size(); I != E; ++I) {
8650     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8651     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8652         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we did not skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
8658       if (BasicInstType == SIInstrFlags::VOP2 &&
8659           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8660            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8661         SkippedVcc = true;
8662         continue;
8663       } else if (BasicInstType == SIInstrFlags::VOPC &&
8664                  Inst.getNumOperands() == 0) {
8665         SkippedVcc = true;
8666         continue;
8667       }
8668     }
8669     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8670       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8671     } else if (Op.isImm()) {
8672       // Handle optional arguments
8673       OptionalIdx[Op.getImmTy()] = I;
8674     } else {
8675       llvm_unreachable("Invalid operand type");
8676     }
8677     SkippedVcc = false;
8678   }
8679 
8680   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8681       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8682       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa (vi/gfx9/gfx10) has no optional SDWA arguments.
8684     switch (BasicInstType) {
8685     case SIInstrFlags::VOP1:
8686       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8687       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8688         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8689       }
8690       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8691       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8692       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8693       break;
8694 
8695     case SIInstrFlags::VOP2:
8696       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8697       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8698         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8699       }
8700       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8701       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8702       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8703       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8704       break;
8705 
8706     case SIInstrFlags::VOPC:
8707       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8708         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8709       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8710       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8711       break;
8712 
8713     default:
8714       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8715     }
8716   }
8717 
  // Special case v_mac_{f16, f32}:
  // these have a src2 register operand that is tied to the dst operand.
8720   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8721       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8722     auto it = Inst.begin();
8723     std::advance(
8724       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8725     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8726   }
8727 }
8728 
8729 //===----------------------------------------------------------------------===//
8730 // mAI
8731 //===----------------------------------------------------------------------===//
8732 
8733 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8734   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8735 }
8736 
8737 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8738   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8739 }
8740 
8741 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8742   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8743 }
8744 
8745 /// Force static initialization.
8746 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8747   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8748   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8749 }
8750 
8751 #define GET_REGISTER_MATCHER
8752 #define GET_MATCHER_IMPLEMENTATION
8753 #define GET_MNEMONIC_SPELL_CHECKER
8754 #define GET_MNEMONIC_CHECKER
8755 #include "AMDGPUGenAsmMatcher.inc"
8756 
// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is available.
8759 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8760                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // where the matcher expects the corresponding token.
8765   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8766   switch (Kind) {
8767   case MCK_addr64:
8768     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8769   case MCK_gds:
8770     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8771   case MCK_lds:
8772     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8773   case MCK_idxen:
8774     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8775   case MCK_offen:
8776     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8777   case MCK_SSrcB32:
8778     // When operands have expression values, they will return true for isToken,
8779     // because it is not possible to distinguish between a token and an
8780     // expression at parse time. MatchInstructionImpl() will always try to
8781     // match an operand as a token, when isToken returns true, and when the
8782     // name of the expression is not a valid token, the match will fail,
8783     // so we need to handle it here.
8784     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8785   case MCK_SSrcF32:
8786     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8787   case MCK_SoppBrTarget:
8788     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8789   case MCK_VReg32OrOff:
8790     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8791   case MCK_InterpSlot:
8792     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8793   case MCK_Attr:
8794     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8795   case MCK_AttrChan:
8796     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8797   case MCK_ImmSMEMOffset:
8798     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8799   case MCK_SReg_64:
8800   case MCK_SReg_64_XEXEC:
    // 'null' is defined as a 32-bit register, but it should also be usable
    // with 64-bit operands. The following code enables it for SReg_64
    // operands used as source and destination. Remaining source operands
    // are handled in isInlinableImm.
8806     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8807   default:
8808     return Match_InvalidOperand;
8809   }
8810 }
8811 
8812 //===----------------------------------------------------------------------===//
8813 // endpgm
8814 //===----------------------------------------------------------------------===//
8815 
8816 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8817   SMLoc S = getLoc();
8818   int64_t Imm = 0;
8819 
8820   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
8822     Imm = 0;
8823   }
8824 
8825   if (!isUInt<16>(Imm)) {
8826     Error(S, "expected a 16-bit value");
8827     return MatchOperand_ParseFail;
8828   }
8829 
8830   Operands.push_back(
8831       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8832   return MatchOperand_Success;
8833 }
8834 
8835 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8836