1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/TargetParser.h"
40 
41 using namespace llvm;
42 using namespace llvm::AMDGPU;
43 using namespace llvm::amdhsa;
44 
45 namespace {
46 
47 class AMDGPUAsmParser;
48 
49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
50 
51 //===----------------------------------------------------------------------===//
52 // Operand
53 //===----------------------------------------------------------------------===//
54 
55 class AMDGPUOperand : public MCParsedAsmOperand {
56   enum KindTy {
57     Token,
58     Immediate,
59     Register,
60     Expression
61   } Kind;
62 
63   SMLoc StartLoc, EndLoc;
64   const AMDGPUAsmParser *AsmParser;
65 
66 public:
67   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
68       : Kind(Kind_), AsmParser(AsmParser_) {}
69 
70   using Ptr = std::unique_ptr<AMDGPUOperand>;
71 
72   struct Modifiers {
73     bool Abs = false;
74     bool Neg = false;
75     bool Sext = false;
76 
77     bool hasFPModifiers() const { return Abs || Neg; }
78     bool hasIntModifiers() const { return Sext; }
79     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
80 
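    // Pack the floating-point source modifiers into an SISrcMods bitmask
    // suitable for a src*_modifiers operand.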
81     int64_t getFPModifiersOperand() const {
82       int64_t Operand = 0;
83       Operand |= Abs ? SISrcMods::ABS : 0u;
84       Operand |= Neg ? SISrcMods::NEG : 0u;
85       return Operand;
86     }
87 
88     int64_t getIntModifiersOperand() const {
89       int64_t Operand = 0;
90       Operand |= Sext ? SISrcMods::SEXT : 0u;
91       return Operand;
92     }
93 
94     int64_t getModifiersOperand() const {
95       assert(!(hasFPModifiers() && hasIntModifiers())
96            && "fp and int modifiers should not be used simultaneously");
97       if (hasFPModifiers()) {
98         return getFPModifiersOperand();
99       } else if (hasIntModifiers()) {
100         return getIntModifiersOperand();
101       } else {
102         return 0;
103       }
104     }
105 
106     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107   };
108 
109   enum ImmTy {
110     ImmTyNone,
111     ImmTyGDS,
112     ImmTyLDS,
113     ImmTyOffen,
114     ImmTyIdxen,
115     ImmTyAddr64,
116     ImmTyOffset,
117     ImmTyInstOffset,
118     ImmTyOffset0,
119     ImmTyOffset1,
120     ImmTyCPol,
121     ImmTySWZ,
122     ImmTyTFE,
123     ImmTyD16,
124     ImmTyClampSI,
125     ImmTyOModSI,
126     ImmTyDPP8,
127     ImmTyDppCtrl,
128     ImmTyDppRowMask,
129     ImmTyDppBankMask,
130     ImmTyDppBoundCtrl,
131     ImmTyDppFi,
132     ImmTySdwaDstSel,
133     ImmTySdwaSrc0Sel,
134     ImmTySdwaSrc1Sel,
135     ImmTySdwaDstUnused,
136     ImmTyDMask,
137     ImmTyDim,
138     ImmTyUNorm,
139     ImmTyDA,
140     ImmTyR128A16,
141     ImmTyA16,
142     ImmTyLWE,
143     ImmTyExpTgt,
144     ImmTyExpCompr,
145     ImmTyExpVM,
146     ImmTyFORMAT,
147     ImmTyHwreg,
148     ImmTyOff,
149     ImmTySendMsg,
150     ImmTyInterpSlot,
151     ImmTyInterpAttr,
152     ImmTyAttrChan,
153     ImmTyOpSel,
154     ImmTyOpSelHi,
155     ImmTyNegLo,
156     ImmTyNegHi,
157     ImmTySwizzle,
158     ImmTyGprIdxMode,
159     ImmTyHigh,
160     ImmTyBLGP,
161     ImmTyCBSZ,
162     ImmTyABID,
163     ImmTyEndpgm,
164   };
165 
166   enum ImmKindTy {
167     ImmKindTyNone,
168     ImmKindTyLiteral,
169     ImmKindTyConst,
170   };
171 
172 private:
173   struct TokOp {
174     const char *Data;
175     unsigned Length;
176   };
177 
178   struct ImmOp {
179     int64_t Val;
180     ImmTy Type;
181     bool IsFPImm;
182     mutable ImmKindTy Kind;
183     Modifiers Mods;
184   };
185 
186   struct RegOp {
187     unsigned RegNo;
188     Modifiers Mods;
189   };
190 
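  // Operand payload; the active member is selected by Kind.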
191   union {
192     TokOp Tok;
193     ImmOp Imm;
194     RegOp Reg;
195     const MCExpr *Expr;
196   };
197 
198 public:
199   bool isToken() const override {
200     if (Kind == Token)
201       return true;
202 
203     // When parsing operands, we can't always tell if something was meant to be
204     // a token, like 'gds', or an expression that references a global variable.
205     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
207     return isSymbolRefExpr();
208   }
209 
210   bool isSymbolRefExpr() const {
211     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
212   }
213 
214   bool isImm() const override {
215     return Kind == Immediate;
216   }
217 
218   void setImmKindNone() const {
219     assert(isImm());
220     Imm.Kind = ImmKindTyNone;
221   }
222 
223   void setImmKindLiteral() const {
224     assert(isImm());
225     Imm.Kind = ImmKindTyLiteral;
226   }
227 
228   void setImmKindConst() const {
229     assert(isImm());
230     Imm.Kind = ImmKindTyConst;
231   }
232 
233   bool IsImmKindLiteral() const {
234     return isImm() && Imm.Kind == ImmKindTyLiteral;
235   }
236 
237   bool isImmKindConst() const {
238     return isImm() && Imm.Kind == ImmKindTyConst;
239   }
240 
241   bool isInlinableImm(MVT type) const;
242   bool isLiteralImm(MVT type) const;
243 
244   bool isRegKind() const {
245     return Kind == Register;
246   }
247 
248   bool isReg() const override {
249     return isRegKind() && !hasModifiers();
250   }
251 
252   bool isRegOrInline(unsigned RCID, MVT type) const {
253     return isRegClass(RCID) || isInlinableImm(type);
254   }
255 
256   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
257     return isRegOrInline(RCID, type) || isLiteralImm(type);
258   }
259 
260   bool isRegOrImmWithInt16InputMods() const {
261     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
262   }
263 
264   bool isRegOrImmWithInt32InputMods() const {
265     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
266   }
267 
268   bool isRegOrImmWithInt64InputMods() const {
269     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
270   }
271 
272   bool isRegOrImmWithFP16InputMods() const {
273     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
274   }
275 
276   bool isRegOrImmWithFP32InputMods() const {
277     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
278   }
279 
280   bool isRegOrImmWithFP64InputMods() const {
281     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
282   }
283 
284   bool isVReg() const {
285     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
286            isRegClass(AMDGPU::VReg_64RegClassID) ||
287            isRegClass(AMDGPU::VReg_96RegClassID) ||
288            isRegClass(AMDGPU::VReg_128RegClassID) ||
289            isRegClass(AMDGPU::VReg_160RegClassID) ||
290            isRegClass(AMDGPU::VReg_192RegClassID) ||
291            isRegClass(AMDGPU::VReg_256RegClassID) ||
292            isRegClass(AMDGPU::VReg_512RegClassID) ||
293            isRegClass(AMDGPU::VReg_1024RegClassID);
294   }
295 
296   bool isVReg32() const {
297     return isRegClass(AMDGPU::VGPR_32RegClassID);
298   }
299 
300   bool isVReg32OrOff() const {
301     return isOff() || isVReg32();
302   }
303 
304   bool isNull() const {
305     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
306   }
307 
308   bool isVRegWithInputMods() const;
309 
310   bool isSDWAOperand(MVT type) const;
311   bool isSDWAFP16Operand() const;
312   bool isSDWAFP32Operand() const;
313   bool isSDWAInt16Operand() const;
314   bool isSDWAInt32Operand() const;
315 
316   bool isImmTy(ImmTy ImmT) const {
317     return isImm() && Imm.Type == ImmT;
318   }
319 
320   bool isImmModifier() const {
321     return isImm() && Imm.Type != ImmTyNone;
322   }
323 
324   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
325   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
326   bool isDMask() const { return isImmTy(ImmTyDMask); }
327   bool isDim() const { return isImmTy(ImmTyDim); }
328   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
329   bool isDA() const { return isImmTy(ImmTyDA); }
330   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
331   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
332   bool isLWE() const { return isImmTy(ImmTyLWE); }
333   bool isOff() const { return isImmTy(ImmTyOff); }
334   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
335   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
336   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
337   bool isOffen() const { return isImmTy(ImmTyOffen); }
338   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
339   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
340   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
341   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
342   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
343 
344   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
345   bool isGDS() const { return isImmTy(ImmTyGDS); }
346   bool isLDS() const { return isImmTy(ImmTyLDS); }
347   bool isCPol() const { return isImmTy(ImmTyCPol); }
348   bool isSWZ() const { return isImmTy(ImmTySWZ); }
349   bool isTFE() const { return isImmTy(ImmTyTFE); }
350   bool isD16() const { return isImmTy(ImmTyD16); }
351   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
352   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
353   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
354   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
355   bool isFI() const { return isImmTy(ImmTyDppFi); }
356   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
357   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
358   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
359   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
360   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
361   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
362   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
363   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
364   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
365   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
366   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
367   bool isHigh() const { return isImmTy(ImmTyHigh); }
368 
369   bool isMod() const {
370     return isClampSI() || isOModSI();
371   }
372 
373   bool isRegOrImm() const {
374     return isReg() || isImm();
375   }
376 
377   bool isRegClass(unsigned RCID) const;
378 
379   bool isInlineValue() const;
380 
381   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
382     return isRegOrInline(RCID, type) && !hasModifiers();
383   }
384 
385   bool isSCSrcB16() const {
386     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
387   }
388 
389   bool isSCSrcV2B16() const {
390     return isSCSrcB16();
391   }
392 
393   bool isSCSrcB32() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
395   }
396 
397   bool isSCSrcB64() const {
398     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
399   }
400 
401   bool isBoolReg() const;
402 
403   bool isSCSrcF16() const {
404     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
405   }
406 
407   bool isSCSrcV2F16() const {
408     return isSCSrcF16();
409   }
410 
411   bool isSCSrcF32() const {
412     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
413   }
414 
415   bool isSCSrcF64() const {
416     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
417   }
418 
419   bool isSSrcB32() const {
420     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
421   }
422 
423   bool isSSrcB16() const {
424     return isSCSrcB16() || isLiteralImm(MVT::i16);
425   }
426 
427   bool isSSrcV2B16() const {
428     llvm_unreachable("cannot happen");
429     return isSSrcB16();
430   }
431 
432   bool isSSrcB64() const {
433     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
434     // See isVSrc64().
435     return isSCSrcB64() || isLiteralImm(MVT::i64);
436   }
437 
438   bool isSSrcF32() const {
439     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
440   }
441 
442   bool isSSrcF64() const {
443     return isSCSrcB64() || isLiteralImm(MVT::f64);
444   }
445 
446   bool isSSrcF16() const {
447     return isSCSrcB16() || isLiteralImm(MVT::f16);
448   }
449 
450   bool isSSrcV2F16() const {
451     llvm_unreachable("cannot happen");
452     return isSSrcF16();
453   }
454 
455   bool isSSrcV2FP32() const {
456     llvm_unreachable("cannot happen");
457     return isSSrcF32();
458   }
459 
460   bool isSCSrcV2FP32() const {
461     llvm_unreachable("cannot happen");
462     return isSCSrcF32();
463   }
464 
465   bool isSSrcV2INT32() const {
466     llvm_unreachable("cannot happen");
467     return isSSrcB32();
468   }
469 
470   bool isSCSrcV2INT32() const {
471     llvm_unreachable("cannot happen");
472     return isSCSrcB32();
473   }
474 
475   bool isSSrcOrLdsB32() const {
476     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
477            isLiteralImm(MVT::i32) || isExpr();
478   }
479 
480   bool isVCSrcB32() const {
481     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
482   }
483 
484   bool isVCSrcB64() const {
485     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
486   }
487 
488   bool isVCSrcB16() const {
489     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
490   }
491 
492   bool isVCSrcV2B16() const {
493     return isVCSrcB16();
494   }
495 
496   bool isVCSrcF32() const {
497     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
498   }
499 
500   bool isVCSrcF64() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
502   }
503 
504   bool isVCSrcF16() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
506   }
507 
508   bool isVCSrcV2F16() const {
509     return isVCSrcF16();
510   }
511 
512   bool isVSrcB32() const {
513     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
514   }
515 
516   bool isVSrcB64() const {
517     return isVCSrcF64() || isLiteralImm(MVT::i64);
518   }
519 
520   bool isVSrcB16() const {
521     return isVCSrcB16() || isLiteralImm(MVT::i16);
522   }
523 
524   bool isVSrcV2B16() const {
525     return isVSrcB16() || isLiteralImm(MVT::v2i16);
526   }
527 
528   bool isVCSrcV2FP32() const {
529     return isVCSrcF64();
530   }
531 
532   bool isVSrcV2FP32() const {
533     return isVSrcF64() || isLiteralImm(MVT::v2f32);
534   }
535 
536   bool isVCSrcV2INT32() const {
537     return isVCSrcB64();
538   }
539 
540   bool isVSrcV2INT32() const {
541     return isVSrcB64() || isLiteralImm(MVT::v2i32);
542   }
543 
544   bool isVSrcF32() const {
545     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
546   }
547 
548   bool isVSrcF64() const {
549     return isVCSrcF64() || isLiteralImm(MVT::f64);
550   }
551 
552   bool isVSrcF16() const {
553     return isVCSrcF16() || isLiteralImm(MVT::f16);
554   }
555 
556   bool isVSrcV2F16() const {
557     return isVSrcF16() || isLiteralImm(MVT::v2f16);
558   }
559 
560   bool isVISrcB32() const {
561     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
562   }
563 
564   bool isVISrcB16() const {
565     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
566   }
567 
568   bool isVISrcV2B16() const {
569     return isVISrcB16();
570   }
571 
572   bool isVISrcF32() const {
573     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
574   }
575 
576   bool isVISrcF16() const {
577     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
578   }
579 
580   bool isVISrcV2F16() const {
581     return isVISrcF16() || isVISrcB32();
582   }
583 
584   bool isVISrc_64B64() const {
585     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
586   }
587 
588   bool isVISrc_64F64() const {
589     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
590   }
591 
592   bool isVISrc_64V2FP32() const {
593     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
594   }
595 
596   bool isVISrc_64V2INT32() const {
597     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
598   }
599 
600   bool isVISrc_256B64() const {
601     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
602   }
603 
604   bool isVISrc_256F64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
606   }
607 
608   bool isVISrc_128B16() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
610   }
611 
612   bool isVISrc_128V2B16() const {
613     return isVISrc_128B16();
614   }
615 
616   bool isVISrc_128B32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_128F32() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
622   }
623 
624   bool isVISrc_256V2FP32() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
626   }
627 
628   bool isVISrc_256V2INT32() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
630   }
631 
632   bool isVISrc_512B32() const {
633     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
634   }
635 
636   bool isVISrc_512B16() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
638   }
639 
640   bool isVISrc_512V2B16() const {
641     return isVISrc_512B16();
642   }
643 
644   bool isVISrc_512F32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_512F16() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
650   }
651 
652   bool isVISrc_512V2F16() const {
653     return isVISrc_512F16() || isVISrc_512B32();
654   }
655 
656   bool isVISrc_1024B32() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
658   }
659 
660   bool isVISrc_1024B16() const {
661     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
662   }
663 
664   bool isVISrc_1024V2B16() const {
665     return isVISrc_1024B16();
666   }
667 
668   bool isVISrc_1024F32() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
670   }
671 
672   bool isVISrc_1024F16() const {
673     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
674   }
675 
676   bool isVISrc_1024V2F16() const {
677     return isVISrc_1024F16() || isVISrc_1024B32();
678   }
679 
680   bool isAISrcB32() const {
681     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
682   }
683 
684   bool isAISrcB16() const {
685     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
686   }
687 
688   bool isAISrcV2B16() const {
689     return isAISrcB16();
690   }
691 
692   bool isAISrcF32() const {
693     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
694   }
695 
696   bool isAISrcF16() const {
697     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
698   }
699 
700   bool isAISrcV2F16() const {
701     return isAISrcF16() || isAISrcB32();
702   }
703 
704   bool isAISrc_64B64() const {
705     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
706   }
707 
708   bool isAISrc_64F64() const {
709     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
710   }
711 
712   bool isAISrc_128B32() const {
713     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
714   }
715 
716   bool isAISrc_128B16() const {
717     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
718   }
719 
720   bool isAISrc_128V2B16() const {
721     return isAISrc_128B16();
722   }
723 
724   bool isAISrc_128F32() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
726   }
727 
728   bool isAISrc_128F16() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
730   }
731 
732   bool isAISrc_128V2F16() const {
733     return isAISrc_128F16() || isAISrc_128B32();
734   }
735 
736   bool isVISrc_128F16() const {
737     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
738   }
739 
740   bool isVISrc_128V2F16() const {
741     return isVISrc_128F16() || isVISrc_128B32();
742   }
743 
744   bool isAISrc_256B64() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
746   }
747 
748   bool isAISrc_256F64() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
750   }
751 
752   bool isAISrc_512B32() const {
753     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
754   }
755 
756   bool isAISrc_512B16() const {
757     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
758   }
759 
760   bool isAISrc_512V2B16() const {
761     return isAISrc_512B16();
762   }
763 
764   bool isAISrc_512F32() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
766   }
767 
768   bool isAISrc_512F16() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
770   }
771 
772   bool isAISrc_512V2F16() const {
773     return isAISrc_512F16() || isAISrc_512B32();
774   }
775 
776   bool isAISrc_1024B32() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
778   }
779 
780   bool isAISrc_1024B16() const {
781     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
782   }
783 
784   bool isAISrc_1024V2B16() const {
785     return isAISrc_1024B16();
786   }
787 
788   bool isAISrc_1024F32() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
790   }
791 
792   bool isAISrc_1024F16() const {
793     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
794   }
795 
796   bool isAISrc_1024V2F16() const {
797     return isAISrc_1024F16() || isAISrc_1024B32();
798   }
799 
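  // KImm operands are floating-point literals encoded directly in the
  // instruction.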
800   bool isKImmFP32() const {
801     return isLiteralImm(MVT::f32);
802   }
803 
804   bool isKImmFP16() const {
805     return isLiteralImm(MVT::f16);
806   }
807 
808   bool isMem() const override {
809     return false;
810   }
811 
812   bool isExpr() const {
813     return Kind == Expression;
814   }
815 
816   bool isSoppBrTarget() const {
817     return isExpr() || isImm();
818   }
819 
820   bool isSWaitCnt() const;
821   bool isHwreg() const;
822   bool isSendMsg() const;
823   bool isSwizzle() const;
824   bool isSMRDOffset8() const;
825   bool isSMEMOffset() const;
826   bool isSMRDLiteralOffset() const;
827   bool isDPP8() const;
828   bool isDPPCtrl() const;
829   bool isBLGP() const;
830   bool isCBSZ() const;
831   bool isABID() const;
832   bool isGPRIdxMode() const;
833   bool isS16Imm() const;
834   bool isU16Imm() const;
835   bool isEndpgm() const;
836 
837   StringRef getExpressionAsToken() const {
838     assert(isExpr());
839     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
840     return S->getSymbol().getName();
841   }
842 
843   StringRef getToken() const {
844     assert(isToken());
845 
846     if (Kind == Expression)
847       return getExpressionAsToken();
848 
849     return StringRef(Tok.Data, Tok.Length);
850   }
851 
852   int64_t getImm() const {
853     assert(isImm());
854     return Imm.Val;
855   }
856 
857   void setImm(int64_t Val) {
858     assert(isImm());
859     Imm.Val = Val;
860   }
861 
862   ImmTy getImmTy() const {
863     assert(isImm());
864     return Imm.Type;
865   }
866 
867   unsigned getReg() const override {
868     assert(isRegKind());
869     return Reg.RegNo;
870   }
871 
872   SMLoc getStartLoc() const override {
873     return StartLoc;
874   }
875 
876   SMLoc getEndLoc() const override {
877     return EndLoc;
878   }
879 
880   SMRange getLocRange() const {
881     return SMRange(StartLoc, EndLoc);
882   }
883 
884   Modifiers getModifiers() const {
885     assert(isRegKind() || isImmTy(ImmTyNone));
886     return isRegKind() ? Reg.Mods : Imm.Mods;
887   }
888 
889   void setModifiers(Modifiers Mods) {
890     assert(isRegKind() || isImmTy(ImmTyNone));
891     if (isRegKind())
892       Reg.Mods = Mods;
893     else
894       Imm.Mods = Mods;
895   }
896 
897   bool hasModifiers() const {
898     return getModifiers().hasModifiers();
899   }
900 
901   bool hasFPModifiers() const {
902     return getModifiers().hasFPModifiers();
903   }
904 
905   bool hasIntModifiers() const {
906     return getModifiers().hasIntModifiers();
907   }
908 
909   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
910 
911   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
912 
913   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
914 
915   template <unsigned Bitwidth>
916   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
917 
918   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
919     addKImmFPOperands<16>(Inst, N);
920   }
921 
922   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
923     addKImmFPOperands<32>(Inst, N);
924   }
925 
926   void addRegOperands(MCInst &Inst, unsigned N) const;
927 
928   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
929     addRegOperands(Inst, N);
930   }
931 
932   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
933     if (isRegKind())
934       addRegOperands(Inst, N);
935     else if (isExpr())
936       Inst.addOperand(MCOperand::createExpr(Expr));
937     else
938       addImmOperands(Inst, N);
939   }
940 
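  // Add the source-modifier bitmask followed by the register or the raw
  // (unmodified) immediate.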
941   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
942     Modifiers Mods = getModifiers();
943     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
944     if (isRegKind()) {
945       addRegOperands(Inst, N);
946     } else {
947       addImmOperands(Inst, N, false);
948     }
949   }
950 
951   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
952     assert(!hasIntModifiers());
953     addRegOrImmWithInputModsOperands(Inst, N);
954   }
955 
956   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
957     assert(!hasFPModifiers());
958     addRegOrImmWithInputModsOperands(Inst, N);
959   }
960 
961   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
962     Modifiers Mods = getModifiers();
963     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
964     assert(isRegKind());
965     addRegOperands(Inst, N);
966   }
967 
968   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
969     assert(!hasIntModifiers());
970     addRegWithInputModsOperands(Inst, N);
971   }
972 
973   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
974     assert(!hasFPModifiers());
975     addRegWithInputModsOperands(Inst, N);
976   }
977 
978   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
979     if (isImm())
980       addImmOperands(Inst, N);
981     else {
982       assert(isExpr());
983       Inst.addOperand(MCOperand::createExpr(Expr));
984     }
985   }
986 
987   static void printImmTy(raw_ostream& OS, ImmTy Type) {
988     switch (Type) {
989     case ImmTyNone: OS << "None"; break;
990     case ImmTyGDS: OS << "GDS"; break;
991     case ImmTyLDS: OS << "LDS"; break;
992     case ImmTyOffen: OS << "Offen"; break;
993     case ImmTyIdxen: OS << "Idxen"; break;
994     case ImmTyAddr64: OS << "Addr64"; break;
995     case ImmTyOffset: OS << "Offset"; break;
996     case ImmTyInstOffset: OS << "InstOffset"; break;
997     case ImmTyOffset0: OS << "Offset0"; break;
998     case ImmTyOffset1: OS << "Offset1"; break;
999     case ImmTyCPol: OS << "CPol"; break;
1000     case ImmTySWZ: OS << "SWZ"; break;
1001     case ImmTyTFE: OS << "TFE"; break;
1002     case ImmTyD16: OS << "D16"; break;
1003     case ImmTyFORMAT: OS << "FORMAT"; break;
1004     case ImmTyClampSI: OS << "ClampSI"; break;
1005     case ImmTyOModSI: OS << "OModSI"; break;
1006     case ImmTyDPP8: OS << "DPP8"; break;
1007     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1008     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1009     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1010     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1011     case ImmTyDppFi: OS << "FI"; break;
1012     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1013     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1014     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1015     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1016     case ImmTyDMask: OS << "DMask"; break;
1017     case ImmTyDim: OS << "Dim"; break;
1018     case ImmTyUNorm: OS << "UNorm"; break;
1019     case ImmTyDA: OS << "DA"; break;
1020     case ImmTyR128A16: OS << "R128A16"; break;
1021     case ImmTyA16: OS << "A16"; break;
1022     case ImmTyLWE: OS << "LWE"; break;
1023     case ImmTyOff: OS << "Off"; break;
1024     case ImmTyExpTgt: OS << "ExpTgt"; break;
1025     case ImmTyExpCompr: OS << "ExpCompr"; break;
1026     case ImmTyExpVM: OS << "ExpVM"; break;
1027     case ImmTyHwreg: OS << "Hwreg"; break;
1028     case ImmTySendMsg: OS << "SendMsg"; break;
1029     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1030     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1031     case ImmTyAttrChan: OS << "AttrChan"; break;
1032     case ImmTyOpSel: OS << "OpSel"; break;
1033     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1034     case ImmTyNegLo: OS << "NegLo"; break;
1035     case ImmTyNegHi: OS << "NegHi"; break;
1036     case ImmTySwizzle: OS << "Swizzle"; break;
1037     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1038     case ImmTyHigh: OS << "High"; break;
1039     case ImmTyBLGP: OS << "BLGP"; break;
1040     case ImmTyCBSZ: OS << "CBSZ"; break;
1041     case ImmTyABID: OS << "ABID"; break;
1042     case ImmTyEndpgm: OS << "Endpgm"; break;
1043     }
1044   }
1045 
1046   void print(raw_ostream &OS) const override {
1047     switch (Kind) {
1048     case Register:
1049       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1050       break;
1051     case Immediate:
1052       OS << '<' << getImm();
1053       if (getImmTy() != ImmTyNone) {
1054         OS << " type: "; printImmTy(OS, getImmTy());
1055       }
1056       OS << " mods: " << Imm.Mods << '>';
1057       break;
1058     case Token:
1059       OS << '\'' << getToken() << '\'';
1060       break;
1061     case Expression:
1062       OS << "<expr " << *Expr << '>';
1063       break;
1064     }
1065   }
1066 
1067   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1068                                       int64_t Val, SMLoc Loc,
1069                                       ImmTy Type = ImmTyNone,
1070                                       bool IsFPImm = false) {
1071     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1072     Op->Imm.Val = Val;
1073     Op->Imm.IsFPImm = IsFPImm;
1074     Op->Imm.Kind = ImmKindTyNone;
1075     Op->Imm.Type = Type;
1076     Op->Imm.Mods = Modifiers();
1077     Op->StartLoc = Loc;
1078     Op->EndLoc = Loc;
1079     return Op;
1080   }
1081 
1082   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1083                                         StringRef Str, SMLoc Loc,
1084                                         bool HasExplicitEncodingSize = true) {
1085     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1086     Res->Tok.Data = Str.data();
1087     Res->Tok.Length = Str.size();
1088     Res->StartLoc = Loc;
1089     Res->EndLoc = Loc;
1090     return Res;
1091   }
1092 
1093   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1094                                       unsigned RegNo, SMLoc S,
1095                                       SMLoc E) {
1096     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1097     Op->Reg.RegNo = RegNo;
1098     Op->Reg.Mods = Modifiers();
1099     Op->StartLoc = S;
1100     Op->EndLoc = E;
1101     return Op;
1102   }
1103 
1104   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1105                                        const class MCExpr *Expr, SMLoc S) {
1106     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1107     Op->Expr = Expr;
1108     Op->StartLoc = S;
1109     Op->EndLoc = S;
1110     return Op;
1111   }
1112 };
1113 
1114 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1116   return OS;
1117 }
1118 
1119 //===----------------------------------------------------------------------===//
1120 // AsmParser
1121 //===----------------------------------------------------------------------===//
1122 
1123 // Holds info related to the current kernel, e.g. count of SGPRs used.
// A kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
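// For example, a reference to s[10:11] calls usesSgprAt(11), which records
// SgprIndexUnusedMin = 12 and sets .kernel.sgpr_count to 12.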
1126 class KernelScopeInfo {
1127   int SgprIndexUnusedMin = -1;
1128   int VgprIndexUnusedMin = -1;
1129   int AgprIndexUnusedMin = -1;
1130   MCContext *Ctx = nullptr;
1131   MCSubtargetInfo const *MSTI = nullptr;
1132 
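  // Record that SGPR index \p i is in use and, if needed, raise the
  // .kernel.sgpr_count symbol to i + 1.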
1133   void usesSgprAt(int i) {
1134     if (i >= SgprIndexUnusedMin) {
1135       SgprIndexUnusedMin = ++i;
1136       if (Ctx) {
1137         MCSymbol* const Sym =
1138           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1139         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1140       }
1141     }
1142   }
1143 
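  // Record that VGPR index \p i is in use and update the .kernel.vgpr_count
  // symbol, which also accounts for AGPR usage where the register files are
  // shared.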
1144   void usesVgprAt(int i) {
1145     if (i >= VgprIndexUnusedMin) {
1146       VgprIndexUnusedMin = ++i;
1147       if (Ctx) {
1148         MCSymbol* const Sym =
1149           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1150         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1151                                          VgprIndexUnusedMin);
1152         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1153       }
1154     }
1155   }
1156 
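  // Record that AGPR index \p i is in use and update both .kernel.agpr_count
  // and the combined .kernel.vgpr_count.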
1157   void usesAgprAt(int i) {
    // If the subtarget has no MAI instructions, the instruction will be
    // rejected in AMDGPUAsmParser::MatchAndEmitInstruction, so there is
    // nothing to count here.
1159     if (!hasMAIInsts(*MSTI))
1160       return;
1161 
1162     if (i >= AgprIndexUnusedMin) {
1163       AgprIndexUnusedMin = ++i;
1164       if (Ctx) {
1165         MCSymbol* const Sym =
1166           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1167         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1168 
1169         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1170         MCSymbol* const vSym =
1171           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1172         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1173                                          VgprIndexUnusedMin);
1174         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1175       }
1176     }
1177   }
1178 
1179 public:
1180   KernelScopeInfo() = default;
1181 
1182   void initialize(MCContext &Context) {
1183     Ctx = &Context;
1184     MSTI = Ctx->getSubtargetInfo();
1185 
1186     usesSgprAt(SgprIndexUnusedMin = -1);
1187     usesVgprAt(VgprIndexUnusedMin = -1);
1188     if (hasMAIInsts(*MSTI)) {
1189       usesAgprAt(AgprIndexUnusedMin = -1);
1190     }
1191   }
1192 
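  // Note a reference to a register starting at dword index \p DwordRegIndex
  // and spanning \p RegWidth bits.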
1193   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1194                     unsigned RegWidth) {
1195     switch (RegKind) {
1196     case IS_SGPR:
1197       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1198       break;
1199     case IS_AGPR:
1200       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1201       break;
1202     case IS_VGPR:
1203       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1204       break;
1205     default:
1206       break;
1207     }
1208   }
1209 };
1210 
1211 class AMDGPUAsmParser : public MCTargetAsmParser {
1212   MCAsmParser &Parser;
1213 
1214   // Number of extra operands parsed after the first optional operand.
1215   // This may be necessary to skip hardcoded mandatory operands.
1216   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1217 
1218   unsigned ForcedEncodingSize = 0;
1219   bool ForcedDPP = false;
1220   bool ForcedSDWA = false;
1221   KernelScopeInfo KernelScope;
1222   unsigned CPolSeen;
1223 
1224   /// @name Auto-generated Match Functions
1225   /// {
1226 
1227 #define GET_ASSEMBLER_HEADER
1228 #include "AMDGPUGenAsmMatcher.inc"
1229 
1230   /// }
1231 
1232 private:
1233   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1234   bool OutOfRangeError(SMRange Range);
1235   /// Calculate VGPR/SGPR blocks required for given target, reserved
1236   /// registers, and user-specified NextFreeXGPR values.
1237   ///
1238   /// \param Features [in] Target features, used for bug corrections.
1239   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1240   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1241   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1242   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1243   /// descriptor field, if valid.
1244   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1245   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1246   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1247   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1248   /// \param VGPRBlocks [out] Result VGPR block count.
1249   /// \param SGPRBlocks [out] Result SGPR block count.
1250   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1251                           bool FlatScrUsed, bool XNACKUsed,
1252                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1253                           SMRange VGPRRange, unsigned NextFreeSGPR,
1254                           SMRange SGPRRange, unsigned &VGPRBlocks,
1255                           unsigned &SGPRBlocks);
1256   bool ParseDirectiveAMDGCNTarget();
1257   bool ParseDirectiveAMDHSAKernel();
1258   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1259   bool ParseDirectiveHSACodeObjectVersion();
1260   bool ParseDirectiveHSACodeObjectISA();
1261   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1262   bool ParseDirectiveAMDKernelCodeT();
1263   // TODO: Possibly make subtargetHasRegister const.
1264   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1265   bool ParseDirectiveAMDGPUHsaKernel();
1266 
1267   bool ParseDirectiveISAVersion();
1268   bool ParseDirectiveHSAMetadata();
1269   bool ParseDirectivePALMetadataBegin();
1270   bool ParseDirectivePALMetadata();
1271   bool ParseDirectiveAMDGPULDS();
1272 
1273   /// Common code to parse out a block of text (typically YAML) between start and
1274   /// end directives.
1275   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1276                            const char *AssemblerDirectiveEnd,
1277                            std::string &CollectString);
1278 
1279   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1280                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1281   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1282                            unsigned &RegNum, unsigned &RegWidth,
1283                            bool RestoreOnFailure = false);
1284   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1285                            unsigned &RegNum, unsigned &RegWidth,
1286                            SmallVectorImpl<AsmToken> &Tokens);
1287   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1288                            unsigned &RegWidth,
1289                            SmallVectorImpl<AsmToken> &Tokens);
1290   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1291                            unsigned &RegWidth,
1292                            SmallVectorImpl<AsmToken> &Tokens);
1293   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1294                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1295   bool ParseRegRange(unsigned& Num, unsigned& Width);
1296   unsigned getRegularReg(RegisterKind RegKind,
1297                          unsigned RegNum,
1298                          unsigned RegWidth,
1299                          SMLoc Loc);
1300 
1301   bool isRegister();
1302   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1303   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1304   void initializeGprCountSymbol(RegisterKind RegKind);
1305   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1306                              unsigned RegWidth);
1307   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1308                     bool IsAtomic, bool IsLds = false);
1309   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1310                  bool IsGdsHardcoded);
1311 
1312 public:
1313   enum AMDGPUMatchResultTy {
1314     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1315   };
1316   enum OperandMode {
1317     OperandMode_Default,
1318     OperandMode_NSA,
1319   };
1320 
1321   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1322 
1323   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1324                const MCInstrInfo &MII,
1325                const MCTargetOptions &Options)
1326       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1327     MCAsmParserExtension::Initialize(Parser);
1328 
1329     if (getFeatureBits().none()) {
1330       // Set default features.
1331       copySTI().ToggleFeature("southern-islands");
1332     }
1333 
1334     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1335 
1336     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
1341       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1342       MCContext &Ctx = getContext();
1343       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1344         MCSymbol *Sym =
1345             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1346         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1347         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1348         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1349         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1350         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1351       } else {
1352         MCSymbol *Sym =
1353             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1354         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1355         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1356         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1357         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1358         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1359       }
1360       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1361         initializeGprCountSymbol(IS_VGPR);
1362         initializeGprCountSymbol(IS_SGPR);
1363       } else
1364         KernelScope.initialize(getContext());
1365     }
1366   }
1367 
1368   bool hasMIMG_R128() const {
1369     return AMDGPU::hasMIMG_R128(getSTI());
1370   }
1371 
1372   bool hasPackedD16() const {
1373     return AMDGPU::hasPackedD16(getSTI());
1374   }
1375 
1376   bool hasGFX10A16() const {
1377     return AMDGPU::hasGFX10A16(getSTI());
1378   }
1379 
1380   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1381 
1382   bool isSI() const {
1383     return AMDGPU::isSI(getSTI());
1384   }
1385 
1386   bool isCI() const {
1387     return AMDGPU::isCI(getSTI());
1388   }
1389 
1390   bool isVI() const {
1391     return AMDGPU::isVI(getSTI());
1392   }
1393 
1394   bool isGFX9() const {
1395     return AMDGPU::isGFX9(getSTI());
1396   }
1397 
  // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
1399   bool isGFX90A() const {
1400     return AMDGPU::isGFX90A(getSTI());
1401   }
1402 
1403   bool isGFX940() const {
1404     return AMDGPU::isGFX940(getSTI());
1405   }
1406 
1407   bool isGFX9Plus() const {
1408     return AMDGPU::isGFX9Plus(getSTI());
1409   }
1410 
1411   bool isGFX10() const {
1412     return AMDGPU::isGFX10(getSTI());
1413   }
1414 
1415   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1416 
1417   bool isGFX10_BEncoding() const {
1418     return AMDGPU::isGFX10_BEncoding(getSTI());
1419   }
1420 
1421   bool hasInv2PiInlineImm() const {
1422     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1423   }
1424 
1425   bool hasFlatOffsets() const {
1426     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1427   }
1428 
1429   bool hasArchitectedFlatScratch() const {
1430     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1431   }
1432 
1433   bool hasSGPR102_SGPR103() const {
1434     return !isVI() && !isGFX9();
1435   }
1436 
1437   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1438 
1439   bool hasIntClamp() const {
1440     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1441   }
1442 
1443   AMDGPUTargetStreamer &getTargetStreamer() {
1444     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1445     return static_cast<AMDGPUTargetStreamer &>(TS);
1446   }
1447 
1448   const MCRegisterInfo *getMRI() const {
1449     // We need this const_cast because for some reason getContext() is not const
1450     // in MCAsmParser.
1451     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1452   }
1453 
1454   const MCInstrInfo *getMII() const {
1455     return &MII;
1456   }
1457 
1458   const FeatureBitset &getFeatureBits() const {
1459     return getSTI().getFeatureBits();
1460   }
1461 
1462   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1463   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1464   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1465 
1466   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1467   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1468   bool isForcedDPP() const { return ForcedDPP; }
1469   bool isForcedSDWA() const { return ForcedSDWA; }
1470   ArrayRef<unsigned> getMatchedVariants() const;
1471   StringRef getMatchedVariantName() const;
1472 
1473   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1474   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1475                      bool RestoreOnFailure);
1476   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1477   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1478                                         SMLoc &EndLoc) override;
1479   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1480   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1481                                       unsigned Kind) override;
1482   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1483                                OperandVector &Operands, MCStreamer &Out,
1484                                uint64_t &ErrorInfo,
1485                                bool MatchingInlineAsm) override;
1486   bool ParseDirective(AsmToken DirectiveID) override;
1487   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1488                                     OperandMode Mode = OperandMode_Default);
1489   StringRef parseMnemonicSuffix(StringRef Name);
1490   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1491                         SMLoc NameLoc, OperandVector &Operands) override;
1492   //bool ProcessInstruction(MCInst &Inst);
1493 
1494   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1495 
1496   OperandMatchResultTy
1497   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1498                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1499                      bool (*ConvertResult)(int64_t &) = nullptr);
1500 
1501   OperandMatchResultTy
1502   parseOperandArrayWithPrefix(const char *Prefix,
1503                               OperandVector &Operands,
1504                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1505                               bool (*ConvertResult)(int64_t&) = nullptr);
1506 
1507   OperandMatchResultTy
1508   parseNamedBit(StringRef Name, OperandVector &Operands,
1509                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1510   OperandMatchResultTy parseCPol(OperandVector &Operands);
1511   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1512                                              StringRef &Value,
1513                                              SMLoc &StringLoc);
1514 
1515   bool isModifier();
1516   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1517   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1518   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1519   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1520   bool parseSP3NegModifier();
1521   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1522   OperandMatchResultTy parseReg(OperandVector &Operands);
1523   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1524   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1525   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1526   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1527   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1528   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1529   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1530   OperandMatchResultTy parseUfmt(int64_t &Format);
1531   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1532   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1533   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1534   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1535   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1536   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1537   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1538 
1539   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1540   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1541   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1542   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1543 
1544   bool parseCnt(int64_t &IntVal);
1545   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1546   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1547 
1548 private:
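  // Parsed value of a named field inside an operand such as hwreg(...) or
  // sendmsg(...), together with its location for diagnostics.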
1549   struct OperandInfoTy {
1550     SMLoc Loc;
1551     int64_t Id;
1552     bool IsSymbolic = false;
1553     bool IsDefined = false;
1554 
1555     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1556   };
1557 
1558   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1559   bool validateSendMsg(const OperandInfoTy &Msg,
1560                        const OperandInfoTy &Op,
1561                        const OperandInfoTy &Stream);
1562 
1563   bool parseHwregBody(OperandInfoTy &HwReg,
1564                       OperandInfoTy &Offset,
1565                       OperandInfoTy &Width);
1566   bool validateHwreg(const OperandInfoTy &HwReg,
1567                      const OperandInfoTy &Offset,
1568                      const OperandInfoTy &Width);
1569 
1570   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1571   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1572   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1573 
1574   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1575                       const OperandVector &Operands) const;
1576   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1577   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1578   SMLoc getLitLoc(const OperandVector &Operands) const;
1579   SMLoc getConstLoc(const OperandVector &Operands) const;
1580 
1581   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1582   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1583   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1584   bool validateSOPLiteral(const MCInst &Inst) const;
1585   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1586   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1587   bool validateIntClampSupported(const MCInst &Inst);
1588   bool validateMIMGAtomicDMask(const MCInst &Inst);
1589   bool validateMIMGGatherDMask(const MCInst &Inst);
1590   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1591   bool validateMIMGDataSize(const MCInst &Inst);
1592   bool validateMIMGAddrSize(const MCInst &Inst);
1593   bool validateMIMGD16(const MCInst &Inst);
1594   bool validateMIMGDim(const MCInst &Inst);
1595   bool validateMIMGMSAA(const MCInst &Inst);
1596   bool validateOpSel(const MCInst &Inst);
1597   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1598   bool validateVccOperand(unsigned Reg) const;
1599   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1600   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1601   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1602   bool validateAGPRLdSt(const MCInst &Inst) const;
1603   bool validateVGPRAlign(const MCInst &Inst) const;
1604   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1605   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1606   bool validateDivScale(const MCInst &Inst);
1607   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1608                              const SMLoc &IDLoc);
1609   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1610   unsigned getConstantBusLimit(unsigned Opcode) const;
1611   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1612   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1613   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1614 
1615   bool isSupportedMnemo(StringRef Mnemo,
1616                         const FeatureBitset &FBS);
1617   bool isSupportedMnemo(StringRef Mnemo,
1618                         const FeatureBitset &FBS,
1619                         ArrayRef<unsigned> Variants);
1620   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1621 
1622   bool isId(const StringRef Id) const;
1623   bool isId(const AsmToken &Token, const StringRef Id) const;
1624   bool isToken(const AsmToken::TokenKind Kind) const;
1625   bool trySkipId(const StringRef Id);
1626   bool trySkipId(const StringRef Pref, const StringRef Id);
1627   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1628   bool trySkipToken(const AsmToken::TokenKind Kind);
1629   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1630   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1631   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1632 
1633   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1634   AsmToken::TokenKind getTokenKind() const;
1635   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1636   bool parseExpr(OperandVector &Operands);
1637   StringRef getTokenStr() const;
1638   AsmToken peekToken();
1639   AsmToken getToken() const;
1640   SMLoc getLoc() const;
1641   void lex();
1642 
1643 public:
1644   void onBeginOfFile() override;
1645 
1646   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1647   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1648 
1649   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1650   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1651   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1652   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1653   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1654   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1655 
1656   bool parseSwizzleOperand(int64_t &Op,
1657                            const unsigned MinVal,
1658                            const unsigned MaxVal,
1659                            const StringRef ErrMsg,
1660                            SMLoc &Loc);
1661   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1662                             const unsigned MinVal,
1663                             const unsigned MaxVal,
1664                             const StringRef ErrMsg);
1665   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1666   bool parseSwizzleOffset(int64_t &Imm);
1667   bool parseSwizzleMacro(int64_t &Imm);
1668   bool parseSwizzleQuadPerm(int64_t &Imm);
1669   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1670   bool parseSwizzleBroadcast(int64_t &Imm);
1671   bool parseSwizzleSwap(int64_t &Imm);
1672   bool parseSwizzleReverse(int64_t &Imm);
1673 
1674   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1675   int64_t parseGPRIdxMacro();
1676 
1677   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1678   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1679   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1680   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1681 
1682   AMDGPUOperand::Ptr defaultCPol() const;
1683 
1684   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1685   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1686   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1687   AMDGPUOperand::Ptr defaultFlatOffset() const;
1688 
1689   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1690 
1691   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1692                OptionalImmIndexMap &OptionalIdx);
1693   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1694   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1695   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1696   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1697                 OptionalImmIndexMap &OptionalIdx);
1698 
1699   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1700 
1701   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1702                bool IsAtomic = false);
1703   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1704   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1705 
1706   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1707 
1708   bool parseDimId(unsigned &Encoding);
1709   OperandMatchResultTy parseDim(OperandVector &Operands);
1710   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1711   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1712   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1713   int64_t parseDPPCtrlSel(StringRef Ctrl);
1714   int64_t parseDPPCtrlPerm();
1715   AMDGPUOperand::Ptr defaultRowMask() const;
1716   AMDGPUOperand::Ptr defaultBankMask() const;
1717   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1718   AMDGPUOperand::Ptr defaultFI() const;
1719   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1720   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1721 
1722   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1723                                     AMDGPUOperand::ImmTy Type);
1724   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1725   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1726   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1727   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1728   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1729   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1730   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1731                uint64_t BasicInstType,
1732                bool SkipDstVcc = false,
1733                bool SkipSrcVcc = false);
1734 
1735   AMDGPUOperand::Ptr defaultBLGP() const;
1736   AMDGPUOperand::Ptr defaultCBSZ() const;
1737   AMDGPUOperand::Ptr defaultABID() const;
1738 
1739   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1740   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1741 };
1742 
1743 struct OptionalOperand {
1744   const char *Name;
1745   AMDGPUOperand::ImmTy Type;
1746   bool IsBit;
1747   bool (*ConvertResult)(int64_t&);
1748 };
1749 
1750 } // end anonymous namespace
1751 
// May be called with an integer type of equivalent bitwidth.
1753 static const fltSemantics *getFltSemantics(unsigned Size) {
1754   switch (Size) {
1755   case 4:
1756     return &APFloat::IEEEsingle();
1757   case 8:
1758     return &APFloat::IEEEdouble();
1759   case 2:
1760     return &APFloat::IEEEhalf();
1761   default:
1762     llvm_unreachable("unsupported fp type");
1763   }
1764 }
1765 
1766 static const fltSemantics *getFltSemantics(MVT VT) {
1767   return getFltSemantics(VT.getSizeInBits() / 8);
1768 }
1769 
1770 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1771   switch (OperandType) {
1772   case AMDGPU::OPERAND_REG_IMM_INT32:
1773   case AMDGPU::OPERAND_REG_IMM_FP32:
1774   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1775   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1776   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1777   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1778   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1779   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1780   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1781   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1782   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1783   case AMDGPU::OPERAND_KIMM32:
1784     return &APFloat::IEEEsingle();
1785   case AMDGPU::OPERAND_REG_IMM_INT64:
1786   case AMDGPU::OPERAND_REG_IMM_FP64:
1787   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1788   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1789   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1790     return &APFloat::IEEEdouble();
1791   case AMDGPU::OPERAND_REG_IMM_INT16:
1792   case AMDGPU::OPERAND_REG_IMM_FP16:
1793   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1794   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1795   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1796   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1797   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1798   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1799   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1800   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1801   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1802   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1803   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1804   case AMDGPU::OPERAND_KIMM16:
1805     return &APFloat::IEEEhalf();
1806   default:
1807     llvm_unreachable("unsupported fp type");
1808   }
1809 }
1810 
1811 //===----------------------------------------------------------------------===//
1812 // Operand
1813 //===----------------------------------------------------------------------===//
1814 
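// Check whether an fp literal can be converted to the operand's floating-point
// type without overflow or underflow; precision loss alone is acceptable.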
1815 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1816   bool Lost;
1817 
  // Convert the literal to the operand's floating-point type.
1819   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1820                                                APFloat::rmNearestTiesToEven,
1821                                                &Lost);
  // We allow precision loss but not overflow or underflow.
1823   if (Status != APFloat::opOK &&
1824       Lost &&
1825       ((Status & APFloat::opOverflow)  != 0 ||
1826        (Status & APFloat::opUnderflow) != 0)) {
1827     return false;
1828   }
1829 
1830   return true;
1831 }
1832 
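// Return true if truncating Val to Size bits is lossless, i.e. Val is
// representable as either a Size-bit unsigned or signed integer.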
1833 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1834   return isUIntN(Size, Val) || isIntN(Size, Val);
1835 }
1836 
1837 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1838   if (VT.getScalarType() == MVT::i16) {
1839     // FP immediate values are broken.
1840     return isInlinableIntLiteral(Val);
1841   }
1842 
1843   // f16/v2f16 operands work correctly for all values.
1844   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1845 }
1846 
1847 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1848 
1849   // This is a hack to enable named inline values like
1850   // shared_base with both 32-bit and 64-bit operands.
1851   // Note that these values are defined as
1852   // 32-bit operands only.
1853   if (isInlineValue()) {
1854     return true;
1855   }
1856 
1857   if (!isImmTy(ImmTyNone)) {
1858     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1859     return false;
1860   }
  // TODO: We should avoid using host floats here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on MIPS hosts.
1864 
1865   APInt Literal(64, Imm.Val);
1866 
1867   if (Imm.IsFPImm) { // We got fp literal token
1868     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1869       return AMDGPU::isInlinableLiteral64(Imm.Val,
1870                                           AsmParser->hasInv2PiInlineImm());
1871     }
1872 
1873     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1874     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1875       return false;
1876 
1877     if (type.getScalarSizeInBits() == 16) {
1878       return isInlineableLiteralOp16(
1879         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1880         type, AsmParser->hasInv2PiInlineImm());
1881     }
1882 
1883     // Check if single precision literal is inlinable
1884     return AMDGPU::isInlinableLiteral32(
1885       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1886       AsmParser->hasInv2PiInlineImm());
1887   }
1888 
1889   // We got int literal token.
1890   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1891     return AMDGPU::isInlinableLiteral64(Imm.Val,
1892                                         AsmParser->hasInv2PiInlineImm());
1893   }
1894 
1895   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1896     return false;
1897   }
1898 
1899   if (type.getScalarSizeInBits() == 16) {
1900     return isInlineableLiteralOp16(
1901       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1902       type, AsmParser->hasInv2PiInlineImm());
1903   }
1904 
1905   return AMDGPU::isInlinableLiteral32(
1906     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1907     AsmParser->hasInv2PiInlineImm());
1908 }
1909 
1910 bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
1912   if (!isImmTy(ImmTyNone)) {
1913     return false;
1914   }
1915 
1916   if (!Imm.IsFPImm) {
1917     // We got int literal token.
1918 
1919     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3, because of integer truncation.
      // To avoid ambiguity, disable these cases.
1923       return false;
1924     }
1925 
1926     unsigned Size = type.getSizeInBits();
1927     if (Size == 64)
1928       Size = 32;
1929 
1930     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1931     // types.
1932     return isSafeTruncation(Imm.Val, Size);
1933   }
1934 
1935   // We got fp literal token
1936   if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 32 bits of the literal to zeroes, but we accept
    // such literals.
1938     return true;
1939   }
1940 
1941   if (type == MVT::i64) { // Expected 64-bit int operand
1942     // We don't allow fp literals in 64-bit integer instructions. It is
1943     // unclear how we should encode them.
1944     return false;
1945   }
1946 
  // We allow fp literals with packed 16/32-bit operands assuming that the
  // specified literal goes into the lower half and the upper half is zero. We
  // also require that the literal may be losslessly converted to the element
  // type.
1950   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1951                      (type == MVT::v2i16)? MVT::i16 :
1952                      (type == MVT::v2f32)? MVT::f32 : type;
1953 
1954   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1955   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1956 }
1957 
1958 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1959   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1960 }
1961 
1962 bool AMDGPUOperand::isVRegWithInputMods() const {
1963   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1964          // GFX90A allows DPP on 64-bit operands.
1965          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1966           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1967 }
1968 
1969 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1970   if (AsmParser->isVI())
1971     return isVReg32();
1972   else if (AsmParser->isGFX9Plus())
1973     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1974   else
1975     return false;
1976 }
1977 
1978 bool AMDGPUOperand::isSDWAFP16Operand() const {
1979   return isSDWAOperand(MVT::f16);
1980 }
1981 
1982 bool AMDGPUOperand::isSDWAFP32Operand() const {
1983   return isSDWAOperand(MVT::f32);
1984 }
1985 
1986 bool AMDGPUOperand::isSDWAInt16Operand() const {
1987   return isSDWAOperand(MVT::i16);
1988 }
1989 
1990 bool AMDGPUOperand::isSDWAInt32Operand() const {
1991   return isSDWAOperand(MVT::i32);
1992 }
1993 
1994 bool AMDGPUOperand::isBoolReg() const {
1995   auto FB = AsmParser->getFeatureBits();
1996   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1997                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1998 }
1999 
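// Apply the 'abs' and 'neg' FP input modifiers to a raw literal encoding by
// clearing or flipping the sign bit of the Size-byte floating-point value.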
2000 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2001 {
2002   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2003   assert(Size == 2 || Size == 4 || Size == 8);
2004 
2005   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2006 
2007   if (Imm.Mods.Abs) {
2008     Val &= ~FpSignMask;
2009   }
2010   if (Imm.Mods.Neg) {
2011     Val ^= FpSignMask;
2012   }
2013 
2014   return Val;
2015 }
2016 
2017 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2018   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2019                              Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2023   } else {
2024     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2025     Inst.addOperand(MCOperand::createImm(Imm.Val));
2026     setImmKindNone();
2027   }
2028 }
2029 
2030 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2031   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2032   auto OpNum = Inst.getNumOperands();
2033   // Check that this operand accepts literals
2034   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2035 
2036   if (ApplyModifiers) {
2037     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2038     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2039     Val = applyInputFPModifiers(Val, Size);
2040   }
2041 
2042   APInt Literal(64, Val);
2043   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2044 
2045   if (Imm.IsFPImm) { // We got fp literal token
2046     switch (OpTy) {
2047     case AMDGPU::OPERAND_REG_IMM_INT64:
2048     case AMDGPU::OPERAND_REG_IMM_FP64:
2049     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2050     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2051     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2052       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2053                                        AsmParser->hasInv2PiInlineImm())) {
2054         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2055         setImmKindConst();
2056         return;
2057       }
2058 
2059       // Non-inlineable
2060       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands, check whether the low 32 bits are zero.
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
              "Can't encode literal as exact 64-bit floating-point operand. "
              "Low 32-bits will be set to zero");
        }
2067 
2068         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2069         setImmKindLiteral();
2070         return;
2071       }
2072 
2073       // We don't allow fp literals in 64-bit integer instructions. It is
2074       // unclear how we should encode them. This case should be checked earlier
2075       // in predicate methods (isLiteralImm())
2076       llvm_unreachable("fp literal in 64-bit integer instruction.");
2077 
2078     case AMDGPU::OPERAND_REG_IMM_INT32:
2079     case AMDGPU::OPERAND_REG_IMM_FP32:
2080     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2081     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2082     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2083     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2084     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2085     case AMDGPU::OPERAND_REG_IMM_INT16:
2086     case AMDGPU::OPERAND_REG_IMM_FP16:
2087     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2088     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2089     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2090     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2091     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2092     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2093     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2094     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2095     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2096     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2097     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2098     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2099     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2100     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2101     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2102     case AMDGPU::OPERAND_KIMM32:
2103     case AMDGPU::OPERAND_KIMM16: {
2104       bool lost;
2105       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's floating-point format.
2107       FPLiteral.convert(*getOpFltSemantics(OpTy),
2108                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm().
2111 
2112       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2113       Inst.addOperand(MCOperand::createImm(ImmVal));
2114       setImmKindLiteral();
2115       return;
2116     }
2117     default:
2118       llvm_unreachable("invalid operand size");
2119     }
2120 
2121     return;
2122   }
2123 
2124   // We got int literal token.
2125   // Only sign extend inline immediates.
2126   switch (OpTy) {
2127   case AMDGPU::OPERAND_REG_IMM_INT32:
2128   case AMDGPU::OPERAND_REG_IMM_FP32:
2129   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2130   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2131   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2132   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2133   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2134   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2135   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2136   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2137   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2138   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2139   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2140     if (isSafeTruncation(Val, 32) &&
2141         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2142                                      AsmParser->hasInv2PiInlineImm())) {
2143       Inst.addOperand(MCOperand::createImm(Val));
2144       setImmKindConst();
2145       return;
2146     }
2147 
2148     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2149     setImmKindLiteral();
2150     return;
2151 
2152   case AMDGPU::OPERAND_REG_IMM_INT64:
2153   case AMDGPU::OPERAND_REG_IMM_FP64:
2154   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2155   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2156   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2157     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2158       Inst.addOperand(MCOperand::createImm(Val));
2159       setImmKindConst();
2160       return;
2161     }
2162 
2163     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2164     setImmKindLiteral();
2165     return;
2166 
2167   case AMDGPU::OPERAND_REG_IMM_INT16:
2168   case AMDGPU::OPERAND_REG_IMM_FP16:
2169   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2170   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2171   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2172   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2173   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2174     if (isSafeTruncation(Val, 16) &&
2175         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2176                                      AsmParser->hasInv2PiInlineImm())) {
2177       Inst.addOperand(MCOperand::createImm(Val));
2178       setImmKindConst();
2179       return;
2180     }
2181 
2182     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2183     setImmKindLiteral();
2184     return;
2185 
2186   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2187   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2188   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2189   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2190     assert(isSafeTruncation(Val, 16));
2191     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2192                                         AsmParser->hasInv2PiInlineImm()));
2193 
2194     Inst.addOperand(MCOperand::createImm(Val));
2195     return;
2196   }
2197   case AMDGPU::OPERAND_KIMM32:
2198     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2199     setImmKindNone();
2200     return;
2201   case AMDGPU::OPERAND_KIMM16:
2202     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2203     setImmKindNone();
2204     return;
2205   default:
2206     llvm_unreachable("invalid operand size");
2207   }
2208 }
2209 
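// Add a KImm operand of the given bit width. An fp literal token is converted
// to the corresponding floating-point format; an int literal token is
// truncated to Bitwidth bits.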
2210 template <unsigned Bitwidth>
2211 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2212   APInt Literal(64, Imm.Val);
2213   setImmKindNone();
2214 
2215   if (!Imm.IsFPImm) {
2216     // We got int literal token.
2217     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2218     return;
2219   }
2220 
2221   bool Lost;
2222   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2223   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2224                     APFloat::rmNearestTiesToEven, &Lost);
2225   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2226 }
2227 
2228 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2229   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2230 }
2231 
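// Check whether Reg is one of the named special registers that are encoded
// as inline constant values (e.g. src_shared_base, src_scc, null).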
2232 static bool isInlineValue(unsigned Reg) {
2233   switch (Reg) {
2234   case AMDGPU::SRC_SHARED_BASE:
2235   case AMDGPU::SRC_SHARED_LIMIT:
2236   case AMDGPU::SRC_PRIVATE_BASE:
2237   case AMDGPU::SRC_PRIVATE_LIMIT:
2238   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2239     return true;
2240   case AMDGPU::SRC_VCCZ:
2241   case AMDGPU::SRC_EXECZ:
2242   case AMDGPU::SRC_SCC:
2243     return true;
2244   case AMDGPU::SGPR_NULL:
2245     return true;
2246   default:
2247     return false;
2248   }
2249 }
2250 
2251 bool AMDGPUOperand::isInlineValue() const {
2252   return isRegKind() && ::isInlineValue(getReg());
2253 }
2254 
2255 //===----------------------------------------------------------------------===//
2256 // AsmParser
2257 //===----------------------------------------------------------------------===//
2258 
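// Map a register kind and width in bits to the corresponding register class
// ID, or return -1 if the combination is not supported.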
2259 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2260   if (Is == IS_VGPR) {
2261     switch (RegWidth) {
2262       default: return -1;
2263       case 32:
2264         return AMDGPU::VGPR_32RegClassID;
2265       case 64:
2266         return AMDGPU::VReg_64RegClassID;
2267       case 96:
2268         return AMDGPU::VReg_96RegClassID;
2269       case 128:
2270         return AMDGPU::VReg_128RegClassID;
2271       case 160:
2272         return AMDGPU::VReg_160RegClassID;
2273       case 192:
2274         return AMDGPU::VReg_192RegClassID;
2275       case 224:
2276         return AMDGPU::VReg_224RegClassID;
2277       case 256:
2278         return AMDGPU::VReg_256RegClassID;
2279       case 512:
2280         return AMDGPU::VReg_512RegClassID;
2281       case 1024:
2282         return AMDGPU::VReg_1024RegClassID;
2283     }
2284   } else if (Is == IS_TTMP) {
2285     switch (RegWidth) {
2286       default: return -1;
2287       case 32:
2288         return AMDGPU::TTMP_32RegClassID;
2289       case 64:
2290         return AMDGPU::TTMP_64RegClassID;
2291       case 128:
2292         return AMDGPU::TTMP_128RegClassID;
2293       case 256:
2294         return AMDGPU::TTMP_256RegClassID;
2295       case 512:
2296         return AMDGPU::TTMP_512RegClassID;
2297     }
2298   } else if (Is == IS_SGPR) {
2299     switch (RegWidth) {
2300       default: return -1;
2301       case 32:
2302         return AMDGPU::SGPR_32RegClassID;
2303       case 64:
2304         return AMDGPU::SGPR_64RegClassID;
2305       case 96:
2306         return AMDGPU::SGPR_96RegClassID;
2307       case 128:
2308         return AMDGPU::SGPR_128RegClassID;
2309       case 160:
2310         return AMDGPU::SGPR_160RegClassID;
2311       case 192:
2312         return AMDGPU::SGPR_192RegClassID;
2313       case 224:
2314         return AMDGPU::SGPR_224RegClassID;
2315       case 256:
2316         return AMDGPU::SGPR_256RegClassID;
2317       case 512:
2318         return AMDGPU::SGPR_512RegClassID;
2319     }
2320   } else if (Is == IS_AGPR) {
2321     switch (RegWidth) {
2322       default: return -1;
2323       case 32:
2324         return AMDGPU::AGPR_32RegClassID;
2325       case 64:
2326         return AMDGPU::AReg_64RegClassID;
2327       case 96:
2328         return AMDGPU::AReg_96RegClassID;
2329       case 128:
2330         return AMDGPU::AReg_128RegClassID;
2331       case 160:
2332         return AMDGPU::AReg_160RegClassID;
2333       case 192:
2334         return AMDGPU::AReg_192RegClassID;
2335       case 224:
2336         return AMDGPU::AReg_224RegClassID;
2337       case 256:
2338         return AMDGPU::AReg_256RegClassID;
2339       case 512:
2340         return AMDGPU::AReg_512RegClassID;
2341       case 1024:
2342         return AMDGPU::AReg_1024RegClassID;
2343     }
2344   }
2345   return -1;
2346 }
2347 
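// Map a special register name (including its "src_"-prefixed alias, where one
// exists) to the corresponding MC register, or AMDGPU::NoRegister.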
2348 static unsigned getSpecialRegForName(StringRef RegName) {
2349   return StringSwitch<unsigned>(RegName)
2350     .Case("exec", AMDGPU::EXEC)
2351     .Case("vcc", AMDGPU::VCC)
2352     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2353     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2354     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2355     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2356     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2357     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2358     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2359     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2360     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2361     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2362     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2363     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2364     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2365     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2366     .Case("m0", AMDGPU::M0)
2367     .Case("vccz", AMDGPU::SRC_VCCZ)
2368     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2369     .Case("execz", AMDGPU::SRC_EXECZ)
2370     .Case("src_execz", AMDGPU::SRC_EXECZ)
2371     .Case("scc", AMDGPU::SRC_SCC)
2372     .Case("src_scc", AMDGPU::SRC_SCC)
2373     .Case("tba", AMDGPU::TBA)
2374     .Case("tma", AMDGPU::TMA)
2375     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2376     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2377     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2378     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2379     .Case("vcc_lo", AMDGPU::VCC_LO)
2380     .Case("vcc_hi", AMDGPU::VCC_HI)
2381     .Case("exec_lo", AMDGPU::EXEC_LO)
2382     .Case("exec_hi", AMDGPU::EXEC_HI)
2383     .Case("tma_lo", AMDGPU::TMA_LO)
2384     .Case("tma_hi", AMDGPU::TMA_HI)
2385     .Case("tba_lo", AMDGPU::TBA_LO)
2386     .Case("tba_hi", AMDGPU::TBA_HI)
2387     .Case("pc", AMDGPU::PC_REG)
2388     .Case("null", AMDGPU::SGPR_NULL)
2389     .Default(AMDGPU::NoRegister);
2390 }
2391 
2392 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2393                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2394   auto R = parseRegister();
2395   if (!R) return true;
2396   assert(R->isReg());
2397   RegNo = R->getReg();
2398   StartLoc = R->getStartLoc();
2399   EndLoc = R->getEndLoc();
2400   return false;
2401 }
2402 
2403 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2404                                     SMLoc &EndLoc) {
2405   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2406 }
2407 
2408 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2409                                                        SMLoc &StartLoc,
2410                                                        SMLoc &EndLoc) {
2411   bool Result =
2412       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2413   bool PendingErrors = getParser().hasPendingError();
2414   getParser().clearPendingErrors();
2415   if (PendingErrors)
2416     return MatchOperand_ParseFail;
2417   if (Result)
2418     return MatchOperand_NoMatch;
2419   return MatchOperand_Success;
2420 }
2421 
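// Add the next register (Reg1) to a register list starting at Reg. Matching
// _LO/_HI halves of special registers are combined into the 64-bit register;
// regular registers must have consecutive indices and extend RegWidth by 32.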
2422 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2423                                             RegisterKind RegKind, unsigned Reg1,
2424                                             SMLoc Loc) {
2425   switch (RegKind) {
2426   case IS_SPECIAL:
2427     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2428       Reg = AMDGPU::EXEC;
2429       RegWidth = 64;
2430       return true;
2431     }
2432     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2433       Reg = AMDGPU::FLAT_SCR;
2434       RegWidth = 64;
2435       return true;
2436     }
2437     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2438       Reg = AMDGPU::XNACK_MASK;
2439       RegWidth = 64;
2440       return true;
2441     }
2442     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2443       Reg = AMDGPU::VCC;
2444       RegWidth = 64;
2445       return true;
2446     }
2447     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2448       Reg = AMDGPU::TBA;
2449       RegWidth = 64;
2450       return true;
2451     }
2452     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2453       Reg = AMDGPU::TMA;
2454       RegWidth = 64;
2455       return true;
2456     }
2457     Error(Loc, "register does not fit in the list");
2458     return false;
2459   case IS_VGPR:
2460   case IS_SGPR:
2461   case IS_AGPR:
2462   case IS_TTMP:
2463     if (Reg1 != Reg + RegWidth / 32) {
2464       Error(Loc, "registers in a list must have consecutive indices");
2465       return false;
2466     }
2467     RegWidth += 32;
2468     return true;
2469   default:
2470     llvm_unreachable("unexpected register kind");
2471   }
2472 }
2473 
2474 struct RegInfo {
2475   StringLiteral Name;
2476   RegisterKind Kind;
2477 };
2478 
2479 static constexpr RegInfo RegularRegisters[] = {
2480   {{"v"},    IS_VGPR},
2481   {{"s"},    IS_SGPR},
2482   {{"ttmp"}, IS_TTMP},
2483   {{"acc"},  IS_AGPR},
2484   {{"a"},    IS_AGPR},
2485 };
2486 
2487 static bool isRegularReg(RegisterKind Kind) {
2488   return Kind == IS_VGPR ||
2489          Kind == IS_SGPR ||
2490          Kind == IS_TTMP ||
2491          Kind == IS_AGPR;
2492 }
2493 
2494 static const RegInfo* getRegularRegInfo(StringRef Str) {
2495   for (const RegInfo &Reg : RegularRegisters)
2496     if (Str.startswith(Reg.Name))
2497       return &Reg;
2498   return nullptr;
2499 }
2500 
2501 static bool getRegNum(StringRef Str, unsigned& Num) {
2502   return !Str.getAsInteger(10, Num);
2503 }
2504 
2505 bool
2506 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2507                             const AsmToken &NextToken) const {
2508 
2509   // A list of consecutive registers: [s0,s1,s2,s3]
2510   if (Token.is(AsmToken::LBrac))
2511     return true;
2512 
2513   if (!Token.is(AsmToken::Identifier))
2514     return false;
2515 
2516   // A single register like s0 or a range of registers like s[0:1]
2517 
2518   StringRef Str = Token.getString();
2519   const RegInfo *Reg = getRegularRegInfo(Str);
2520   if (Reg) {
2521     StringRef RegName = Reg->Name;
2522     StringRef RegSuffix = Str.substr(RegName.size());
2523     if (!RegSuffix.empty()) {
2524       unsigned Num;
2525       // A single register with an index: rXX
2526       if (getRegNum(RegSuffix, Num))
2527         return true;
2528     } else {
2529       // A range of registers: r[XX:YY].
2530       if (NextToken.is(AsmToken::LBrac))
2531         return true;
2532     }
2533   }
2534 
2535   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2536 }
2537 
2538 bool
2539 AMDGPUAsmParser::isRegister()
2540 {
2541   return isRegister(getToken(), peekToken());
2542 }
2543 
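// Translate a (kind, index, width) triple into an MC register, checking the
// alignment requirements of SGPR/TTMP tuples and that the index is in range
// for the selected register class.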
2544 unsigned
2545 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2546                                unsigned RegNum,
2547                                unsigned RegWidth,
2548                                SMLoc Loc) {
2549 
2550   assert(isRegularReg(RegKind));
2551 
2552   unsigned AlignSize = 1;
2553   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2554     // SGPR and TTMP registers must be aligned.
2555     // Max required alignment is 4 dwords.
2556     AlignSize = std::min(RegWidth / 32, 4u);
2557   }
2558 
2559   if (RegNum % AlignSize != 0) {
2560     Error(Loc, "invalid register alignment");
2561     return AMDGPU::NoRegister;
2562   }
2563 
2564   unsigned RegIdx = RegNum / AlignSize;
2565   int RCID = getRegClass(RegKind, RegWidth);
2566   if (RCID == -1) {
2567     Error(Loc, "invalid or unsupported register size");
2568     return AMDGPU::NoRegister;
2569   }
2570 
2571   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2572   const MCRegisterClass RC = TRI->getRegClass(RCID);
2573   if (RegIdx >= RC.getNumRegs()) {
2574     Error(Loc, "register index is out of range");
2575     return AMDGPU::NoRegister;
2576   }
2577 
2578   return RC.getRegister(RegIdx);
2579 }
2580 
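// Parse a bracketed register index or range, e.g. [0] or [0:3], returning the
// starting index and the total width in bits.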
2581 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2582   int64_t RegLo, RegHi;
2583   if (!skipToken(AsmToken::LBrac, "missing register index"))
2584     return false;
2585 
2586   SMLoc FirstIdxLoc = getLoc();
2587   SMLoc SecondIdxLoc;
2588 
2589   if (!parseExpr(RegLo))
2590     return false;
2591 
2592   if (trySkipToken(AsmToken::Colon)) {
2593     SecondIdxLoc = getLoc();
2594     if (!parseExpr(RegHi))
2595       return false;
2596   } else {
2597     RegHi = RegLo;
2598   }
2599 
2600   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2601     return false;
2602 
2603   if (!isUInt<32>(RegLo)) {
2604     Error(FirstIdxLoc, "invalid register index");
2605     return false;
2606   }
2607 
2608   if (!isUInt<32>(RegHi)) {
2609     Error(SecondIdxLoc, "invalid register index");
2610     return false;
2611   }
2612 
2613   if (RegLo > RegHi) {
2614     Error(FirstIdxLoc, "first register index should not exceed second index");
2615     return false;
2616   }
2617 
2618   Num = static_cast<unsigned>(RegLo);
2619   RegWidth = 32 * ((RegHi - RegLo) + 1);
2620   return true;
2621 }
2622 
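// Try to parse a special register name such as vcc or exec; returns
// AMDGPU::NoRegister if the current identifier does not name one.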
2623 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2624                                           unsigned &RegNum, unsigned &RegWidth,
2625                                           SmallVectorImpl<AsmToken> &Tokens) {
2626   assert(isToken(AsmToken::Identifier));
2627   unsigned Reg = getSpecialRegForName(getTokenStr());
2628   if (Reg) {
2629     RegNum = 0;
2630     RegWidth = 32;
2631     RegKind = IS_SPECIAL;
2632     Tokens.push_back(getToken());
2633     lex(); // skip register name
2634   }
2635   return Reg;
2636 }
2637 
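// Parse a regular (v/s/a/ttmp) register: either a single register such as v0
// or a bracketed range such as v[0:3].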
2638 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2639                                           unsigned &RegNum, unsigned &RegWidth,
2640                                           SmallVectorImpl<AsmToken> &Tokens) {
2641   assert(isToken(AsmToken::Identifier));
2642   StringRef RegName = getTokenStr();
2643   auto Loc = getLoc();
2644 
2645   const RegInfo *RI = getRegularRegInfo(RegName);
2646   if (!RI) {
2647     Error(Loc, "invalid register name");
2648     return AMDGPU::NoRegister;
2649   }
2650 
2651   Tokens.push_back(getToken());
2652   lex(); // skip register name
2653 
2654   RegKind = RI->Kind;
2655   StringRef RegSuffix = RegName.substr(RI->Name.size());
2656   if (!RegSuffix.empty()) {
2657     // Single 32-bit register: vXX.
2658     if (!getRegNum(RegSuffix, RegNum)) {
2659       Error(Loc, "invalid register index");
2660       return AMDGPU::NoRegister;
2661     }
2662     RegWidth = 32;
2663   } else {
2664     // Range of registers: v[XX:YY]. ":YY" is optional.
2665     if (!ParseRegRange(RegNum, RegWidth))
2666       return AMDGPU::NoRegister;
2667   }
2668 
2669   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2670 }
2671 
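// Parse a list of single 32-bit registers of the same kind, e.g. [s0,s1,s2,s3],
// and combine them into one register of the corresponding width.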
2672 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2673                                        unsigned &RegWidth,
2674                                        SmallVectorImpl<AsmToken> &Tokens) {
2675   unsigned Reg = AMDGPU::NoRegister;
2676   auto ListLoc = getLoc();
2677 
2678   if (!skipToken(AsmToken::LBrac,
2679                  "expected a register or a list of registers")) {
2680     return AMDGPU::NoRegister;
2681   }
2682 
2683   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2684 
2685   auto Loc = getLoc();
2686   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2687     return AMDGPU::NoRegister;
2688   if (RegWidth != 32) {
2689     Error(Loc, "expected a single 32-bit register");
2690     return AMDGPU::NoRegister;
2691   }
2692 
  while (trySkipToken(AsmToken::Comma)) {
2694     RegisterKind NextRegKind;
2695     unsigned NextReg, NextRegNum, NextRegWidth;
2696     Loc = getLoc();
2697 
2698     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2699                              NextRegNum, NextRegWidth,
2700                              Tokens)) {
2701       return AMDGPU::NoRegister;
2702     }
2703     if (NextRegWidth != 32) {
2704       Error(Loc, "expected a single 32-bit register");
2705       return AMDGPU::NoRegister;
2706     }
2707     if (NextRegKind != RegKind) {
2708       Error(Loc, "registers in a list must be of the same kind");
2709       return AMDGPU::NoRegister;
2710     }
2711     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2712       return AMDGPU::NoRegister;
2713   }
2714 
2715   if (!skipToken(AsmToken::RBrac,
2716                  "expected a comma or a closing square bracket")) {
2717     return AMDGPU::NoRegister;
2718   }
2719 
2720   if (isRegularReg(RegKind))
2721     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2722 
2723   return Reg;
2724 }
2725 
2726 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2727                                           unsigned &RegNum, unsigned &RegWidth,
2728                                           SmallVectorImpl<AsmToken> &Tokens) {
2729   auto Loc = getLoc();
2730   Reg = AMDGPU::NoRegister;
2731 
2732   if (isToken(AsmToken::Identifier)) {
2733     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2734     if (Reg == AMDGPU::NoRegister)
2735       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2736   } else {
2737     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2738   }
2739 
2740   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2741   if (Reg == AMDGPU::NoRegister) {
2742     assert(Parser.hasPendingError());
2743     return false;
2744   }
2745 
2746   if (!subtargetHasRegister(*TRI, Reg)) {
2747     if (Reg == AMDGPU::SGPR_NULL) {
2748       Error(Loc, "'null' operand is not supported on this GPU");
2749     } else {
2750       Error(Loc, "register not available on this GPU");
2751     }
2752     return false;
2753   }
2754 
2755   return true;
2756 }
2757 
2758 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2759                                           unsigned &RegNum, unsigned &RegWidth,
2760                                           bool RestoreOnFailure /*=false*/) {
2761   Reg = AMDGPU::NoRegister;
2762 
2763   SmallVector<AsmToken, 1> Tokens;
2764   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2765     if (RestoreOnFailure) {
2766       while (!Tokens.empty()) {
2767         getLexer().UnLex(Tokens.pop_back_val());
2768       }
2769     }
2770     return true;
2771   }
2772   return false;
2773 }
2774 
2775 Optional<StringRef>
2776 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2777   switch (RegKind) {
2778   case IS_VGPR:
2779     return StringRef(".amdgcn.next_free_vgpr");
2780   case IS_SGPR:
2781     return StringRef(".amdgcn.next_free_sgpr");
2782   default:
2783     return None;
2784   }
2785 }
2786 
2787 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2788   auto SymbolName = getGprCountSymbolName(RegKind);
2789   assert(SymbolName && "initializing invalid register kind");
2790   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2791   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2792 }
2793 
2794 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2795                                             unsigned DwordRegIndex,
2796                                             unsigned RegWidth) {
2797   // Symbols are only defined for GCN targets
2798   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2799     return true;
2800 
2801   auto SymbolName = getGprCountSymbolName(RegKind);
2802   if (!SymbolName)
2803     return true;
2804   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2805 
2806   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2807   int64_t OldCount;
2808 
2809   if (!Sym->isVariable())
2810     return !Error(getLoc(),
2811                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2812   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2813     return !Error(
2814         getLoc(),
2815         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2816 
2817   if (OldCount <= NewMax)
2818     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2819 
2820   return true;
2821 }
2822 
2823 std::unique_ptr<AMDGPUOperand>
2824 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2825   const auto &Tok = getToken();
2826   SMLoc StartLoc = Tok.getLoc();
2827   SMLoc EndLoc = Tok.getEndLoc();
2828   RegisterKind RegKind;
2829   unsigned Reg, RegNum, RegWidth;
2830 
2831   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2832     return nullptr;
2833   }
2834   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2835     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2836       return nullptr;
2837   } else
2838     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2839   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2840 }
2841 
2842 OperandMatchResultTy
2843 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2844   // TODO: add syntactic sugar for 1/(2*PI)
2845 
2846   assert(!isRegister());
2847   assert(!isModifier());
2848 
2849   const auto& Tok = getToken();
2850   const auto& NextTok = peekToken();
2851   bool IsReal = Tok.is(AsmToken::Real);
2852   SMLoc S = getLoc();
2853   bool Negate = false;
2854 
2855   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2856     lex();
2857     IsReal = true;
2858     Negate = true;
2859   }
2860 
2861   if (IsReal) {
2862     // Floating-point expressions are not supported.
2863     // Can only allow floating-point literals with an
2864     // optional sign.
2865 
2866     StringRef Num = getTokenStr();
2867     lex();
2868 
2869     APFloat RealVal(APFloat::IEEEdouble());
2870     auto roundMode = APFloat::rmNearestTiesToEven;
2871     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2872       return MatchOperand_ParseFail;
2873     }
2874     if (Negate)
2875       RealVal.changeSign();
2876 
2877     Operands.push_back(
2878       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2879                                AMDGPUOperand::ImmTyNone, true));
2880 
2881     return MatchOperand_Success;
2882 
2883   } else {
2884     int64_t IntVal;
2885     const MCExpr *Expr;
2886     SMLoc S = getLoc();
2887 
2888     if (HasSP3AbsModifier) {
2889       // This is a workaround for handling expressions
2890       // as arguments of SP3 'abs' modifier, for example:
2891       //     |1.0|
2892       //     |-1|
2893       //     |1+x|
      // This syntax is not compatible with the syntax of standard
      // MC expressions (due to the trailing '|').
2896       SMLoc EndLoc;
2897       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2898         return MatchOperand_ParseFail;
2899     } else {
2900       if (Parser.parseExpression(Expr))
2901         return MatchOperand_ParseFail;
2902     }
2903 
2904     if (Expr->evaluateAsAbsolute(IntVal)) {
2905       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2906     } else {
2907       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2908     }
2909 
2910     return MatchOperand_Success;
2911   }
2912 
2913   return MatchOperand_NoMatch;
2914 }
2915 
2916 OperandMatchResultTy
2917 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2918   if (!isRegister())
2919     return MatchOperand_NoMatch;
2920 
2921   if (auto R = parseRegister()) {
2922     assert(R->isReg());
2923     Operands.push_back(std::move(R));
2924     return MatchOperand_Success;
2925   }
2926   return MatchOperand_ParseFail;
2927 }
2928 
2929 OperandMatchResultTy
2930 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2931   auto res = parseReg(Operands);
2932   if (res != MatchOperand_NoMatch) {
2933     return res;
2934   } else if (isModifier()) {
2935     return MatchOperand_NoMatch;
2936   } else {
2937     return parseImm(Operands, HasSP3AbsMod);
2938   }
2939 }
2940 
2941 bool
2942 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2943   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2944     const auto &str = Token.getString();
2945     return str == "abs" || str == "neg" || str == "sext";
2946   }
2947   return false;
2948 }
2949 
2950 bool
2951 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2952   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2953 }
2954 
2955 bool
2956 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2957   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2958 }
2959 
2960 bool
2961 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2962   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2963 }
2964 
2965 // Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
2967 // avoid parsing these modifiers as expressions. Currently
2968 // recognized sequences are:
2969 //   |...|
2970 //   abs(...)
2971 //   neg(...)
2972 //   sext(...)
2973 //   -reg
2974 //   -|...|
2975 //   -abs(...)
2976 //   name:...
2977 // Note that simple opcode modifiers like 'gds' may be parsed as
2978 // expressions; this is a special case. See getExpressionAsToken.
2979 //
2980 bool
2981 AMDGPUAsmParser::isModifier() {
2982 
2983   AsmToken Tok = getToken();
2984   AsmToken NextToken[2];
2985   peekTokens(NextToken);
2986 
2987   return isOperandModifier(Tok, NextToken[0]) ||
2988          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2989          isOpcodeModifierWithVal(Tok, NextToken[0]);
2990 }
2991 
2992 // Check if the current token is an SP3 'neg' modifier.
2993 // Currently this modifier is allowed in the following context:
2994 //
2995 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2996 // 2. Before an 'abs' modifier: -abs(...)
2997 // 3. Before an SP3 'abs' modifier: -|...|
2998 //
2999 // In all other cases "-" is handled as a part
3000 // of an expression that follows the sign.
3001 //
3002 // Note: When "-" is followed by an integer literal,
3003 // this is interpreted as integer negation rather
3004 // than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
3007 // of integer literals used with VOP1/2/C and VOP3,
3008 // for example:
3009 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3010 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
3013 //
3014 bool
3015 AMDGPUAsmParser::parseSP3NegModifier() {
3016 
3017   AsmToken NextToken[2];
3018   peekTokens(NextToken);
3019 
3020   if (isToken(AsmToken::Minus) &&
3021       (isRegister(NextToken[0], NextToken[1]) ||
3022        NextToken[0].is(AsmToken::Pipe) ||
3023        isId(NextToken[0], "abs"))) {
3024     lex();
3025     return true;
3026   }
3027 
3028   return false;
3029 }
3030 
3031 OperandMatchResultTy
3032 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3033                                               bool AllowImm) {
3034   bool Neg, SP3Neg;
3035   bool Abs, SP3Abs;
3036   SMLoc Loc;
3037 
3038   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3039   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3040     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3041     return MatchOperand_ParseFail;
3042   }
3043 
3044   SP3Neg = parseSP3NegModifier();
3045 
3046   Loc = getLoc();
3047   Neg = trySkipId("neg");
3048   if (Neg && SP3Neg) {
3049     Error(Loc, "expected register or immediate");
3050     return MatchOperand_ParseFail;
3051   }
3052   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3053     return MatchOperand_ParseFail;
3054 
3055   Abs = trySkipId("abs");
3056   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3057     return MatchOperand_ParseFail;
3058 
3059   Loc = getLoc();
3060   SP3Abs = trySkipToken(AsmToken::Pipe);
3061   if (Abs && SP3Abs) {
3062     Error(Loc, "expected register or immediate");
3063     return MatchOperand_ParseFail;
3064   }
3065 
3066   OperandMatchResultTy Res;
3067   if (AllowImm) {
3068     Res = parseRegOrImm(Operands, SP3Abs);
3069   } else {
3070     Res = parseReg(Operands);
3071   }
3072   if (Res != MatchOperand_Success) {
3073     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3074   }
3075 
3076   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3077     return MatchOperand_ParseFail;
3078   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3079     return MatchOperand_ParseFail;
3080   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3081     return MatchOperand_ParseFail;
3082 
3083   AMDGPUOperand::Modifiers Mods;
3084   Mods.Abs = Abs || SP3Abs;
3085   Mods.Neg = Neg || SP3Neg;
3086 
3087   if (Mods.hasFPModifiers()) {
3088     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3089     if (Op.isExpr()) {
3090       Error(Op.getStartLoc(), "expected an absolute expression");
3091       return MatchOperand_ParseFail;
3092     }
3093     Op.setModifiers(Mods);
3094   }
3095   return MatchOperand_Success;
3096 }
3097 
3098 OperandMatchResultTy
3099 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3100                                                bool AllowImm) {
3101   bool Sext = trySkipId("sext");
3102   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3103     return MatchOperand_ParseFail;
3104 
3105   OperandMatchResultTy Res;
3106   if (AllowImm) {
3107     Res = parseRegOrImm(Operands);
3108   } else {
3109     Res = parseReg(Operands);
3110   }
3111   if (Res != MatchOperand_Success) {
3112     return Sext? MatchOperand_ParseFail : Res;
3113   }
3114 
3115   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3116     return MatchOperand_ParseFail;
3117 
3118   AMDGPUOperand::Modifiers Mods;
3119   Mods.Sext = Sext;
3120 
3121   if (Mods.hasIntModifiers()) {
3122     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3123     if (Op.isExpr()) {
3124       Error(Op.getStartLoc(), "expected an absolute expression");
3125       return MatchOperand_ParseFail;
3126     }
3127     Op.setModifiers(Mods);
3128   }
3129 
3130   return MatchOperand_Success;
3131 }
3132 
3133 OperandMatchResultTy
3134 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3135   return parseRegOrImmWithFPInputMods(Operands, false);
3136 }
3137 
3138 OperandMatchResultTy
3139 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3140   return parseRegOrImmWithIntInputMods(Operands, false);
3141 }
3142 
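// Parse either the literal token 'off' (recorded as an ImmTyOff immediate)
// or a register operand.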
3143 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3144   auto Loc = getLoc();
3145   if (trySkipId("off")) {
3146     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3147                                                 AMDGPUOperand::ImmTyOff, false));
3148     return MatchOperand_Success;
3149   }
3150 
3151   if (!isRegister())
3152     return MatchOperand_NoMatch;
3153 
3154   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3155   if (Reg) {
3156     Operands.push_back(std::move(Reg));
3157     return MatchOperand_Success;
3158   }
3159 
  return MatchOperand_ParseFail;
}
3163 
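// Reject matches that conflict with a forced encoding suffix (_e32, _e64,
// _dpp, _sdwa) and prefer the e32 form for opcodes marked VOPAsmPrefer32Bit.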
3164 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3165   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3166 
3167   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3168       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3169       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3170       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3171     return Match_InvalidOperand;
3172 
3173   if ((TSFlags & SIInstrFlags::VOP3) &&
3174       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3175       getForcedEncodingSize() != 64)
3176     return Match_PreferE32;
3177 
3178   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3179       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3180     // v_mac_f32/16 allow only dst_sel == DWORD;
3181     auto OpNum =
3182         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3183     const auto &Op = Inst.getOperand(OpNum);
3184     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3185       return Match_InvalidOperand;
3186     }
3187   }
3188 
3189   return Match_Success;
3190 }
3191 
3192 static ArrayRef<unsigned> getAllVariants() {
3193   static const unsigned Variants[] = {
3194     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3195     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3196   };
3197 
3198   return makeArrayRef(Variants);
3199 }
3200 
// Which asm variants we should check during matching.
3202 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3203   if (getForcedEncodingSize() == 32) {
3204     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3205     return makeArrayRef(Variants);
3206   }
3207 
3208   if (isForcedVOP3()) {
3209     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3210     return makeArrayRef(Variants);
3211   }
3212 
3213   if (isForcedSDWA()) {
3214     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3215                                         AMDGPUAsmVariants::SDWA9};
3216     return makeArrayRef(Variants);
3217   }
3218 
3219   if (isForcedDPP()) {
3220     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3221     return makeArrayRef(Variants);
3222   }
3223 
3224   return getAllVariants();
3225 }
3226 
3227 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3228   if (getForcedEncodingSize() == 32)
3229     return "e32";
3230 
3231   if (isForcedVOP3())
3232     return "e64";
3233 
3234   if (isForcedSDWA())
3235     return "sdwa";
3236 
3237   if (isForcedDPP())
3238     return "dpp";
3239 
3240   return "";
3241 }
3242 
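// Return the first implicit scalar register read by the instruction
// (FLAT_SCR, VCC, VCC_LO, VCC_HI or M0), or NoRegister if there is none.
// Such implicit reads count toward the constant bus limit.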
3243 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3244   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3245   const unsigned Num = Desc.getNumImplicitUses();
3246   for (unsigned i = 0; i < Num; ++i) {
3247     unsigned Reg = Desc.ImplicitUses[i];
3248     switch (Reg) {
3249     case AMDGPU::FLAT_SCR:
3250     case AMDGPU::VCC:
3251     case AMDGPU::VCC_LO:
3252     case AMDGPU::VCC_HI:
3253     case AMDGPU::M0:
3254       return Reg;
3255     default:
3256       break;
3257     }
3258   }
3259   return AMDGPU::NoRegister;
3260 }
3261 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
3264 // Note that there are no cases when a GFX7 opcode violates
3265 // constant bus limitations due to the use of an f16 constant.
3266 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3267                                        unsigned OpIdx) const {
3268   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3269 
3270   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3271     return false;
3272   }
3273 
3274   const MCOperand &MO = Inst.getOperand(OpIdx);
3275 
3276   int64_t Val = MO.getImm();
3277   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3278 
3279   switch (OpSize) { // expected operand size
3280   case 8:
3281     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3282   case 4:
3283     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3284   case 2: {
3285     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3286     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3287         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3288         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3289       return AMDGPU::isInlinableIntLiteral(Val);
3290 
3291     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3292         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3293         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3294       return AMDGPU::isInlinableIntLiteralV216(Val);
3295 
3296     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3297         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3298         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3299       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3300 
3301     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3302   }
3303   default:
3304     llvm_unreachable("invalid operand size");
3305   }
3306 }
3307 
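// The constant bus can deliver a single scalar value per instruction before
// GFX10; on GFX10+ most opcodes may read two, but 64-bit shifts remain
// limited to one.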
3308 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3309   if (!isGFX10Plus())
3310     return 1;
3311 
3312   switch (Opcode) {
3313   // 64-bit shift instructions can use only one scalar value input
3314   case AMDGPU::V_LSHLREV_B64_e64:
3315   case AMDGPU::V_LSHLREV_B64_gfx10:
3316   case AMDGPU::V_LSHRREV_B64_e64:
3317   case AMDGPU::V_LSHRREV_B64_gfx10:
3318   case AMDGPU::V_ASHRREV_I64_e64:
3319   case AMDGPU::V_ASHRREV_I64_gfx10:
3320   case AMDGPU::V_LSHL_B64_e64:
3321   case AMDGPU::V_LSHR_B64_e64:
3322   case AMDGPU::V_ASHR_I64_e64:
3323     return 1;
3324   default:
3325     return 2;
3326   }
3327 }
3328 
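// An operand occupies the constant bus if it is a literal that is not an
// inline constant, or an SGPR other than null; unknown operand kinds are
// counted conservatively.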
3329 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3330   const MCOperand &MO = Inst.getOperand(OpIdx);
3331   if (MO.isImm()) {
3332     return !isInlineConstant(Inst, OpIdx);
3333   } else if (MO.isReg()) {
3334     auto Reg = MO.getReg();
3335     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3336     auto PReg = mc2PseudoReg(Reg);
3337     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3338   } else {
3339     return true;
3340   }
3341 }
3342 
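// Count the scalar values (SGPRs and literals) read over the constant bus
// and reject the instruction if the limit for this opcode is exceeded. For
// example, a pre-GFX10 VOP3 instruction reading two different SGPR sources
// would be rejected here.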
3343 bool
3344 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3345                                                 const OperandVector &Operands) {
3346   const unsigned Opcode = Inst.getOpcode();
3347   const MCInstrDesc &Desc = MII.get(Opcode);
3348   unsigned LastSGPR = AMDGPU::NoRegister;
3349   unsigned ConstantBusUseCount = 0;
3350   unsigned NumLiterals = 0;
3351   unsigned LiteralSize;
3352 
3353   if (Desc.TSFlags &
3354       (SIInstrFlags::VOPC |
3355        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3356        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3357        SIInstrFlags::SDWA)) {
3358     // Check special imm operands (used by madmk, etc)
3359     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3360       ++NumLiterals;
3361       LiteralSize = 4;
3362     }
3363 
3364     SmallDenseSet<unsigned> SGPRsUsed;
3365     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3366     if (SGPRUsed != AMDGPU::NoRegister) {
3367       SGPRsUsed.insert(SGPRUsed);
3368       ++ConstantBusUseCount;
3369     }
3370 
3371     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3372     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3373     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3374 
3375     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3376 
3377     for (int OpIdx : OpIndices) {
3378       if (OpIdx == -1) break;
3379 
3380       const MCOperand &MO = Inst.getOperand(OpIdx);
3381       if (usesConstantBus(Inst, OpIdx)) {
3382         if (MO.isReg()) {
3383           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with partial intersections like these
3385           //   s0, s[0:1]
3386           //   flat_scratch_lo, flat_scratch
3387           //   flat_scratch_lo, flat_scratch_hi
3388           // are theoretically valid but they are disabled anyway.
3389           // Note that this code mimics SIInstrInfo::verifyInstruction
3390           if (!SGPRsUsed.count(LastSGPR)) {
3391             SGPRsUsed.insert(LastSGPR);
3392             ++ConstantBusUseCount;
3393           }
3394         } else { // Expression or a literal
3395 
3396           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3397             continue; // special operand like VINTERP attr_chan
3398 
3399           // An instruction may use only one literal.
3400           // This has been validated on the previous step.
3401           // See validateVOPLiteral.
3402           // This literal may be used as more than one operand.
3403           // If all these operands are of the same size,
3404           // this literal counts as one scalar value.
3405           // Otherwise it counts as 2 scalar values.
3406           // See "GFX10 Shader Programming", section 3.6.2.3.
3407 
3408           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3409           if (Size < 4) Size = 4;
3410 
3411           if (NumLiterals == 0) {
3412             NumLiterals = 1;
3413             LiteralSize = Size;
3414           } else if (LiteralSize != Size) {
3415             NumLiterals = 2;
3416           }
3417         }
3418       }
3419     }
3420   }
3421   ConstantBusUseCount += NumLiterals;
3422 
3423   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3424     return true;
3425 
3426   SMLoc LitLoc = getLitLoc(Operands);
3427   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3428   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3429   Error(Loc, "invalid operand (violates constant bus restrictions)");
3430   return false;
3431 }
3432 
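// For instructions with an earlyclobber vdst constraint, the destination
// register must not overlap any source register.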
3433 bool
3434 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3435                                                  const OperandVector &Operands) {
3436   const unsigned Opcode = Inst.getOpcode();
3437   const MCInstrDesc &Desc = MII.get(Opcode);
3438 
3439   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3440   if (DstIdx == -1 ||
3441       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3442     return true;
3443   }
3444 
3445   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3446 
3447   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3448   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3449   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3450 
3451   assert(DstIdx != -1);
3452   const MCOperand &Dst = Inst.getOperand(DstIdx);
3453   assert(Dst.isReg());
3454 
3455   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3456 
3457   for (int SrcIdx : SrcIndices) {
3458     if (SrcIdx == -1) break;
3459     const MCOperand &Src = Inst.getOperand(SrcIdx);
3460     if (Src.isReg()) {
3461       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3462         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3463         Error(getRegLoc(SrcReg, Operands),
3464           "destination must be different than all sources");
3465         return false;
3466       }
3467     }
3468   }
3469 
3470   return true;
3471 }
3472 
3473 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3474 
3475   const unsigned Opc = Inst.getOpcode();
3476   const MCInstrDesc &Desc = MII.get(Opc);
3477 
3478   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3479     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3480     assert(ClampIdx != -1);
3481     return Inst.getOperand(ClampIdx).getImm() == 0;
3482   }
3483 
3484   return true;
3485 }
3486 
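// Check that the width of the MIMG vdata operand matches the number of
// channels enabled by dmask, plus one dword for tfe, halved for packed d16.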
3487 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3488 
3489   const unsigned Opc = Inst.getOpcode();
3490   const MCInstrDesc &Desc = MII.get(Opc);
3491 
3492   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3493     return true;
3494 
3495   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3496   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3497   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3498 
3499   assert(VDataIdx != -1);
3500 
3501   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3502     return true;
3503 
3504   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3505   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3506   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3507   if (DMask == 0)
3508     DMask = 1;
3509 
3510   unsigned DataSize =
3511     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3512   if (hasPackedD16()) {
3513     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3514     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3515       DataSize = (DataSize + 1) / 2;
3516   }
3517 
3518   return (VDataSize / 4) == DataSize + TFESize;
3519 }
3520 
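// On GFX10+, check that the number of address registers (NSA operands or a
// packed VGPR tuple) matches what dim, a16 and g16 require; oversized 8-VGPR
// non-NSA tuples are accepted for backward compatibility.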
3521 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3522   const unsigned Opc = Inst.getOpcode();
3523   const MCInstrDesc &Desc = MII.get(Opc);
3524 
3525   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3526     return true;
3527 
3528   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3529 
3530   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3531       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3532   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3533   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3534   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3535   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3536 
3537   assert(VAddr0Idx != -1);
3538   assert(SrsrcIdx != -1);
3539   assert(SrsrcIdx > VAddr0Idx);
3540 
3541   if (DimIdx == -1)
3542     return true; // intersect_ray
3543 
3544   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3545   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3546   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3547   unsigned ActualAddrSize =
3548       IsNSA ? SrsrcIdx - VAddr0Idx
3549             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3550   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3551 
3552   unsigned ExpectedAddrSize =
3553       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3554 
3555   if (!IsNSA) {
3556     if (ExpectedAddrSize > 8)
3557       ExpectedAddrSize = 16;
3558 
3559     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3560     // This provides backward compatibility for assembly created
3561     // before 160b/192b/224b types were directly supported.
3562     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3563       return true;
3564   }
3565 
3566   return ActualAddrSize == ExpectedAddrSize;
3567 }
3568 
3569 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3570 
3571   const unsigned Opc = Inst.getOpcode();
3572   const MCInstrDesc &Desc = MII.get(Opc);
3573 
3574   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3575     return true;
3576   if (!Desc.mayLoad() || !Desc.mayStore())
3577     return true; // Not atomic
3578 
3579   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3580   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3581 
3582   // This is an incomplete check because image_atomic_cmpswap
3583   // may only use 0x3 and 0xf while other atomic operations
3584   // may use 0x1 and 0x3. However these limitations are
3585   // verified when we check that dmask matches dst size.
3586   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3587 }
3588 
3589 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3590 
3591   const unsigned Opc = Inst.getOpcode();
3592   const MCInstrDesc &Desc = MII.get(Opc);
3593 
3594   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3595     return true;
3596 
3597   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3598   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3599 
3600   // GATHER4 instructions use dmask in a different fashion compared to
3601   // other MIMG instructions. The only useful DMASK values are
3602   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3603   // (red,red,red,red) etc.) The ISA document doesn't mention
3604   // this.
3605   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3606 }
3607 
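// Opcodes marked MSAA must be used with an MSAA dim value.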
3608 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3609   const unsigned Opc = Inst.getOpcode();
3610   const MCInstrDesc &Desc = MII.get(Opc);
3611 
3612   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3613     return true;
3614 
3615   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3616   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3617       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3618 
3619   if (!BaseOpcode->MSAA)
3620     return true;
3621 
3622   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3623   assert(DimIdx != -1);
3624 
3625   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3626   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3627 
3628   return DimInfo->MSAA;
3629 }
3630 
3631 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3632 {
3633   switch (Opcode) {
3634   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3635   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3636   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3637     return true;
3638   default:
3639     return false;
3640   }
3641 }
3642 
// movrels* opcodes should only allow VGPRs as src0.
3644 // This is specified in .td description for vop1/vop3,
3645 // but sdwa is handled differently. See isSDWAOperand.
3646 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3647                                       const OperandVector &Operands) {
3648 
3649   const unsigned Opc = Inst.getOpcode();
3650   const MCInstrDesc &Desc = MII.get(Opc);
3651 
3652   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3653     return true;
3654 
3655   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3656   assert(Src0Idx != -1);
3657 
3658   SMLoc ErrLoc;
3659   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3660   if (Src0.isReg()) {
3661     auto Reg = mc2PseudoReg(Src0.getReg());
3662     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3663     if (!isSGPR(Reg, TRI))
3664       return true;
3665     ErrLoc = getRegLoc(Reg, Operands);
3666   } else {
3667     ErrLoc = getConstLoc(Operands);
3668   }
3669 
3670   Error(ErrLoc, "source operand must be a VGPR");
3671   return false;
3672 }
3673 
3674 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3675                                           const OperandVector &Operands) {
3676 
3677   const unsigned Opc = Inst.getOpcode();
3678 
3679   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3680     return true;
3681 
3682   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3683   assert(Src0Idx != -1);
3684 
3685   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3686   if (!Src0.isReg())
3687     return true;
3688 
3689   auto Reg = mc2PseudoReg(Src0.getReg());
3690   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3691   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3692     Error(getRegLoc(Reg, Operands),
3693           "source operand must be either a VGPR or an inline constant");
3694     return false;
3695   }
3696 
3697   return true;
3698 }
3699 
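// For MFMA opcodes whose destination is wider than 128 bits, src2 must
// either be the same register as the destination or not overlap it at all.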
3700 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3701                                    const OperandVector &Operands) {
3702   const unsigned Opc = Inst.getOpcode();
3703   const MCInstrDesc &Desc = MII.get(Opc);
3704 
3705   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3706     return true;
3707 
3708   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3709   if (Src2Idx == -1)
3710     return true;
3711 
3712   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3713   if (!Src2.isReg())
3714     return true;
3715 
3716   MCRegister Src2Reg = Src2.getReg();
3717   MCRegister DstReg = Inst.getOperand(0).getReg();
3718   if (Src2Reg == DstReg)
3719     return true;
3720 
3721   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3722   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3723     return true;
3724 
3725   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3726     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3727           "source 2 operand must not partially overlap with dst");
3728     return false;
3729   }
3730 
3731   return true;
3732 }
3733 
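// The VOP3B v_div_scale_{f32,f64} opcodes do not accept the abs source
// modifier.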
3734 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3735   switch (Inst.getOpcode()) {
3736   default:
3737     return true;
3738   case V_DIV_SCALE_F32_gfx6_gfx7:
3739   case V_DIV_SCALE_F32_vi:
3740   case V_DIV_SCALE_F32_gfx10:
3741   case V_DIV_SCALE_F64_gfx6_gfx7:
3742   case V_DIV_SCALE_F64_vi:
3743   case V_DIV_SCALE_F64_gfx10:
3744     break;
3745   }
3746 
3747   // TODO: Check that src0 = src1 or src2.
3748 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3752     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3753             .getImm() &
3754         SISrcMods::ABS) {
3755       return false;
3756     }
3757   }
3758 
3759   return true;
3760 }
3761 
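// The MIMG d16 modifier is not available on SI/CI.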
3762 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3763 
3764   const unsigned Opc = Inst.getOpcode();
3765   const MCInstrDesc &Desc = MII.get(Opc);
3766 
3767   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3768     return true;
3769 
3770   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3771   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3772     if (isCI() || isSI())
3773       return false;
3774   }
3775 
3776   return true;
3777 }
3778 
3779 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3780   const unsigned Opc = Inst.getOpcode();
3781   const MCInstrDesc &Desc = MII.get(Opc);
3782 
3783   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3784     return true;
3785 
3786   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3787   if (DimIdx < 0)
3788     return true;
3789 
3790   long Imm = Inst.getOperand(DimIdx).getImm();
3791   if (Imm < 0 || Imm >= 8)
3792     return false;
3793 
3794   return true;
3795 }
3796 
3797 static bool IsRevOpcode(const unsigned Opcode)
3798 {
3799   switch (Opcode) {
3800   case AMDGPU::V_SUBREV_F32_e32:
3801   case AMDGPU::V_SUBREV_F32_e64:
3802   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3803   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3804   case AMDGPU::V_SUBREV_F32_e32_vi:
3805   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3806   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3807   case AMDGPU::V_SUBREV_F32_e64_vi:
3808 
3809   case AMDGPU::V_SUBREV_CO_U32_e32:
3810   case AMDGPU::V_SUBREV_CO_U32_e64:
3811   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3812   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3813 
3814   case AMDGPU::V_SUBBREV_U32_e32:
3815   case AMDGPU::V_SUBBREV_U32_e64:
3816   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3817   case AMDGPU::V_SUBBREV_U32_e32_vi:
3818   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3819   case AMDGPU::V_SUBBREV_U32_e64_vi:
3820 
3821   case AMDGPU::V_SUBREV_U32_e32:
3822   case AMDGPU::V_SUBREV_U32_e64:
3823   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3824   case AMDGPU::V_SUBREV_U32_e32_vi:
3825   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3826   case AMDGPU::V_SUBREV_U32_e64_vi:
3827 
3828   case AMDGPU::V_SUBREV_F16_e32:
3829   case AMDGPU::V_SUBREV_F16_e64:
3830   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3831   case AMDGPU::V_SUBREV_F16_e32_vi:
3832   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3833   case AMDGPU::V_SUBREV_F16_e64_vi:
3834 
3835   case AMDGPU::V_SUBREV_U16_e32:
3836   case AMDGPU::V_SUBREV_U16_e64:
3837   case AMDGPU::V_SUBREV_U16_e32_vi:
3838   case AMDGPU::V_SUBREV_U16_e64_vi:
3839 
3840   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3841   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3842   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3843 
3844   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3845   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3846 
3847   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3848   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3849 
3850   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3851   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3852 
3853   case AMDGPU::V_LSHRREV_B32_e32:
3854   case AMDGPU::V_LSHRREV_B32_e64:
3855   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3856   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3857   case AMDGPU::V_LSHRREV_B32_e32_vi:
3858   case AMDGPU::V_LSHRREV_B32_e64_vi:
3859   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3860   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3861 
3862   case AMDGPU::V_ASHRREV_I32_e32:
3863   case AMDGPU::V_ASHRREV_I32_e64:
3864   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3865   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3866   case AMDGPU::V_ASHRREV_I32_e32_vi:
3867   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3868   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3869   case AMDGPU::V_ASHRREV_I32_e64_vi:
3870 
3871   case AMDGPU::V_LSHLREV_B32_e32:
3872   case AMDGPU::V_LSHLREV_B32_e64:
3873   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3874   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3875   case AMDGPU::V_LSHLREV_B32_e32_vi:
3876   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3877   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3878   case AMDGPU::V_LSHLREV_B32_e64_vi:
3879 
3880   case AMDGPU::V_LSHLREV_B16_e32:
3881   case AMDGPU::V_LSHLREV_B16_e64:
3882   case AMDGPU::V_LSHLREV_B16_e32_vi:
3883   case AMDGPU::V_LSHLREV_B16_e64_vi:
3884   case AMDGPU::V_LSHLREV_B16_gfx10:
3885 
3886   case AMDGPU::V_LSHRREV_B16_e32:
3887   case AMDGPU::V_LSHRREV_B16_e64:
3888   case AMDGPU::V_LSHRREV_B16_e32_vi:
3889   case AMDGPU::V_LSHRREV_B16_e64_vi:
3890   case AMDGPU::V_LSHRREV_B16_gfx10:
3891 
3892   case AMDGPU::V_ASHRREV_I16_e32:
3893   case AMDGPU::V_ASHRREV_I16_e64:
3894   case AMDGPU::V_ASHRREV_I16_e32_vi:
3895   case AMDGPU::V_ASHRREV_I16_e64_vi:
3896   case AMDGPU::V_ASHRREV_I16_gfx10:
3897 
3898   case AMDGPU::V_LSHLREV_B64_e64:
3899   case AMDGPU::V_LSHLREV_B64_gfx10:
3900   case AMDGPU::V_LSHLREV_B64_vi:
3901 
3902   case AMDGPU::V_LSHRREV_B64_e64:
3903   case AMDGPU::V_LSHRREV_B64_gfx10:
3904   case AMDGPU::V_LSHRREV_B64_vi:
3905 
3906   case AMDGPU::V_ASHRREV_I64_e64:
3907   case AMDGPU::V_ASHRREV_I64_gfx10:
3908   case AMDGPU::V_ASHRREV_I64_vi:
3909 
3910   case AMDGPU::V_PK_LSHLREV_B16:
3911   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3912   case AMDGPU::V_PK_LSHLREV_B16_vi:
3913 
3914   case AMDGPU::V_PK_LSHRREV_B16:
3915   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3916   case AMDGPU::V_PK_LSHRREV_B16_vi:
3917   case AMDGPU::V_PK_ASHRREV_I16:
3918   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3919   case AMDGPU::V_PK_ASHRREV_I16_vi:
3920     return true;
3921   default:
3922     return false;
3923   }
3924 }
3925 
3926 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3927 
3928   using namespace SIInstrFlags;
3929   const unsigned Opcode = Inst.getOpcode();
3930   const MCInstrDesc &Desc = MII.get(Opcode);
3931 
3932   // lds_direct register is defined so that it can be used
3933   // with 9-bit operands only. Ignore encodings which do not accept these.
3934   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3935   if ((Desc.TSFlags & Enc) == 0)
3936     return None;
3937 
3938   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3939     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3940     if (SrcIdx == -1)
3941       break;
3942     const auto &Src = Inst.getOperand(SrcIdx);
3943     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3944 
3945       if (isGFX90A())
3946         return StringRef("lds_direct is not supported on this GPU");
3947 
3948       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3949         return StringRef("lds_direct cannot be used with this instruction");
3950 
3951       if (SrcName != OpName::src0)
3952         return StringRef("lds_direct may be used as src0 only");
3953     }
3954   }
3955 
3956   return None;
3957 }
3958 
3959 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3960   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3961     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3962     if (Op.isFlatOffset())
3963       return Op.getStartLoc();
3964   }
3965   return getLoc();
3966 }
3967 
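// Validate the FLAT offset modifier: it must be zero on targets without
// flat offsets, fit a signed field for global/scratch segment forms, and an
// unsigned field otherwise.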
3968 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3969                                          const OperandVector &Operands) {
3970   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3971   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3972     return true;
3973 
3974   auto Opcode = Inst.getOpcode();
3975   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3976   assert(OpNum != -1);
3977 
3978   const auto &Op = Inst.getOperand(OpNum);
3979   if (!hasFlatOffsets() && Op.getImm() != 0) {
3980     Error(getFlatOffsetLoc(Operands),
3981           "flat offset modifier is not supported on this GPU");
3982     return false;
3983   }
3984 
3985   // For FLAT segment the offset must be positive;
3986   // MSB is ignored and forced to zero.
3987   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3988     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3989     if (!isIntN(OffsetSize, Op.getImm())) {
3990       Error(getFlatOffsetLoc(Operands),
3991             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3992       return false;
3993     }
3994   } else {
3995     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3996     if (!isUIntN(OffsetSize, Op.getImm())) {
3997       Error(getFlatOffsetLoc(Operands),
3998             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3999       return false;
4000     }
4001   }
4002 
4003   return true;
4004 }
4005 
4006 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4007   // Start with second operand because SMEM Offset cannot be dst or src0.
4008   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4009     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4010     if (Op.isSMEMOffset())
4011       return Op.getStartLoc();
4012   }
4013   return getLoc();
4014 }
4015 
4016 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4017                                          const OperandVector &Operands) {
4018   if (isCI() || isSI())
4019     return true;
4020 
4021   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4022   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4023     return true;
4024 
4025   auto Opcode = Inst.getOpcode();
4026   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4027   if (OpNum == -1)
4028     return true;
4029 
4030   const auto &Op = Inst.getOperand(OpNum);
4031   if (!Op.isImm())
4032     return true;
4033 
4034   uint64_t Offset = Op.getImm();
4035   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4036   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4037       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4038     return true;
4039 
4040   Error(getSMEMOffsetLoc(Operands),
4041         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4042                                "expected a 21-bit signed offset");
4043 
4044   return false;
4045 }
4046 
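// SOP2/SOPC instructions may use at most one literal (or expression) value
// across their source operands.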
4047 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4048   unsigned Opcode = Inst.getOpcode();
4049   const MCInstrDesc &Desc = MII.get(Opcode);
4050   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4051     return true;
4052 
4053   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4054   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4055 
4056   const int OpIndices[] = { Src0Idx, Src1Idx };
4057 
4058   unsigned NumExprs = 0;
4059   unsigned NumLiterals = 0;
4060   uint32_t LiteralValue;
4061 
4062   for (int OpIdx : OpIndices) {
4063     if (OpIdx == -1) break;
4064 
4065     const MCOperand &MO = Inst.getOperand(OpIdx);
4066     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4067     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4068       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4069         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4070         if (NumLiterals == 0 || LiteralValue != Value) {
4071           LiteralValue = Value;
4072           ++NumLiterals;
4073         }
4074       } else if (MO.isExpr()) {
4075         ++NumExprs;
4076       }
4077     }
4078   }
4079 
4080   return NumLiterals + NumExprs <= 1;
4081 }
4082 
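// v_permlane16/v_permlanex16 use only the two low op_sel bits; GFX940 DOT
// instructions must keep op_sel and op_sel_hi at their default values.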
4083 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4084   const unsigned Opc = Inst.getOpcode();
4085   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4086       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4087     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4088     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4089 
4090     if (OpSel & ~3)
4091       return false;
4092   }
4093 
4094   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4095     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4096     if (OpSelIdx != -1) {
4097       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4098         return false;
4099     }
4100     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4101     if (OpSelHiIdx != -1) {
4102       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4103         return false;
4104     }
4105   }
4106 
4107   return true;
4108 }
4109 
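// DPP controls other than row_newbcast are rejected when src0 is a 64-bit
// register pair.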
4110 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4111                                   const OperandVector &Operands) {
4112   const unsigned Opc = Inst.getOpcode();
4113   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4114   if (DppCtrlIdx < 0)
4115     return true;
4116   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4117 
4118   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4119     // DPP64 is supported for row_newbcast only.
4120     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4121     if (Src0Idx >= 0 &&
4122         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4123       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4124       Error(S, "64 bit dpp only supports row_newbcast");
4125       return false;
4126     }
4127   }
4128 
4129   return true;
4130 }
4131 
4132 // Check if VCC register matches wavefront size
4133 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4134   auto FB = getFeatureBits();
4135   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4136     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4137 }
4138 
// At most one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
4140 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4141                                          const OperandVector &Operands) {
4142   unsigned Opcode = Inst.getOpcode();
4143   const MCInstrDesc &Desc = MII.get(Opcode);
4144   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4145   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4146       ImmIdx == -1)
4147     return true;
4148 
4149   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4150   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4151   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4152 
4153   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4154 
4155   unsigned NumExprs = 0;
4156   unsigned NumLiterals = 0;
4157   uint32_t LiteralValue;
4158 
4159   for (int OpIdx : OpIndices) {
4160     if (OpIdx == -1)
4161       continue;
4162 
4163     const MCOperand &MO = Inst.getOperand(OpIdx);
4164     if (!MO.isImm() && !MO.isExpr())
4165       continue;
4166     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4167       continue;
4168 
4169     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4170         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4171       Error(getConstLoc(Operands),
4172             "inline constants are not allowed for this operand");
4173       return false;
4174     }
4175 
4176     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4177       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4178       if (NumLiterals == 0 || LiteralValue != Value) {
4179         LiteralValue = Value;
4180         ++NumLiterals;
4181       }
4182     } else if (MO.isExpr()) {
4183       ++NumExprs;
4184     }
4185   }
4186   NumLiterals += NumExprs;
4187 
4188   if (!NumLiterals)
4189     return true;
4190 
4191   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4192     Error(getLitLoc(Operands), "literal operands are not supported");
4193     return false;
4194   }
4195 
4196   if (NumLiterals > 1) {
4197     Error(getLitLoc(Operands), "only one literal operand is allowed");
4198     return false;
4199   }
4200 
4201   return true;
4202 }
4203 
4204 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4205 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4206                          const MCRegisterInfo *MRI) {
4207   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4208   if (OpIdx < 0)
4209     return -1;
4210 
4211   const MCOperand &Op = Inst.getOperand(OpIdx);
4212   if (!Op.isReg())
4213     return -1;
4214 
4215   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4216   auto Reg = Sub ? Sub : Op.getReg();
4217   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4218   return AGPR32.contains(Reg) ? 1 : 0;
4219 }
4220 
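// For FLAT/MUBUF/MTBUF/MIMG/DS memory instructions, the data and destination
// operands must agree on AGPR vs VGPR usage; AGPR forms are only accepted on
// targets with the gfx90a instruction set.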
4221 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4222   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4223   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4224                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4225                   SIInstrFlags::DS)) == 0)
4226     return true;
4227 
4228   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4229                                                       : AMDGPU::OpName::vdata;
4230 
4231   const MCRegisterInfo *MRI = getMRI();
4232   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4233   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4234 
4235   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4236     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4237     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4238       return false;
4239   }
4240 
4241   auto FB = getFeatureBits();
4242   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4243     if (DataAreg < 0 || DstAreg < 0)
4244       return true;
4245     return DstAreg == DataAreg;
4246   }
4247 
4248   return DstAreg < 1 && DataAreg < 1;
4249 }
4250 
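// On targets with the gfx90a instruction set, VGPR and AGPR tuples must
// start at an even-numbered register.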
4251 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4252   auto FB = getFeatureBits();
4253   if (!FB[AMDGPU::FeatureGFX90AInsts])
4254     return true;
4255 
4256   const MCRegisterInfo *MRI = getMRI();
4257   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4258   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4259   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4260     const MCOperand &Op = Inst.getOperand(I);
4261     if (!Op.isReg())
4262       continue;
4263 
4264     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4265     if (!Sub)
4266       continue;
4267 
4268     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4269       return false;
4270     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4271       return false;
4272   }
4273 
4274   return true;
4275 }
4276 
4277 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4278   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4279     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4280     if (Op.isBLGP())
4281       return Op.getStartLoc();
4282   }
4283   return SMLoc();
4284 }
4285 
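// The four GFX940 F64 MFMA opcodes spell this modifier 'neg:' while all
// other MFMA opcodes spell it 'blgp:'; using the wrong spelling is reported
// here.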
4286 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4287                                    const OperandVector &Operands) {
4288   unsigned Opc = Inst.getOpcode();
4289   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4290   if (BlgpIdx == -1)
4291     return true;
4292   SMLoc BLGPLoc = getBLGPLoc(Operands);
4293   if (!BLGPLoc.isValid())
4294     return true;
4295   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4296   auto FB = getFeatureBits();
4297   bool UsesNeg = false;
4298   if (FB[AMDGPU::FeatureGFX940Insts]) {
4299     switch (Opc) {
4300     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4301     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4302     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4303     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4304       UsesNeg = true;
4305     }
4306   }
4307 
4308   if (IsNeg == UsesNeg)
4309     return true;
4310 
4311   Error(BLGPLoc,
4312         UsesNeg ? "invalid modifier: blgp is not supported"
4313                 : "invalid modifier: neg is not supported");
4314 
4315   return false;
4316 }
4317 
4318 // gfx90a has an undocumented limitation:
4319 // DS_GWS opcodes must use even aligned registers.
4320 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4321                                   const OperandVector &Operands) {
4322   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4323     return true;
4324 
4325   int Opc = Inst.getOpcode();
4326   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4327       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4328     return true;
4329 
4330   const MCRegisterInfo *MRI = getMRI();
4331   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4332   int Data0Pos =
4333       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4334   assert(Data0Pos != -1);
4335   auto Reg = Inst.getOperand(Data0Pos).getReg();
4336   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4337   if (RegIdx & 1) {
4338     SMLoc RegLoc = getRegLoc(Reg, Operands);
4339     Error(RegLoc, "vgpr must be even aligned");
4340     return false;
4341   }
4342 
4343   return true;
4344 }
4345 
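// Validate cache policy bits: SMRD accepts only glc/dlc, scc is rejected on
// gfx90a, and (non-MIMG) returning atomics must set glc (sc0 on GFX940)
// while non-returning atomics must not.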
4346 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4347                                             const OperandVector &Operands,
4348                                             const SMLoc &IDLoc) {
4349   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4350                                            AMDGPU::OpName::cpol);
4351   if (CPolPos == -1)
4352     return true;
4353 
4354   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4355 
4356   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4357   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4358       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4359     Error(IDLoc, "invalid cache policy for SMRD instruction");
4360     return false;
4361   }
4362 
4363   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4364     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4365     StringRef CStr(S.getPointer());
4366     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4367     Error(S, "scc is not supported on this GPU");
4368     return false;
4369   }
4370 
4371   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4372     return true;
4373 
4374   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4375     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4376       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4377                               : "instruction must use glc");
4378       return false;
4379     }
4380   } else {
4381     if (CPol & CPol::GLC) {
4382       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4383       StringRef CStr(S.getPointer());
4384       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4385       Error(S, isGFX940() ? "instruction must not use sc0"
4386                           : "instruction must not use glc");
4387       return false;
4388     }
4389   }
4390 
4391   return true;
4392 }
4393 
4394 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4395                                           const SMLoc &IDLoc,
4396                                           const OperandVector &Operands) {
4397   if (auto ErrMsg = validateLdsDirect(Inst)) {
4398     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4399     return false;
4400   }
4401   if (!validateSOPLiteral(Inst)) {
4402     Error(getLitLoc(Operands),
4403       "only one literal operand is allowed");
4404     return false;
4405   }
4406   if (!validateVOPLiteral(Inst, Operands)) {
4407     return false;
4408   }
4409   if (!validateConstantBusLimitations(Inst, Operands)) {
4410     return false;
4411   }
4412   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4413     return false;
4414   }
4415   if (!validateIntClampSupported(Inst)) {
4416     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4417       "integer clamping is not supported on this GPU");
4418     return false;
4419   }
4420   if (!validateOpSel(Inst)) {
4421     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4422       "invalid op_sel operand");
4423     return false;
4424   }
4425   if (!validateDPP(Inst, Operands)) {
4426     return false;
4427   }
4428   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4429   if (!validateMIMGD16(Inst)) {
4430     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4431       "d16 modifier is not supported on this GPU");
4432     return false;
4433   }
4434   if (!validateMIMGDim(Inst)) {
4435     Error(IDLoc, "dim modifier is required on this GPU");
4436     return false;
4437   }
4438   if (!validateMIMGMSAA(Inst)) {
4439     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4440           "invalid dim; must be MSAA type");
4441     return false;
4442   }
4443   if (!validateMIMGDataSize(Inst)) {
4444     Error(IDLoc,
4445       "image data size does not match dmask and tfe");
4446     return false;
4447   }
4448   if (!validateMIMGAddrSize(Inst)) {
4449     Error(IDLoc,
4450       "image address size does not match dim and a16");
4451     return false;
4452   }
4453   if (!validateMIMGAtomicDMask(Inst)) {
4454     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4455       "invalid atomic image dmask");
4456     return false;
4457   }
4458   if (!validateMIMGGatherDMask(Inst)) {
4459     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4460       "invalid image_gather dmask: only one bit must be set");
4461     return false;
4462   }
4463   if (!validateMovrels(Inst, Operands)) {
4464     return false;
4465   }
4466   if (!validateFlatOffset(Inst, Operands)) {
4467     return false;
4468   }
4469   if (!validateSMEMOffset(Inst, Operands)) {
4470     return false;
4471   }
4472   if (!validateMAIAccWrite(Inst, Operands)) {
4473     return false;
4474   }
4475   if (!validateMFMA(Inst, Operands)) {
4476     return false;
4477   }
4478   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4479     return false;
4480   }
4481 
4482   if (!validateAGPRLdSt(Inst)) {
4483     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4484     ? "invalid register class: data and dst should be all VGPR or AGPR"
4485     : "invalid register class: agpr loads and stores not supported on this GPU"
4486     );
4487     return false;
4488   }
4489   if (!validateVGPRAlign(Inst)) {
4490     Error(IDLoc,
4491       "invalid register class: vgpr tuples must be 64 bit aligned");
4492     return false;
4493   }
4494   if (!validateGWS(Inst, Operands)) {
4495     return false;
4496   }
4497 
4498   if (!validateBLGP(Inst, Operands)) {
4499     return false;
4500   }
4501 
4502   if (!validateDivScale(Inst)) {
4503     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4504     return false;
4505   }
4509 
4510   return true;
4511 }
4512 
4513 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4514                                             const FeatureBitset &FBS,
4515                                             unsigned VariantID = 0);
4516 
4517 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4518                                 const FeatureBitset &AvailableFeatures,
4519                                 unsigned VariantID);
4520 
4521 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4522                                        const FeatureBitset &FBS) {
4523   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4524 }
4525 
4526 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4527                                        const FeatureBitset &FBS,
4528                                        ArrayRef<unsigned> Variants) {
4529   for (auto Variant : Variants) {
4530     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4531       return true;
4532   }
4533 
4534   return false;
4535 }
4536 
4537 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4538                                                   const SMLoc &IDLoc) {
4539   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4540 
4541   // Check if requested instruction variant is supported.
4542   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4543     return false;
4544 
4545   // This instruction is not supported.
4546   // Clear any other pending errors because they are no longer relevant.
4547   getParser().clearPendingErrors();
4548 
4549   // Requested instruction variant is not supported.
4550   // Check if any other variants are supported.
4551   StringRef VariantName = getMatchedVariantName();
4552   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4553     return Error(IDLoc,
4554                  Twine(VariantName,
4555                        " variant of this instruction is not supported"));
4556   }
4557 
4558   // Finally check if this instruction is supported on any other GPU.
4559   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4560     return Error(IDLoc, "instruction not supported on this GPU");
4561   }
4562 
4563   // Instruction not supported on any GPU. Probably a typo.
4564   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4565   return Error(IDLoc, "invalid instruction" + Suggestion);
4566 }
4567 
4568 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4569                                               OperandVector &Operands,
4570                                               MCStreamer &Out,
4571                                               uint64_t &ErrorInfo,
4572                                               bool MatchingInlineAsm) {
4573   MCInst Inst;
4574   unsigned Result = Match_Success;
4575   for (auto Variant : getMatchedVariants()) {
4576     uint64_t EI;
4577     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4578                                   Variant);
    // We order match statuses from least to most specific and keep the most
    // specific status seen so far:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4582     if ((R == Match_Success) ||
4583         (R == Match_PreferE32) ||
4584         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4585         (R == Match_InvalidOperand && Result != Match_MissingFeature
4586                                    && Result != Match_PreferE32) ||
4587         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4588                                    && Result != Match_MissingFeature
4589                                    && Result != Match_PreferE32)) {
4590       Result = R;
4591       ErrorInfo = EI;
4592     }
4593     if (R == Match_Success)
4594       break;
4595   }
4596 
4597   if (Result == Match_Success) {
4598     if (!validateInstruction(Inst, IDLoc, Operands)) {
4599       return true;
4600     }
4601     Inst.setLoc(IDLoc);
4602     Out.emitInstruction(Inst, getSTI());
4603     return false;
4604   }
4605 
4606   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4607   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4608     return true;
4609   }
4610 
4611   switch (Result) {
4612   default: break;
4613   case Match_MissingFeature:
4614     // It has been verified that the specified instruction
4615     // mnemonic is valid. A match was found but it requires
4616     // features which are not supported on this GPU.
4617     return Error(IDLoc, "operands are not valid for this GPU or mode");
4618 
4619   case Match_InvalidOperand: {
4620     SMLoc ErrorLoc = IDLoc;
4621     if (ErrorInfo != ~0ULL) {
4622       if (ErrorInfo >= Operands.size()) {
4623         return Error(IDLoc, "too few operands for instruction");
4624       }
4625       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4626       if (ErrorLoc == SMLoc())
4627         ErrorLoc = IDLoc;
4628     }
4629     return Error(ErrorLoc, "invalid operand for instruction");
4630   }
4631 
4632   case Match_PreferE32:
4633     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4634                         "should be encoded as e32");
4635   case Match_MnemonicFail:
4636     llvm_unreachable("Invalid instructions should have been handled already");
4637   }
4638   llvm_unreachable("Implement any new match types added!");
4639 }
4640 
4641 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4642   int64_t Tmp = -1;
4643   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4644     return true;
4645   }
4646   if (getParser().parseAbsoluteExpression(Tmp)) {
4647     return true;
4648   }
4649   Ret = static_cast<uint32_t>(Tmp);
4650   return false;
4651 }
4652 
4653 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4654                                                uint32_t &Minor) {
4655   if (ParseAsAbsoluteExpression(Major))
4656     return TokError("invalid major version");
4657 
4658   if (!trySkipToken(AsmToken::Comma))
4659     return TokError("minor version number required, comma expected");
4660 
4661   if (ParseAsAbsoluteExpression(Minor))
4662     return TokError("invalid minor version");
4663 
4664   return false;
4665 }
4666 
4667 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4668   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4669     return TokError("directive only supported for amdgcn architecture");
4670 
4671   std::string TargetIDDirective;
4672   SMLoc TargetStart = getTok().getLoc();
4673   if (getParser().parseEscapedString(TargetIDDirective))
4674     return true;
4675 
4676   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4677   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4678     return getParser().Error(TargetRange.Start,
4679         (Twine(".amdgcn_target directive's target id ") +
4680          Twine(TargetIDDirective) +
4681          Twine(" does not match the specified target id ") +
4682          Twine(getTargetStreamer().getTargetID()->toString())).str());
4683 
4684   return false;
4685 }
4686 
4687 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4688   return Error(Range.Start, "value out of range", Range);
4689 }
4690 
4691 bool AMDGPUAsmParser::calculateGPRBlocks(
4692     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4693     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4694     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4695     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4696   // TODO(scott.linder): These calculations are duplicated from
4697   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4698   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4699 
4700   unsigned NumVGPRs = NextFreeVGPR;
4701   unsigned NumSGPRs = NextFreeSGPR;
4702 
4703   if (Version.Major >= 10)
4704     NumSGPRs = 0;
4705   else {
4706     unsigned MaxAddressableNumSGPRs =
4707         IsaInfo::getAddressableNumSGPRs(&getSTI());
4708 
4709     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4710         NumSGPRs > MaxAddressableNumSGPRs)
4711       return OutOfRangeError(SGPRRange);
4712 
4713     NumSGPRs +=
4714         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4715 
4716     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4717         NumSGPRs > MaxAddressableNumSGPRs)
4718       return OutOfRangeError(SGPRRange);
4719 
4720     if (Features.test(FeatureSGPRInitBug))
4721       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4722   }
4723 
4724   VGPRBlocks =
4725       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4726   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4727 
4728   return false;
4729 }
4730 
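// Parse a complete .amdhsa_kernel block. For illustration only, a minimal
// block accepted by this parser might look like the following (the values
// shown are hypothetical and target-dependent):
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
//
// Each .amdhsa_* directive may appear at most once; .amdhsa_next_free_vgpr
// and .amdhsa_next_free_sgpr are always required, and some targets (e.g.
// gfx90a) require additional directives such as .amdhsa_accum_offset.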
4731 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4732   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4733     return TokError("directive only supported for amdgcn architecture");
4734 
4735   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4736     return TokError("directive only supported for amdhsa OS");
4737 
4738   StringRef KernelName;
4739   if (getParser().parseIdentifier(KernelName))
4740     return true;
4741 
4742   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4743 
4744   StringSet<> Seen;
4745 
4746   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4747 
4748   SMRange VGPRRange;
4749   uint64_t NextFreeVGPR = 0;
4750   uint64_t AccumOffset = 0;
4751   uint64_t SharedVGPRCount = 0;
4752   SMRange SGPRRange;
4753   uint64_t NextFreeSGPR = 0;
4754 
4755   // Count the number of user SGPRs implied from the enabled feature bits.
4756   unsigned ImpliedUserSGPRCount = 0;
4757 
4758   // Track if the asm explicitly contains the directive for the user SGPR
4759   // count.
4760   Optional<unsigned> ExplicitUserSGPRCount;
4761   bool ReserveVCC = true;
4762   bool ReserveFlatScr = true;
4763   Optional<bool> EnableWavefrontSize32;
4764 
4765   while (true) {
4766     while (trySkipToken(AsmToken::EndOfStatement));
4767 
4768     StringRef ID;
4769     SMRange IDRange = getTok().getLocRange();
4770     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4771       return true;
4772 
4773     if (ID == ".end_amdhsa_kernel")
4774       break;
4775 
4776     if (Seen.find(ID) != Seen.end())
4777       return TokError(".amdhsa_ directives cannot be repeated");
4778     Seen.insert(ID);
4779 
4780     SMLoc ValStart = getLoc();
4781     int64_t IVal;
4782     if (getParser().parseAbsoluteExpression(IVal))
4783       return true;
4784     SMLoc ValEnd = getLoc();
4785     SMRange ValRange = SMRange(ValStart, ValEnd);
4786 
4787     if (IVal < 0)
4788       return OutOfRangeError(ValRange);
4789 
4790     uint64_t Val = IVal;
4791 
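// Range-check VALUE against the bit width of the kernel descriptor field
// ENTRY and, if it fits, encode it into FIELD.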
4792 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4793   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4794     return OutOfRangeError(RANGE);                                             \
4795   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4796 
4797     if (ID == ".amdhsa_group_segment_fixed_size") {
4798       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4799         return OutOfRangeError(ValRange);
4800       KD.group_segment_fixed_size = Val;
4801     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4802       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4803         return OutOfRangeError(ValRange);
4804       KD.private_segment_fixed_size = Val;
4805     } else if (ID == ".amdhsa_kernarg_size") {
4806       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4807         return OutOfRangeError(ValRange);
4808       KD.kernarg_size = Val;
4809     } else if (ID == ".amdhsa_user_sgpr_count") {
4810       ExplicitUserSGPRCount = Val;
4811     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4812       if (hasArchitectedFlatScratch())
4813         return Error(IDRange.Start,
4814                      "directive is not supported with architected flat scratch",
4815                      IDRange);
4816       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4817                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4818                        Val, ValRange);
4819       if (Val)
4820         ImpliedUserSGPRCount += 4;
4821     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4822       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4823                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4824                        ValRange);
4825       if (Val)
4826         ImpliedUserSGPRCount += 2;
4827     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4828       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4829                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4830                        ValRange);
4831       if (Val)
4832         ImpliedUserSGPRCount += 2;
4833     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4834       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4835                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4836                        Val, ValRange);
4837       if (Val)
4838         ImpliedUserSGPRCount += 2;
4839     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4840       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4841                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4842                        ValRange);
4843       if (Val)
4844         ImpliedUserSGPRCount += 2;
4845     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4846       if (hasArchitectedFlatScratch())
4847         return Error(IDRange.Start,
4848                      "directive is not supported with architected flat scratch",
4849                      IDRange);
4850       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4851                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4852                        ValRange);
4853       if (Val)
4854         ImpliedUserSGPRCount += 2;
4855     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4856       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4857                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4858                        Val, ValRange);
4859       if (Val)
4860         ImpliedUserSGPRCount += 1;
4861     } else if (ID == ".amdhsa_wavefront_size32") {
4862       if (IVersion.Major < 10)
4863         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4864       EnableWavefrontSize32 = Val;
4865       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4866                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4867                        Val, ValRange);
4868     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4869       if (hasArchitectedFlatScratch())
4870         return Error(IDRange.Start,
4871                      "directive is not supported with architected flat scratch",
4872                      IDRange);
4873       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4874                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4875     } else if (ID == ".amdhsa_enable_private_segment") {
4876       if (!hasArchitectedFlatScratch())
4877         return Error(
4878             IDRange.Start,
4879             "directive is not supported without architected flat scratch",
4880             IDRange);
4881       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4882                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4883     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4884       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4885                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4886                        ValRange);
4887     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4888       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4889                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4890                        ValRange);
4891     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4892       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4893                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4894                        ValRange);
4895     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4896       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4897                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4898                        ValRange);
4899     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4900       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4901                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4902                        ValRange);
4903     } else if (ID == ".amdhsa_next_free_vgpr") {
4904       VGPRRange = ValRange;
4905       NextFreeVGPR = Val;
4906     } else if (ID == ".amdhsa_next_free_sgpr") {
4907       SGPRRange = ValRange;
4908       NextFreeSGPR = Val;
4909     } else if (ID == ".amdhsa_accum_offset") {
4910       if (!isGFX90A())
4911         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4912       AccumOffset = Val;
4913     } else if (ID == ".amdhsa_reserve_vcc") {
4914       if (!isUInt<1>(Val))
4915         return OutOfRangeError(ValRange);
4916       ReserveVCC = Val;
4917     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4918       if (IVersion.Major < 7)
4919         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4920       if (hasArchitectedFlatScratch())
4921         return Error(IDRange.Start,
4922                      "directive is not supported with architected flat scratch",
4923                      IDRange);
4924       if (!isUInt<1>(Val))
4925         return OutOfRangeError(ValRange);
4926       ReserveFlatScr = Val;
4927     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4928       if (IVersion.Major < 8)
4929         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4930       if (!isUInt<1>(Val))
4931         return OutOfRangeError(ValRange);
4932       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(
            IDRange.Start,
            ".amdhsa_reserve_xnack_mask does not match target id", IDRange);
4935     } else if (ID == ".amdhsa_float_round_mode_32") {
4936       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4937                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4938     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4939       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4940                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4941     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4942       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4943                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4944     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4945       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4946                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4947                        ValRange);
4948     } else if (ID == ".amdhsa_dx10_clamp") {
4949       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4950                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4951     } else if (ID == ".amdhsa_ieee_mode") {
4952       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4953                        Val, ValRange);
4954     } else if (ID == ".amdhsa_fp16_overflow") {
4955       if (IVersion.Major < 9)
4956         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4957       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4958                        ValRange);
4959     } else if (ID == ".amdhsa_tg_split") {
4960       if (!isGFX90A())
4961         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                       Val, ValRange);
4964     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4965       if (IVersion.Major < 10)
4966         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4967       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4968                        ValRange);
4969     } else if (ID == ".amdhsa_memory_ordered") {
4970       if (IVersion.Major < 10)
4971         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4972       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4973                        ValRange);
4974     } else if (ID == ".amdhsa_forward_progress") {
4975       if (IVersion.Major < 10)
4976         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4977       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4978                        ValRange);
4979     } else if (ID == ".amdhsa_shared_vgpr_count") {
4980       if (IVersion.Major < 10)
4981         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4982       SharedVGPRCount = Val;
4983       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
4984                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
4985                        ValRange);
4986     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4987       PARSE_BITS_ENTRY(
4988           KD.compute_pgm_rsrc2,
4989           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4990           ValRange);
4991     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4992       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4993                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4994                        Val, ValRange);
4995     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4996       PARSE_BITS_ENTRY(
4997           KD.compute_pgm_rsrc2,
4998           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4999           ValRange);
5000     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5001       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5002                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5003                        Val, ValRange);
5004     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5005       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5006                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5007                        Val, ValRange);
5008     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5009       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5010                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5011                        Val, ValRange);
5012     } else if (ID == ".amdhsa_exception_int_div_zero") {
5013       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5014                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5015                        Val, ValRange);
5016     } else {
5017       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5018     }
5019 
5020 #undef PARSE_BITS_ENTRY
5021   }
5022 
5023   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5024     return TokError(".amdhsa_next_free_vgpr directive is required");
5025 
5026   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5027     return TokError(".amdhsa_next_free_sgpr directive is required");
5028 
5029   unsigned VGPRBlocks;
5030   unsigned SGPRBlocks;
5031   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5032                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5033                          EnableWavefrontSize32, NextFreeVGPR,
5034                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5035                          SGPRBlocks))
5036     return true;
5037 
5038   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5039           VGPRBlocks))
5040     return OutOfRangeError(VGPRRange);
5041   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5042                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5043 
5044   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5045           SGPRBlocks))
5046     return OutOfRangeError(SGPRRange);
5047   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5048                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5049                   SGPRBlocks);
5050 
5051   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdhsa_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");
5054 
5055   unsigned UserSGPRCount =
5056       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5057 
5058   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5059     return TokError("too many user SGPRs enabled");
5060   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5061                   UserSGPRCount);
5062 
5063   if (isGFX90A()) {
5064     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5065       return TokError(".amdhsa_accum_offset directive is required");
5066     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5067       return TokError("accum_offset should be in range [4..256] in "
5068                       "increments of 4");
5069     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5070       return TokError("accum_offset exceeds total VGPR allocation");
5071     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5072                     (AccumOffset / 4 - 1));
5073   }
5074 
5075   if (IVersion.Major == 10) {
    // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY.
    if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5078       return TokError("shared_vgpr_count directive not valid on "
5079                       "wavefront size 32");
5080     }
5081     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5082       return TokError("shared_vgpr_count*2 + "
5083                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      "exceed 63");
5085     }
5086   }
5087 
5088   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5089       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5090       ReserveFlatScr);
5091   return false;
5092 }
5093 
5094 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5095   uint32_t Major;
5096   uint32_t Minor;
5097 
5098   if (ParseDirectiveMajorMinor(Major, Minor))
5099     return true;
5100 
5101   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5102   return false;
5103 }
5104 
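// For illustration, a legacy ISA directive of the form
//   .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
// specifies major, minor, stepping, vendor and arch; when no operands are
// given, the values are derived from the current subtarget instead.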
5105 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5106   uint32_t Major;
5107   uint32_t Minor;
5108   uint32_t Stepping;
5109   StringRef VendorName;
5110   StringRef ArchName;
5111 
5112   // If this directive has no arguments, then use the ISA version for the
5113   // targeted GPU.
5114   if (isToken(AsmToken::EndOfStatement)) {
5115     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5116     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5117                                                         ISA.Stepping,
5118                                                         "AMD", "AMDGPU");
5119     return false;
5120   }
5121 
5122   if (ParseDirectiveMajorMinor(Major, Minor))
5123     return true;
5124 
5125   if (!trySkipToken(AsmToken::Comma))
5126     return TokError("stepping version number required, comma expected");
5127 
5128   if (ParseAsAbsoluteExpression(Stepping))
5129     return TokError("invalid stepping version");
5130 
5131   if (!trySkipToken(AsmToken::Comma))
5132     return TokError("vendor name required, comma expected");
5133 
5134   if (!parseString(VendorName, "invalid vendor name"))
5135     return true;
5136 
5137   if (!trySkipToken(AsmToken::Comma))
5138     return TokError("arch name required, comma expected");
5139 
5140   if (!parseString(ArchName, "invalid arch name"))
5141     return true;
5142 
5143   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5144                                                       VendorName, ArchName);
5145   return false;
5146 }
5147 
5148 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5149                                                amd_kernel_code_t &Header) {
5150   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5151   // assembly for backwards compatibility.
5152   if (ID == "max_scratch_backing_memory_byte_size") {
5153     Parser.eatToEndOfStatement();
5154     return false;
5155   }
5156 
5157   SmallString<40> ErrStr;
5158   raw_svector_ostream Err(ErrStr);
5159   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5160     return TokError(Err.str());
5161   }
5162   Lex();
5163 
5164   if (ID == "enable_wavefront_size32") {
5165     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5166       if (!isGFX10Plus())
5167         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5168       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5169         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5170     } else {
5171       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5172         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5173     }
5174   }
5175 
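  // wavefront_size holds the log2 of the wave size, so 5 selects wave32 and
  // 6 selects wave64.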
5176   if (ID == "wavefront_size") {
5177     if (Header.wavefront_size == 5) {
5178       if (!isGFX10Plus())
5179         return TokError("wavefront_size=5 is only allowed on GFX10+");
5180       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5181         return TokError("wavefront_size=5 requires +WavefrontSize32");
5182     } else if (Header.wavefront_size == 6) {
5183       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5184         return TokError("wavefront_size=6 requires +WavefrontSize64");
5185     }
5186   }
5187 
5188   if (ID == "enable_wgp_mode") {
5189     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5190         !isGFX10Plus())
5191       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5192   }
5193 
5194   if (ID == "enable_mem_ordered") {
5195     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5196         !isGFX10Plus())
5197       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5198   }
5199 
5200   if (ID == "enable_fwd_progress") {
5201     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5202         !isGFX10Plus())
5203       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5204   }
5205 
5206   return false;
5207 }
5208 
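// Parse the legacy .amd_kernel_code_t block. As a sketch, the expected input
// is a sequence of "name = value" fields terminated by .end_amd_kernel_code_t,
// e.g. (field names below are illustrative):
//
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_sgpr_kernarg_segment_ptr = 1
//   .end_amd_kernel_code_t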
5209 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5210   amd_kernel_code_t Header;
5211   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5212 
5213   while (true) {
5214     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5215     // will set the current token to EndOfStatement.
5216     while(trySkipToken(AsmToken::EndOfStatement));
5217 
5218     StringRef ID;
5219     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5220       return true;
5221 
5222     if (ID == ".end_amd_kernel_code_t")
5223       break;
5224 
5225     if (ParseAMDKernelCodeTValue(ID, Header))
5226       return true;
5227   }
5228 
5229   getTargetStreamer().EmitAMDKernelCodeT(Header);
5230 
5231   return false;
5232 }
5233 
5234 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5235   StringRef KernelName;
5236   if (!parseId(KernelName, "expected symbol name"))
5237     return true;
5238 
5239   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5240                                            ELF::STT_AMDGPU_HSA_KERNEL);
5241 
5242   KernelScope.initialize(getContext());
5243   return false;
5244 }
5245 
5246 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5247   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5248     return Error(getLoc(),
5249                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5250                  "architectures");
5251   }
5252 
5253   auto TargetIDDirective = getLexer().getTok().getStringContents();
5254   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5255     return Error(getParser().getTok().getLoc(), "target id must match options");
5256 
5257   getTargetStreamer().EmitISAVersion();
5258   Lex();
5259 
5260   return false;
5261 }
5262 
5263 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5264   const char *AssemblerDirectiveBegin;
5265   const char *AssemblerDirectiveEnd;
5266   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5267       isHsaAbiVersion3AndAbove(&getSTI())
5268           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5269                             HSAMD::V3::AssemblerDirectiveEnd)
5270           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5271                             HSAMD::AssemblerDirectiveEnd);
5272 
5273   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5274     return Error(getLoc(),
5275                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5276                  "not available on non-amdhsa OSes")).str());
5277   }
5278 
5279   std::string HSAMetadataString;
5280   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5281                           HSAMetadataString))
5282     return true;
5283 
5284   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5285     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5286       return Error(getLoc(), "invalid HSA metadata");
5287   } else {
5288     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5289       return Error(getLoc(), "invalid HSA metadata");
5290   }
5291 
5292   return false;
5293 }
5294 
5295 /// Common code to parse out a block of text (typically YAML) between start and
5296 /// end directives.
5297 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5298                                           const char *AssemblerDirectiveEnd,
5299                                           std::string &CollectString) {
5300 
5301   raw_string_ostream CollectStream(CollectString);
5302 
5303   getLexer().setSkipSpace(false);
5304 
5305   bool FoundEnd = false;
5306   while (!isToken(AsmToken::Eof)) {
5307     while (isToken(AsmToken::Space)) {
5308       CollectStream << getTokenStr();
5309       Lex();
5310     }
5311 
5312     if (trySkipId(AssemblerDirectiveEnd)) {
5313       FoundEnd = true;
5314       break;
5315     }
5316 
5317     CollectStream << Parser.parseStringToEndOfStatement()
5318                   << getContext().getAsmInfo()->getSeparatorString();
5319 
5320     Parser.eatToEndOfStatement();
5321   }
5322 
5323   getLexer().setSkipSpace(true);
5324 
5325   if (isToken(AsmToken::Eof) && !FoundEnd) {
5326     return TokError(Twine("expected directive ") +
5327                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5328   }
5329 
5330   CollectStream.flush();
5331   return false;
5332 }
5333 
5334 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5335 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5336   std::string String;
5337   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5338                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5339     return true;
5340 
5341   auto PALMetadata = getTargetStreamer().getPALMetadata();
5342   if (!PALMetadata->setFromString(String))
5343     return Error(getLoc(), "invalid PAL metadata");
5344   return false;
5345 }
5346 
5347 /// Parse the assembler directive for old linear-format PAL metadata.
5348 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
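/// The expected form is a comma-separated list of register/value pairs, e.g.
/// a hypothetical ".amd_amdgpu_pal_metadata 0x1234, 0x1, 0x1238, 0xffff".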
5349   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5350     return Error(getLoc(),
5351                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5352                  "not available on non-amdpal OSes")).str());
5353   }
5354 
5355   auto PALMetadata = getTargetStreamer().getPALMetadata();
5356   PALMetadata->setLegacy();
5357   for (;;) {
5358     uint32_t Key, Value;
5359     if (ParseAsAbsoluteExpression(Key)) {
5360       return TokError(Twine("invalid value in ") +
5361                       Twine(PALMD::AssemblerDirective));
5362     }
5363     if (!trySkipToken(AsmToken::Comma)) {
5364       return TokError(Twine("expected an even number of values in ") +
5365                       Twine(PALMD::AssemblerDirective));
5366     }
5367     if (ParseAsAbsoluteExpression(Value)) {
5368       return TokError(Twine("invalid value in ") +
5369                       Twine(PALMD::AssemblerDirective));
5370     }
5371     PALMetadata->setRegister(Key, Value);
5372     if (!trySkipToken(AsmToken::Comma))
5373       break;
5374   }
5375   return false;
5376 }
5377 
5378 /// ParseDirectiveAMDGPULDS
5379 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
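///  For example, a hypothetical ".amdgpu_lds lds_sym, 1024, 16" declares a
///  1024-byte LDS symbol with 16-byte alignment.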
5380 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5381   if (getParser().checkForValidSection())
5382     return true;
5383 
5384   StringRef Name;
5385   SMLoc NameLoc = getLoc();
5386   if (getParser().parseIdentifier(Name))
5387     return TokError("expected identifier in directive");
5388 
5389   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5390   if (parseToken(AsmToken::Comma, "expected ','"))
5391     return true;
5392 
5393   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5394 
5395   int64_t Size;
5396   SMLoc SizeLoc = getLoc();
5397   if (getParser().parseAbsoluteExpression(Size))
5398     return true;
5399   if (Size < 0)
5400     return Error(SizeLoc, "size must be non-negative");
5401   if (Size > LocalMemorySize)
5402     return Error(SizeLoc, "size is too large");
5403 
5404   int64_t Alignment = 4;
5405   if (trySkipToken(AsmToken::Comma)) {
5406     SMLoc AlignLoc = getLoc();
5407     if (getParser().parseAbsoluteExpression(Alignment))
5408       return true;
5409     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5410       return Error(AlignLoc, "alignment must be a power of two");
5411 
    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
5415     if (Alignment >= 1u << 31)
5416       return Error(AlignLoc, "alignment is too large");
5417   }
5418 
5419   if (parseToken(AsmToken::EndOfStatement,
5420                  "unexpected token in '.amdgpu_lds' directive"))
5421     return true;
5422 
5423   Symbol->redefineIfPossible();
5424   if (!Symbol->isUndefined())
5425     return Error(NameLoc, "invalid symbol redefinition");
5426 
5427   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5428   return false;
5429 }
5430 
5431 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5432   StringRef IDVal = DirectiveID.getString();
5433 
5434   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5435     if (IDVal == ".amdhsa_kernel")
5436      return ParseDirectiveAMDHSAKernel();
5437 
5438     // TODO: Restructure/combine with PAL metadata directive.
5439     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5440       return ParseDirectiveHSAMetadata();
5441   } else {
5442     if (IDVal == ".hsa_code_object_version")
5443       return ParseDirectiveHSACodeObjectVersion();
5444 
5445     if (IDVal == ".hsa_code_object_isa")
5446       return ParseDirectiveHSACodeObjectISA();
5447 
5448     if (IDVal == ".amd_kernel_code_t")
5449       return ParseDirectiveAMDKernelCodeT();
5450 
5451     if (IDVal == ".amdgpu_hsa_kernel")
5452       return ParseDirectiveAMDGPUHsaKernel();
5453 
5454     if (IDVal == ".amd_amdgpu_isa")
5455       return ParseDirectiveISAVersion();
5456 
5457     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5458       return ParseDirectiveHSAMetadata();
5459   }
5460 
5461   if (IDVal == ".amdgcn_target")
5462     return ParseDirectiveAMDGCNTarget();
5463 
5464   if (IDVal == ".amdgpu_lds")
5465     return ParseDirectiveAMDGPULDS();
5466 
5467   if (IDVal == PALMD::AssemblerDirectiveBegin)
5468     return ParseDirectivePALMetadataBegin();
5469 
5470   if (IDVal == PALMD::AssemblerDirective)
5471     return ParseDirectivePALMetadata();
5472 
5473   return true;
5474 }
5475 
5476 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5477                                            unsigned RegNo) {
5478 
5479   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5480     return isGFX9Plus();
5481 
5482   // GFX10 has 2 more SGPRs 104 and 105.
5483   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5484     return hasSGPR104_SGPR105();
5485 
5486   switch (RegNo) {
5487   case AMDGPU::SRC_SHARED_BASE:
5488   case AMDGPU::SRC_SHARED_LIMIT:
5489   case AMDGPU::SRC_PRIVATE_BASE:
5490   case AMDGPU::SRC_PRIVATE_LIMIT:
5491   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5492     return isGFX9Plus();
5493   case AMDGPU::TBA:
5494   case AMDGPU::TBA_LO:
5495   case AMDGPU::TBA_HI:
5496   case AMDGPU::TMA:
5497   case AMDGPU::TMA_LO:
5498   case AMDGPU::TMA_HI:
5499     return !isGFX9Plus();
5500   case AMDGPU::XNACK_MASK:
5501   case AMDGPU::XNACK_MASK_LO:
5502   case AMDGPU::XNACK_MASK_HI:
    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();
5504   case AMDGPU::SGPR_NULL:
5505     return isGFX10Plus();
5506   default:
5507     break;
5508   }
5509 
5510   if (isCI())
5511     return true;
5512 
5513   if (isSI() || isGFX10Plus()) {
5514     // No flat_scr on SI.
5515     // On GFX10 flat scratch is not a valid register operand and can only be
5516     // accessed with s_setreg/s_getreg.
5517     switch (RegNo) {
5518     case AMDGPU::FLAT_SCR:
5519     case AMDGPU::FLAT_SCR_LO:
5520     case AMDGPU::FLAT_SCR_HI:
5521       return false;
5522     default:
5523       return true;
5524     }
5525   }
5526 
5527   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5528   // SI/CI have.
5529   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5530     return hasSGPR102_SGPR103();
5531 
5532   return true;
5533 }
5534 
5535 OperandMatchResultTy
5536 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5537                               OperandMode Mode) {
5538   // Try to parse with a custom parser
5539   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5540 
  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by a custom parser, so we shouldn't continue on to the generic parsing.
5547   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5548       isToken(AsmToken::EndOfStatement))
5549     return ResTy;
5550 
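  // For GFX10+ MIMG instructions the address operands may be written as a
  // bracketed register list (NSA form), e.g. "[v0, v2, v5]". Parse the list
  // here; the brackets are kept as explicit tokens only when more than one
  // register is present.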
5551   SMLoc RBraceLoc;
5552   SMLoc LBraceLoc = getLoc();
5553   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5554     unsigned Prefix = Operands.size();
5555 
5556     for (;;) {
5557       auto Loc = getLoc();
5558       ResTy = parseReg(Operands);
5559       if (ResTy == MatchOperand_NoMatch)
5560         Error(Loc, "expected a register");
5561       if (ResTy != MatchOperand_Success)
5562         return MatchOperand_ParseFail;
5563 
5564       RBraceLoc = getLoc();
5565       if (trySkipToken(AsmToken::RBrac))
5566         break;
5567 
5568       if (!skipToken(AsmToken::Comma,
5569                      "expected a comma or a closing square bracket")) {
5570         return MatchOperand_ParseFail;
5571       }
5572     }
5573 
5574     if (Operands.size() - Prefix > 1) {
5575       Operands.insert(Operands.begin() + Prefix,
5576                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5577       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5578     }
5579 
5580     return MatchOperand_Success;
5581   }
5582 
5583   return parseRegOrImm(Operands);
5584 }
5585 
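// Strip a forced-encoding suffix from the mnemonic and remember it; e.g. a
// hypothetical "v_add_f32_e64" is parsed as "v_add_f32" with a forced 64-bit
// (VOP3) encoding.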
5586 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5587   // Clear any forced encodings from the previous instruction.
5588   setForcedEncodingSize(0);
5589   setForcedDPP(false);
5590   setForcedSDWA(false);
5591 
5592   if (Name.endswith("_e64")) {
5593     setForcedEncodingSize(64);
5594     return Name.substr(0, Name.size() - 4);
5595   } else if (Name.endswith("_e32")) {
5596     setForcedEncodingSize(32);
5597     return Name.substr(0, Name.size() - 4);
5598   } else if (Name.endswith("_dpp")) {
5599     setForcedDPP(true);
5600     return Name.substr(0, Name.size() - 4);
5601   } else if (Name.endswith("_sdwa")) {
5602     setForcedSDWA(true);
5603     return Name.substr(0, Name.size() - 5);
5604   }
5605   return Name;
5606 }
5607 
5608 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5609                                        StringRef Name,
5610                                        SMLoc NameLoc, OperandVector &Operands) {
5611   // Add the instruction mnemonic
5612   Name = parseMnemonicSuffix(Name);
5613   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5614 
5615   bool IsMIMG = Name.startswith("image_");
5616 
5617   while (!trySkipToken(AsmToken::EndOfStatement)) {
5618     OperandMode Mode = OperandMode_Default;
5619     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5620       Mode = OperandMode_NSA;
5621     CPolSeen = 0;
5622     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5623 
5624     if (Res != MatchOperand_Success) {
5625       checkUnsupportedInstruction(Name, NameLoc);
5626       if (!Parser.hasPendingError()) {
5627         // FIXME: use real operand location rather than the current location.
5628         StringRef Msg =
5629           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5630                                             "not a valid operand.";
5631         Error(getLoc(), Msg);
5632       }
5633       while (!trySkipToken(AsmToken::EndOfStatement)) {
5634         lex();
5635       }
5636       return true;
5637     }
5638 
5639     // Eat the comma or space if there is one.
5640     trySkipToken(AsmToken::Comma);
5641   }
5642 
5643   return false;
5644 }
5645 
5646 //===----------------------------------------------------------------------===//
5647 // Utility functions
5648 //===----------------------------------------------------------------------===//
5649 
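// Parse an integer that immediately follows "Prefix:", e.g. a hypothetical
// "offset:4095". Returns MatchOperand_NoMatch if the prefix is not present.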
5650 OperandMatchResultTy
5651 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5652 
5653   if (!trySkipId(Prefix, AsmToken::Colon))
5654     return MatchOperand_NoMatch;
5655 
5656   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5657 }
5658 
5659 OperandMatchResultTy
5660 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5661                                     AMDGPUOperand::ImmTy ImmTy,
5662                                     bool (*ConvertResult)(int64_t&)) {
5663   SMLoc S = getLoc();
5664   int64_t Value = 0;
5665 
5666   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5667   if (Res != MatchOperand_Success)
5668     return Res;
5669 
5670   if (ConvertResult && !ConvertResult(Value)) {
5671     Error(S, "invalid " + StringRef(Prefix) + " value.");
5672   }
5673 
5674   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5675   return MatchOperand_Success;
5676 }
5677 
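// Parse a prefixed array of 0/1 values, e.g. a hypothetical "op_sel:[0,1]".
// At most MaxSize (4) elements are accepted; they are packed into a bit mask
// with the first element in the least significant bit.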
5678 OperandMatchResultTy
5679 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5680                                              OperandVector &Operands,
5681                                              AMDGPUOperand::ImmTy ImmTy,
5682                                              bool (*ConvertResult)(int64_t&)) {
5683   SMLoc S = getLoc();
5684   if (!trySkipId(Prefix, AsmToken::Colon))
5685     return MatchOperand_NoMatch;
5686 
5687   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5688     return MatchOperand_ParseFail;
5689 
5690   unsigned Val = 0;
5691   const unsigned MaxSize = 4;
5692 
5693   // FIXME: How to verify the number of elements matches the number of src
5694   // operands?
5695   for (int I = 0; ; ++I) {
5696     int64_t Op;
5697     SMLoc Loc = getLoc();
5698     if (!parseExpr(Op))
5699       return MatchOperand_ParseFail;
5700 
5701     if (Op != 0 && Op != 1) {
5702       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5703       return MatchOperand_ParseFail;
5704     }
5705 
5706     Val |= (Op << I);
5707 
5708     if (trySkipToken(AsmToken::RBrac))
5709       break;
5710 
5711     if (I + 1 == MaxSize) {
5712       Error(getLoc(), "expected a closing square bracket");
5713       return MatchOperand_ParseFail;
5714     }
5715 
5716     if (!skipToken(AsmToken::Comma, "expected a comma"))
5717       return MatchOperand_ParseFail;
5718   }
5719 
5720   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5721   return MatchOperand_Success;
5722 }
5723 
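// Parse a named single-bit modifier such as a hypothetical "gds"; prefixing
// the name with "no" (e.g. "nogds") clears the bit instead of setting it.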
5724 OperandMatchResultTy
5725 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5726                                AMDGPUOperand::ImmTy ImmTy) {
5727   int64_t Bit;
5728   SMLoc S = getLoc();
5729 
5730   if (trySkipId(Name)) {
5731     Bit = 1;
5732   } else if (trySkipId("no", Name)) {
5733     Bit = 0;
5734   } else {
5735     return MatchOperand_NoMatch;
5736   }
5737 
5738   if (Name == "r128" && !hasMIMG_R128()) {
5739     Error(S, "r128 modifier is not supported on this GPU");
5740     return MatchOperand_ParseFail;
5741   }
5742   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5743     Error(S, "a16 modifier is not supported on this GPU");
5744     return MatchOperand_ParseFail;
5745   }
5746 
5747   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5748     ImmTy = AMDGPUOperand::ImmTyR128A16;
5749 
5750   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5751   return MatchOperand_Success;
5752 }
5753 
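// Parse cache policy modifiers (glc/slc/dlc/scc, or sc0/sc1/nt on GFX940 for
// non-"s_" instructions), each of which may be negated with a "no" prefix,
// e.g. "noglc". The accumulated bits are folded into a single CPol operand.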
5754 OperandMatchResultTy
5755 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5756   unsigned CPolOn = 0;
5757   unsigned CPolOff = 0;
5758   SMLoc S = getLoc();
5759 
5760   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5761   if (isGFX940() && !Mnemo.startswith("s_")) {
5762     if (trySkipId("sc0"))
5763       CPolOn = AMDGPU::CPol::SC0;
5764     else if (trySkipId("nosc0"))
5765       CPolOff = AMDGPU::CPol::SC0;
5766     else if (trySkipId("nt"))
5767       CPolOn = AMDGPU::CPol::NT;
5768     else if (trySkipId("nont"))
5769       CPolOff = AMDGPU::CPol::NT;
5770     else if (trySkipId("sc1"))
5771       CPolOn = AMDGPU::CPol::SC1;
5772     else if (trySkipId("nosc1"))
5773       CPolOff = AMDGPU::CPol::SC1;
5774     else
5775       return MatchOperand_NoMatch;
5776   }
5777   else if (trySkipId("glc"))
5778     CPolOn = AMDGPU::CPol::GLC;
5779   else if (trySkipId("noglc"))
5780     CPolOff = AMDGPU::CPol::GLC;
5781   else if (trySkipId("slc"))
5782     CPolOn = AMDGPU::CPol::SLC;
5783   else if (trySkipId("noslc"))
5784     CPolOff = AMDGPU::CPol::SLC;
5785   else if (trySkipId("dlc"))
5786     CPolOn = AMDGPU::CPol::DLC;
5787   else if (trySkipId("nodlc"))
5788     CPolOff = AMDGPU::CPol::DLC;
5789   else if (trySkipId("scc"))
5790     CPolOn = AMDGPU::CPol::SCC;
5791   else if (trySkipId("noscc"))
5792     CPolOff = AMDGPU::CPol::SCC;
5793   else
5794     return MatchOperand_NoMatch;
5795 
5796   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5797     Error(S, "dlc modifier is not supported on this GPU");
5798     return MatchOperand_ParseFail;
5799   }
5800 
5801   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5802     Error(S, "scc modifier is not supported on this GPU");
5803     return MatchOperand_ParseFail;
5804   }
5805 
5806   if (CPolSeen & (CPolOn | CPolOff)) {
5807     Error(S, "duplicate cache policy modifier");
5808     return MatchOperand_ParseFail;
5809   }
5810 
5811   CPolSeen |= (CPolOn | CPolOff);
5812 
5813   for (unsigned I = 1; I != Operands.size(); ++I) {
5814     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5815     if (Op.isCPol()) {
5816       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5817       return MatchOperand_Success;
5818     }
5819   }
5820 
5821   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5822                                               AMDGPUOperand::ImmTyCPol));
5823 
5824   return MatchOperand_Success;
5825 }
5826 
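// If an optional operand of type ImmT was recorded while parsing, re-add it
// to the MCInst from its saved position; otherwise add the given default
// immediate so that the operand list stays complete.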
5827 static void addOptionalImmOperand(
5828   MCInst& Inst, const OperandVector& Operands,
5829   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5830   AMDGPUOperand::ImmTy ImmT,
5831   int64_t Default = 0) {
5832   auto i = OptionalIdx.find(ImmT);
5833   if (i != OptionalIdx.end()) {
5834     unsigned Idx = i->second;
5835     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5836   } else {
5837     Inst.addOperand(MCOperand::createImm(Default));
5838   }
5839 }
5840 
5841 OperandMatchResultTy
5842 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5843                                        StringRef &Value,
5844                                        SMLoc &StringLoc) {
5845   if (!trySkipId(Prefix, AsmToken::Colon))
5846     return MatchOperand_NoMatch;
5847 
5848   StringLoc = getLoc();
5849   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5850                                                   : MatchOperand_ParseFail;
5851 }
5852 
5853 //===----------------------------------------------------------------------===//
5854 // MTBUF format
5855 //===----------------------------------------------------------------------===//
5856 
5857 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5858                                   int64_t MaxVal,
5859                                   int64_t &Fmt) {
5860   int64_t Val;
5861   SMLoc Loc = getLoc();
5862 
5863   auto Res = parseIntWithPrefix(Pref, Val);
5864   if (Res == MatchOperand_ParseFail)
5865     return false;
5866   if (Res == MatchOperand_NoMatch)
5867     return true;
5868 
5869   if (Val < 0 || Val > MaxVal) {
5870     Error(Loc, Twine("out of range ", StringRef(Pref)));
5871     return false;
5872   }
5873 
5874   Fmt = Val;
5875   return true;
5876 }
5877 
5878 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5879 // values to live in a joint format operand in the MCInst encoding.
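// For illustration, both "dfmt:1, nfmt:7" and "nfmt:7, dfmt:1" are accepted,
// and either half may be omitted, in which case its default value is used.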
5880 OperandMatchResultTy
5881 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5882   using namespace llvm::AMDGPU::MTBUFFormat;
5883 
5884   int64_t Dfmt = DFMT_UNDEF;
5885   int64_t Nfmt = NFMT_UNDEF;
5886 
5887   // dfmt and nfmt can appear in either order, and each is optional.
5888   for (int I = 0; I < 2; ++I) {
5889     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5890       return MatchOperand_ParseFail;
5891 
5892     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5893       return MatchOperand_ParseFail;
5894     }
5895     // Skip optional comma between dfmt/nfmt
5896     // but guard against 2 commas following each other.
5897     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5898         !peekToken().is(AsmToken::Comma)) {
5899       trySkipToken(AsmToken::Comma);
5900     }
5901   }
5902 
5903   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5904     return MatchOperand_NoMatch;
5905 
5906   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5907   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5908 
5909   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5910   return MatchOperand_Success;
5911 }
5912 
5913 OperandMatchResultTy
5914 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5915   using namespace llvm::AMDGPU::MTBUFFormat;
5916 
5917   int64_t Fmt = UFMT_UNDEF;
5918 
5919   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5920     return MatchOperand_ParseFail;
5921 
5922   if (Fmt == UFMT_UNDEF)
5923     return MatchOperand_NoMatch;
5924 
5925   Format = Fmt;
5926   return MatchOperand_Success;
5927 }
5928 
5929 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5930                                     int64_t &Nfmt,
5931                                     StringRef FormatStr,
5932                                     SMLoc Loc) {
5933   using namespace llvm::AMDGPU::MTBUFFormat;
5934   int64_t Format;
5935 
5936   Format = getDfmt(FormatStr);
5937   if (Format != DFMT_UNDEF) {
5938     Dfmt = Format;
5939     return true;
5940   }
5941 
5942   Format = getNfmt(FormatStr, getSTI());
5943   if (Format != NFMT_UNDEF) {
5944     Nfmt = Format;
5945     return true;
5946   }
5947 
5948   Error(Loc, "unsupported format");
5949   return false;
5950 }
5951 
5952 OperandMatchResultTy
5953 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5954                                           SMLoc FormatLoc,
5955                                           int64_t &Format) {
5956   using namespace llvm::AMDGPU::MTBUFFormat;
5957 
5958   int64_t Dfmt = DFMT_UNDEF;
5959   int64_t Nfmt = NFMT_UNDEF;
5960   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5961     return MatchOperand_ParseFail;
5962 
5963   if (trySkipToken(AsmToken::Comma)) {
5964     StringRef Str;
5965     SMLoc Loc = getLoc();
5966     if (!parseId(Str, "expected a format string") ||
5967         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5968       return MatchOperand_ParseFail;
5969     }
5970     if (Dfmt == DFMT_UNDEF) {
5971       Error(Loc, "duplicate numeric format");
5972       return MatchOperand_ParseFail;
5973     } else if (Nfmt == NFMT_UNDEF) {
5974       Error(Loc, "duplicate data format");
5975       return MatchOperand_ParseFail;
5976     }
5977   }
5978 
5979   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5980   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5981 
5982   if (isGFX10Plus()) {
5983     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5984     if (Ufmt == UFMT_UNDEF) {
5985       Error(FormatLoc, "unsupported format");
5986       return MatchOperand_ParseFail;
5987     }
5988     Format = Ufmt;
5989   } else {
5990     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5991   }
5992 
5993   return MatchOperand_Success;
5994 }
5995 
5996 OperandMatchResultTy
5997 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5998                                             SMLoc Loc,
5999                                             int64_t &Format) {
6000   using namespace llvm::AMDGPU::MTBUFFormat;
6001 
6002   auto Id = getUnifiedFormat(FormatStr);
6003   if (Id == UFMT_UNDEF)
6004     return MatchOperand_NoMatch;
6005 
6006   if (!isGFX10Plus()) {
6007     Error(Loc, "unified format is not supported on this GPU");
6008     return MatchOperand_ParseFail;
6009   }
6010 
6011   Format = Id;
6012   return MatchOperand_Success;
6013 }
6014 
6015 OperandMatchResultTy
6016 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6017   using namespace llvm::AMDGPU::MTBUFFormat;
6018   SMLoc Loc = getLoc();
6019 
6020   if (!parseExpr(Format))
6021     return MatchOperand_ParseFail;
6022   if (!isValidFormatEncoding(Format, getSTI())) {
6023     Error(Loc, "out of range format");
6024     return MatchOperand_ParseFail;
6025   }
6026 
6027   return MatchOperand_Success;
6028 }
6029 
6030 OperandMatchResultTy
6031 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6032   using namespace llvm::AMDGPU::MTBUFFormat;
6033 
6034   if (!trySkipId("format", AsmToken::Colon))
6035     return MatchOperand_NoMatch;
6036 
6037   if (trySkipToken(AsmToken::LBrac)) {
6038     StringRef FormatStr;
6039     SMLoc Loc = getLoc();
6040     if (!parseId(FormatStr, "expected a format string"))
6041       return MatchOperand_ParseFail;
6042 
6043     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6044     if (Res == MatchOperand_NoMatch)
6045       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6046     if (Res != MatchOperand_Success)
6047       return Res;
6048 
6049     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6050       return MatchOperand_ParseFail;
6051 
6052     return MatchOperand_Success;
6053   }
6054 
6055   return parseNumericFormat(Format);
6056 }
6057 
6058 OperandMatchResultTy
6059 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6060   using namespace llvm::AMDGPU::MTBUFFormat;
6061 
6062   int64_t Format = getDefaultFormatEncoding(getSTI());
6063   OperandMatchResultTy Res;
6064   SMLoc Loc = getLoc();
6065 
6066   // Parse legacy format syntax.
6067   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6068   if (Res == MatchOperand_ParseFail)
6069     return Res;
6070 
6071   bool FormatFound = (Res == MatchOperand_Success);
6072 
6073   Operands.push_back(
6074     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6075 
6076   if (FormatFound)
6077     trySkipToken(AsmToken::Comma);
6078 
6079   if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let the matcher handle the error.
6082     return MatchOperand_Success;
6083   }
6084 
6085   // Parse soffset.
6086   Res = parseRegOrImm(Operands);
6087   if (Res != MatchOperand_Success)
6088     return Res;
6089 
6090   trySkipToken(AsmToken::Comma);
6091 
6092   if (!FormatFound) {
6093     Res = parseSymbolicOrNumericFormat(Format);
6094     if (Res == MatchOperand_ParseFail)
6095       return Res;
6096     if (Res == MatchOperand_Success) {
6097       auto Size = Operands.size();
6098       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6099       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6100       Op.setImm(Format);
6101     }
6102     return MatchOperand_Success;
6103   }
6104 
6105   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6106     Error(getLoc(), "duplicate format");
6107     return MatchOperand_ParseFail;
6108   }
6109   return MatchOperand_Success;
6110 }
6111 
6112 //===----------------------------------------------------------------------===//
6113 // ds
6114 //===----------------------------------------------------------------------===//
6115 
6116 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6117                                     const OperandVector &Operands) {
6118   OptionalImmIndexMap OptionalIdx;
6119 
6120   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6121     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6122 
6123     // Add the register arguments
6124     if (Op.isReg()) {
6125       Op.addRegOperands(Inst, 1);
6126       continue;
6127     }
6128 
6129     // Handle optional arguments
6130     OptionalIdx[Op.getImmTy()] = i;
6131   }
6132 
6133   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6134   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6135   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6136 
6137   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6138 }
6139 
6140 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6141                                 bool IsGdsHardcoded) {
6142   OptionalImmIndexMap OptionalIdx;
6143 
6144   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6145     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6146 
6147     // Add the register arguments
6148     if (Op.isReg()) {
6149       Op.addRegOperands(Inst, 1);
6150       continue;
6151     }
6152 
6153     if (Op.isToken() && Op.getToken() == "gds") {
6154       IsGdsHardcoded = true;
6155       continue;
6156     }
6157 
6158     // Handle optional arguments
6159     OptionalIdx[Op.getImmTy()] = i;
6160   }
6161 
6162   AMDGPUOperand::ImmTy OffsetType =
6163     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6164      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6165      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6166                                                       AMDGPUOperand::ImmTyOffset;
6167 
6168   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6169 
6170   if (!IsGdsHardcoded) {
6171     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6172   }
6173   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6174 }
6175 
6176 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6177   OptionalImmIndexMap OptionalIdx;
6178 
6179   unsigned OperandIdx[4];
6180   unsigned EnMask = 0;
6181   int SrcIdx = 0;
6182 
6183   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6184     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6185 
6186     // Add the register arguments
6187     if (Op.isReg()) {
6188       assert(SrcIdx < 4);
6189       OperandIdx[SrcIdx] = Inst.size();
6190       Op.addRegOperands(Inst, 1);
6191       ++SrcIdx;
6192       continue;
6193     }
6194 
6195     if (Op.isOff()) {
6196       assert(SrcIdx < 4);
6197       OperandIdx[SrcIdx] = Inst.size();
6198       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6199       ++SrcIdx;
6200       continue;
6201     }
6202 
6203     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6204       Op.addImmOperands(Inst, 1);
6205       continue;
6206     }
6207 
6208     if (Op.isToken() && Op.getToken() == "done")
6209       continue;
6210 
6211     // Handle optional arguments
6212     OptionalIdx[Op.getImmTy()] = i;
6213   }
6214 
6215   assert(SrcIdx == 4);
6216 
6217   bool Compr = false;
6218   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6219     Compr = true;
6220     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6221     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6222     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6223   }
6224 
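  // Build the 'en' mask: one bit per enabled source in normal mode,
  // two bits per used register pair in compressed mode.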
6225   for (auto i = 0; i < SrcIdx; ++i) {
6226     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6227       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6228     }
6229   }
6230 
6231   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6232   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6233 
6234   Inst.addOperand(MCOperand::createImm(EnMask));
6235 }
6236 
6237 //===----------------------------------------------------------------------===//
6238 // s_waitcnt
6239 //===----------------------------------------------------------------------===//
6240 
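// Encode a single counter value into the combined waitcnt bitfield.
// If the value does not survive an encode/decode round trip it is out of
// range; with Saturate set, the field is clamped to its maximum instead.
// Returns true on failure.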
6241 static bool
6242 encodeCnt(
6243   const AMDGPU::IsaVersion ISA,
6244   int64_t &IntVal,
6245   int64_t CntVal,
6246   bool Saturate,
6247   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6248   unsigned (*decode)(const IsaVersion &Version, unsigned))
6249 {
6250   bool Failed = false;
6251 
6252   IntVal = encode(ISA, IntVal, CntVal);
6253   if (CntVal != decode(ISA, IntVal)) {
6254     if (Saturate) {
6255       IntVal = encode(ISA, IntVal, -1);
6256     } else {
6257       Failed = true;
6258     }
6259   }
6260   return Failed;
6261 }
6262 
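// Parse one counter specifier of an s_waitcnt operand, e.g. "vmcnt(0)".
// A "_sat" suffix on the counter name clamps an out-of-range value to the
// field maximum instead of reporting an error.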
6263 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6264 
6265   SMLoc CntLoc = getLoc();
6266   StringRef CntName = getTokenStr();
6267 
6268   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6269       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6270     return false;
6271 
6272   int64_t CntVal;
6273   SMLoc ValLoc = getLoc();
6274   if (!parseExpr(CntVal))
6275     return false;
6276 
6277   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6278 
6279   bool Failed = true;
6280   bool Sat = CntName.endswith("_sat");
6281 
6282   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6283     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6284   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6285     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6286   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6287     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6288   } else {
6289     Error(CntLoc, "invalid counter name " + CntName);
6290     return false;
6291   }
6292 
6293   if (Failed) {
6294     Error(ValLoc, "too large value for " + CntName);
6295     return false;
6296   }
6297 
6298   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6299     return false;
6300 
6301   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6302     if (isToken(AsmToken::EndOfStatement)) {
6303       Error(getLoc(), "expected a counter name");
6304       return false;
6305     }
6306   }
6307 
6308   return true;
6309 }
6310 
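// An s_waitcnt operand is either a raw immediate (or absolute expression)
// or a list of counters such as "vmcnt(0) expcnt(0) lgkmcnt(0)", optionally
// separated by '&' or ','.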
6311 OperandMatchResultTy
6312 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6313   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6314   int64_t Waitcnt = getWaitcntBitMask(ISA);
6315   SMLoc S = getLoc();
6316 
6317   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6318     while (!isToken(AsmToken::EndOfStatement)) {
6319       if (!parseCnt(Waitcnt))
6320         return MatchOperand_ParseFail;
6321     }
6322   } else {
6323     if (!parseExpr(Waitcnt))
6324       return MatchOperand_ParseFail;
6325   }
6326 
6327   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6328   return MatchOperand_Success;
6329 }
6330 
6331 bool
6332 AMDGPUOperand::isSWaitCnt() const {
6333   return isImm();
6334 }
6335 
6336 //===----------------------------------------------------------------------===//
6337 // hwreg
6338 //===----------------------------------------------------------------------===//
6339 
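// Parse the body of a hwreg operand:
//   hwreg(<name or code> [, <bit offset>, <bit width>])
// The opening parenthesis has already been consumed by the caller.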
6340 bool
6341 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6342                                 OperandInfoTy &Offset,
6343                                 OperandInfoTy &Width) {
6344   using namespace llvm::AMDGPU::Hwreg;
6345 
6346   // The register may be specified by name or using a numeric code
6347   HwReg.Loc = getLoc();
6348   if (isToken(AsmToken::Identifier) &&
6349       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6350     HwReg.IsSymbolic = true;
6351     lex(); // skip register name
6352   } else if (!parseExpr(HwReg.Id, "a register name")) {
6353     return false;
6354   }
6355 
6356   if (trySkipToken(AsmToken::RParen))
6357     return true;
6358 
6359   // parse optional params
6360   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6361     return false;
6362 
6363   Offset.Loc = getLoc();
6364   if (!parseExpr(Offset.Id))
6365     return false;
6366 
6367   if (!skipToken(AsmToken::Comma, "expected a comma"))
6368     return false;
6369 
6370   Width.Loc = getLoc();
6371   return parseExpr(Width.Id) &&
6372          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6373 }
6374 
6375 bool
6376 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6377                                const OperandInfoTy &Offset,
6378                                const OperandInfoTy &Width) {
6379 
6380   using namespace llvm::AMDGPU::Hwreg;
6381 
6382   if (HwReg.IsSymbolic) {
6383     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6384       Error(HwReg.Loc,
6385             "specified hardware register is not supported on this GPU");
6386       return false;
6387     }
6388   } else {
6389     if (!isValidHwreg(HwReg.Id)) {
6390       Error(HwReg.Loc,
6391             "invalid code of hardware register: only 6-bit values are legal");
6392       return false;
6393     }
6394   }
6395   if (!isValidHwregOffset(Offset.Id)) {
6396     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6397     return false;
6398   }
6399   if (!isValidHwregWidth(Width.Id)) {
6400     Error(Width.Loc,
6401           "invalid bitfield width: only values from 1 to 32 are legal");
6402     return false;
6403   }
6404   return true;
6405 }
6406 
6407 OperandMatchResultTy
6408 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6409   using namespace llvm::AMDGPU::Hwreg;
6410 
6411   int64_t ImmVal = 0;
6412   SMLoc Loc = getLoc();
6413 
6414   if (trySkipId("hwreg", AsmToken::LParen)) {
6415     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6416     OperandInfoTy Offset(OFFSET_DEFAULT_);
6417     OperandInfoTy Width(WIDTH_DEFAULT_);
6418     if (parseHwregBody(HwReg, Offset, Width) &&
6419         validateHwreg(HwReg, Offset, Width)) {
6420       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6421     } else {
6422       return MatchOperand_ParseFail;
6423     }
6424   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6425     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6426       Error(Loc, "invalid immediate: only 16-bit values are legal");
6427       return MatchOperand_ParseFail;
6428     }
6429   } else {
6430     return MatchOperand_ParseFail;
6431   }
6432 
6433   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6434   return MatchOperand_Success;
6435 }
6436 
6437 bool AMDGPUOperand::isHwreg() const {
6438   return isImmTy(ImmTyHwreg);
6439 }
6440 
6441 //===----------------------------------------------------------------------===//
6442 // sendmsg
6443 //===----------------------------------------------------------------------===//
6444 
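// Parse the body of a sendmsg operand:
//   sendmsg(<message name or id> [, <operation> [, <stream id>]])
// The opening parenthesis has already been consumed by the caller.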
6445 bool
6446 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6447                                   OperandInfoTy &Op,
6448                                   OperandInfoTy &Stream) {
6449   using namespace llvm::AMDGPU::SendMsg;
6450 
6451   Msg.Loc = getLoc();
6452   if (isToken(AsmToken::Identifier) &&
6453       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6454     Msg.IsSymbolic = true;
6455     lex(); // skip message name
6456   } else if (!parseExpr(Msg.Id, "a message name")) {
6457     return false;
6458   }
6459 
6460   if (trySkipToken(AsmToken::Comma)) {
6461     Op.IsDefined = true;
6462     Op.Loc = getLoc();
6463     if (isToken(AsmToken::Identifier) &&
6464         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6465       lex(); // skip operation name
6466     } else if (!parseExpr(Op.Id, "an operation name")) {
6467       return false;
6468     }
6469 
6470     if (trySkipToken(AsmToken::Comma)) {
6471       Stream.IsDefined = true;
6472       Stream.Loc = getLoc();
6473       if (!parseExpr(Stream.Id))
6474         return false;
6475     }
6476   }
6477 
6478   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6479 }
6480 
6481 bool
6482 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6483                                  const OperandInfoTy &Op,
6484                                  const OperandInfoTy &Stream) {
6485   using namespace llvm::AMDGPU::SendMsg;
6486 
  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case we only
  // check that the value can be encoded.
6490   bool Strict = Msg.IsSymbolic;
6491 
6492   if (Strict) {
6493     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6494       Error(Msg.Loc, "specified message id is not supported on this GPU");
6495       return false;
6496     }
6497   } else {
6498     if (!isValidMsgId(Msg.Id)) {
6499       Error(Msg.Loc, "invalid message id");
6500       return false;
6501     }
6502   }
6503   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6504     if (Op.IsDefined) {
6505       Error(Op.Loc, "message does not support operations");
6506     } else {
6507       Error(Msg.Loc, "missing message operation");
6508     }
6509     return false;
6510   }
6511   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6512     Error(Op.Loc, "invalid operation id");
6513     return false;
6514   }
6515   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6516     Error(Stream.Loc, "message operation does not support streams");
6517     return false;
6518   }
6519   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6520     Error(Stream.Loc, "invalid message stream id");
6521     return false;
6522   }
6523   return true;
6524 }
6525 
6526 OperandMatchResultTy
6527 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6528   using namespace llvm::AMDGPU::SendMsg;
6529 
6530   int64_t ImmVal = 0;
6531   SMLoc Loc = getLoc();
6532 
6533   if (trySkipId("sendmsg", AsmToken::LParen)) {
6534     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6535     OperandInfoTy Op(OP_NONE_);
6536     OperandInfoTy Stream(STREAM_ID_NONE_);
6537     if (parseSendMsgBody(Msg, Op, Stream) &&
6538         validateSendMsg(Msg, Op, Stream)) {
6539       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6540     } else {
6541       return MatchOperand_ParseFail;
6542     }
6543   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6544     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6545       Error(Loc, "invalid immediate: only 16-bit values are legal");
6546       return MatchOperand_ParseFail;
6547     }
6548   } else {
6549     return MatchOperand_ParseFail;
6550   }
6551 
6552   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6553   return MatchOperand_Success;
6554 }
6555 
6556 bool AMDGPUOperand::isSendMsg() const {
6557   return isImmTy(ImmTySendMsg);
6558 }
6559 
6560 //===----------------------------------------------------------------------===//
6561 // v_interp
6562 //===----------------------------------------------------------------------===//
6563 
6564 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6565   StringRef Str;
6566   SMLoc S = getLoc();
6567 
6568   if (!parseId(Str))
6569     return MatchOperand_NoMatch;
6570 
6571   int Slot = StringSwitch<int>(Str)
6572     .Case("p10", 0)
6573     .Case("p20", 1)
6574     .Case("p0", 2)
6575     .Default(-1);
6576 
6577   if (Slot == -1) {
6578     Error(S, "invalid interpolation slot");
6579     return MatchOperand_ParseFail;
6580   }
6581 
6582   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6583                                               AMDGPUOperand::ImmTyInterpSlot));
6584   return MatchOperand_Success;
6585 }
6586 
6587 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6588   StringRef Str;
6589   SMLoc S = getLoc();
6590 
6591   if (!parseId(Str))
6592     return MatchOperand_NoMatch;
6593 
6594   if (!Str.startswith("attr")) {
6595     Error(S, "invalid interpolation attribute");
6596     return MatchOperand_ParseFail;
6597   }
6598 
6599   StringRef Chan = Str.take_back(2);
6600   int AttrChan = StringSwitch<int>(Chan)
6601     .Case(".x", 0)
6602     .Case(".y", 1)
6603     .Case(".z", 2)
6604     .Case(".w", 3)
6605     .Default(-1);
6606   if (AttrChan == -1) {
6607     Error(S, "invalid or missing interpolation attribute channel");
6608     return MatchOperand_ParseFail;
6609   }
6610 
6611   Str = Str.drop_back(2).drop_front(4);
6612 
6613   uint8_t Attr;
6614   if (Str.getAsInteger(10, Attr)) {
6615     Error(S, "invalid or missing interpolation attribute number");
6616     return MatchOperand_ParseFail;
6617   }
6618 
6619   if (Attr > 63) {
6620     Error(S, "out of bounds interpolation attribute number");
6621     return MatchOperand_ParseFail;
6622   }
6623 
6624   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6625 
6626   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6627                                               AMDGPUOperand::ImmTyInterpAttr));
6628   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6629                                               AMDGPUOperand::ImmTyAttrChan));
6630   return MatchOperand_Success;
6631 }
6632 
6633 //===----------------------------------------------------------------------===//
6634 // exp
6635 //===----------------------------------------------------------------------===//
6636 
6637 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6638   using namespace llvm::AMDGPU::Exp;
6639 
6640   StringRef Str;
6641   SMLoc S = getLoc();
6642 
6643   if (!parseId(Str))
6644     return MatchOperand_NoMatch;
6645 
6646   unsigned Id = getTgtId(Str);
6647   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6648     Error(S, (Id == ET_INVALID) ?
6649                 "invalid exp target" :
6650                 "exp target is not supported on this GPU");
6651     return MatchOperand_ParseFail;
6652   }
6653 
6654   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6655                                               AMDGPUOperand::ImmTyExpTgt));
6656   return MatchOperand_Success;
6657 }
6658 
6659 //===----------------------------------------------------------------------===//
6660 // parser helpers
6661 //===----------------------------------------------------------------------===//
6662 
6663 bool
6664 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6665   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6666 }
6667 
6668 bool
6669 AMDGPUAsmParser::isId(const StringRef Id) const {
6670   return isId(getToken(), Id);
6671 }
6672 
6673 bool
6674 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6675   return getTokenKind() == Kind;
6676 }
6677 
6678 bool
6679 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6680   if (isId(Id)) {
6681     lex();
6682     return true;
6683   }
6684   return false;
6685 }
6686 
6687 bool
6688 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6689   if (isToken(AsmToken::Identifier)) {
6690     StringRef Tok = getTokenStr();
6691     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6692       lex();
6693       return true;
6694     }
6695   }
6696   return false;
6697 }
6698 
6699 bool
6700 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6701   if (isId(Id) && peekToken().is(Kind)) {
6702     lex();
6703     lex();
6704     return true;
6705   }
6706   return false;
6707 }
6708 
6709 bool
6710 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6711   if (isToken(Kind)) {
6712     lex();
6713     return true;
6714   }
6715   return false;
6716 }
6717 
6718 bool
6719 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6720                            const StringRef ErrMsg) {
6721   if (!trySkipToken(Kind)) {
6722     Error(getLoc(), ErrMsg);
6723     return false;
6724   }
6725   return true;
6726 }
6727 
6728 bool
6729 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6730   SMLoc S = getLoc();
6731 
6732   const MCExpr *Expr;
6733   if (Parser.parseExpression(Expr))
6734     return false;
6735 
6736   if (Expr->evaluateAsAbsolute(Imm))
6737     return true;
6738 
6739   if (Expected.empty()) {
6740     Error(S, "expected absolute expression");
6741   } else {
6742     Error(S, Twine("expected ", Expected) +
6743              Twine(" or an absolute expression"));
6744   }
6745   return false;
6746 }
6747 
6748 bool
6749 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6750   SMLoc S = getLoc();
6751 
6752   const MCExpr *Expr;
6753   if (Parser.parseExpression(Expr))
6754     return false;
6755 
6756   int64_t IntVal;
6757   if (Expr->evaluateAsAbsolute(IntVal)) {
6758     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6759   } else {
6760     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6761   }
6762   return true;
6763 }
6764 
6765 bool
6766 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6767   if (isToken(AsmToken::String)) {
6768     Val = getToken().getStringContents();
6769     lex();
6770     return true;
6771   } else {
6772     Error(getLoc(), ErrMsg);
6773     return false;
6774   }
6775 }
6776 
6777 bool
6778 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6779   if (isToken(AsmToken::Identifier)) {
6780     Val = getTokenStr();
6781     lex();
6782     return true;
6783   } else {
6784     if (!ErrMsg.empty())
6785       Error(getLoc(), ErrMsg);
6786     return false;
6787   }
6788 }
6789 
6790 AsmToken
6791 AMDGPUAsmParser::getToken() const {
6792   return Parser.getTok();
6793 }
6794 
6795 AsmToken
6796 AMDGPUAsmParser::peekToken() {
6797   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6798 }
6799 
6800 void
6801 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6802   auto TokCount = getLexer().peekTokens(Tokens);
6803 
6804   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6805     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6806 }
6807 
6808 AsmToken::TokenKind
6809 AMDGPUAsmParser::getTokenKind() const {
6810   return getLexer().getKind();
6811 }
6812 
6813 SMLoc
6814 AMDGPUAsmParser::getLoc() const {
6815   return getToken().getLoc();
6816 }
6817 
6818 StringRef
6819 AMDGPUAsmParser::getTokenStr() const {
6820   return getToken().getString();
6821 }
6822 
6823 void
6824 AMDGPUAsmParser::lex() {
6825   Parser.Lex();
6826 }
6827 
6828 SMLoc
6829 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6830                                const OperandVector &Operands) const {
6831   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6832     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6833     if (Test(Op))
6834       return Op.getStartLoc();
6835   }
6836   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6837 }
6838 
6839 SMLoc
6840 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6841                            const OperandVector &Operands) const {
6842   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6843   return getOperandLoc(Test, Operands);
6844 }
6845 
6846 SMLoc
6847 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6848                            const OperandVector &Operands) const {
6849   auto Test = [=](const AMDGPUOperand& Op) {
6850     return Op.isRegKind() && Op.getReg() == Reg;
6851   };
6852   return getOperandLoc(Test, Operands);
6853 }
6854 
6855 SMLoc
6856 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6857   auto Test = [](const AMDGPUOperand& Op) {
6858     return Op.IsImmKindLiteral() || Op.isExpr();
6859   };
6860   return getOperandLoc(Test, Operands);
6861 }
6862 
6863 SMLoc
6864 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6865   auto Test = [](const AMDGPUOperand& Op) {
6866     return Op.isImmKindConst();
6867   };
6868   return getOperandLoc(Test, Operands);
6869 }
6870 
6871 //===----------------------------------------------------------------------===//
6872 // swizzle
6873 //===----------------------------------------------------------------------===//
6874 
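// Pack a BITMASK_PERM swizzle: for each lane, the source lane is computed
// as ((lane_id & AndMask) | OrMask) ^ XorMask.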
6875 LLVM_READNONE
6876 static unsigned
6877 encodeBitmaskPerm(const unsigned AndMask,
6878                   const unsigned OrMask,
6879                   const unsigned XorMask) {
6880   using namespace llvm::AMDGPU::Swizzle;
6881 
6882   return BITMASK_PERM_ENC |
6883          (AndMask << BITMASK_AND_SHIFT) |
6884          (OrMask  << BITMASK_OR_SHIFT)  |
6885          (XorMask << BITMASK_XOR_SHIFT);
6886 }
6887 
6888 bool
6889 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6890                                      const unsigned MinVal,
6891                                      const unsigned MaxVal,
6892                                      const StringRef ErrMsg,
6893                                      SMLoc &Loc) {
6894   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6895     return false;
6896   }
6897   Loc = getLoc();
6898   if (!parseExpr(Op)) {
6899     return false;
6900   }
6901   if (Op < MinVal || Op > MaxVal) {
6902     Error(Loc, ErrMsg);
6903     return false;
6904   }
6905 
6906   return true;
6907 }
6908 
6909 bool
6910 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6911                                       const unsigned MinVal,
6912                                       const unsigned MaxVal,
6913                                       const StringRef ErrMsg) {
6914   SMLoc Loc;
6915   for (unsigned i = 0; i < OpNum; ++i) {
6916     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6917       return false;
6918   }
6919 
6920   return true;
6921 }
6922 
6923 bool
6924 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6925   using namespace llvm::AMDGPU::Swizzle;
6926 
6927   int64_t Lane[LANE_NUM];
6928   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6929                            "expected a 2-bit lane id")) {
6930     Imm = QUAD_PERM_ENC;
6931     for (unsigned I = 0; I < LANE_NUM; ++I) {
6932       Imm |= Lane[I] << (LANE_SHIFT * I);
6933     }
6934     return true;
6935   }
6936   return false;
6937 }
6938 
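// swizzle(BROADCAST, <group size>, <lane id>): every lane of each group
// reads from the selected lane. Encoded as a bitmask perm whose AndMask
// keeps the group base and whose OrMask supplies the lane index.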
6939 bool
6940 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6941   using namespace llvm::AMDGPU::Swizzle;
6942 
6943   SMLoc Loc;
6944   int64_t GroupSize;
6945   int64_t LaneIdx;
6946 
6947   if (!parseSwizzleOperand(GroupSize,
6948                            2, 32,
6949                            "group size must be in the interval [2,32]",
6950                            Loc)) {
6951     return false;
6952   }
6953   if (!isPowerOf2_64(GroupSize)) {
6954     Error(Loc, "group size must be a power of two");
6955     return false;
6956   }
6957   if (parseSwizzleOperand(LaneIdx,
6958                           0, GroupSize - 1,
6959                           "lane id must be in the interval [0,group size - 1]",
6960                           Loc)) {
6961     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6962     return true;
6963   }
6964   return false;
6965 }
6966 
6967 bool
6968 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6969   using namespace llvm::AMDGPU::Swizzle;
6970 
6971   SMLoc Loc;
6972   int64_t GroupSize;
6973 
6974   if (!parseSwizzleOperand(GroupSize,
6975                            2, 32,
6976                            "group size must be in the interval [2,32]",
6977                            Loc)) {
6978     return false;
6979   }
6980   if (!isPowerOf2_64(GroupSize)) {
6981     Error(Loc, "group size must be a power of two");
6982     return false;
6983   }
6984 
6985   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6986   return true;
6987 }
6988 
6989 bool
6990 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6991   using namespace llvm::AMDGPU::Swizzle;
6992 
6993   SMLoc Loc;
6994   int64_t GroupSize;
6995 
6996   if (!parseSwizzleOperand(GroupSize,
6997                            1, 16,
6998                            "group size must be in the interval [1,16]",
6999                            Loc)) {
7000     return false;
7001   }
7002   if (!isPowerOf2_64(GroupSize)) {
7003     Error(Loc, "group size must be a power of two");
7004     return false;
7005   }
7006 
7007   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7008   return true;
7009 }
7010 
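// swizzle(BITMASK_PERM, "<mask>"): the mask is a 5-character string, most
// significant lane id bit first; '0' forces the bit to 0, '1' forces it
// to 1, 'p' preserves it and 'i' inverts it.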
7011 bool
7012 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7013   using namespace llvm::AMDGPU::Swizzle;
7014 
7015   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7016     return false;
7017   }
7018 
7019   StringRef Ctl;
7020   SMLoc StrLoc = getLoc();
7021   if (!parseString(Ctl)) {
7022     return false;
7023   }
7024   if (Ctl.size() != BITMASK_WIDTH) {
7025     Error(StrLoc, "expected a 5-character mask");
7026     return false;
7027   }
7028 
7029   unsigned AndMask = 0;
7030   unsigned OrMask = 0;
7031   unsigned XorMask = 0;
7032 
7033   for (size_t i = 0; i < Ctl.size(); ++i) {
7034     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7035     switch(Ctl[i]) {
7036     default:
7037       Error(StrLoc, "invalid mask");
7038       return false;
7039     case '0':
7040       break;
7041     case '1':
7042       OrMask |= Mask;
7043       break;
7044     case 'p':
7045       AndMask |= Mask;
7046       break;
7047     case 'i':
7048       AndMask |= Mask;
7049       XorMask |= Mask;
7050       break;
7051     }
7052   }
7053 
7054   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7055   return true;
7056 }
7057 
7058 bool
7059 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7060 
7061   SMLoc OffsetLoc = getLoc();
7062 
7063   if (!parseExpr(Imm, "a swizzle macro")) {
7064     return false;
7065   }
7066   if (!isUInt<16>(Imm)) {
7067     Error(OffsetLoc, "expected a 16-bit offset");
7068     return false;
7069   }
7070   return true;
7071 }
7072 
7073 bool
7074 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7075   using namespace llvm::AMDGPU::Swizzle;
7076 
  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7078 
7079     SMLoc ModeLoc = getLoc();
7080     bool Ok = false;
7081 
7082     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7083       Ok = parseSwizzleQuadPerm(Imm);
7084     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7085       Ok = parseSwizzleBitmaskPerm(Imm);
7086     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7087       Ok = parseSwizzleBroadcast(Imm);
7088     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7089       Ok = parseSwizzleSwap(Imm);
7090     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7091       Ok = parseSwizzleReverse(Imm);
7092     } else {
7093       Error(ModeLoc, "expected a swizzle mode");
7094     }
7095 
    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7097   }
7098 
7099   return false;
7100 }
7101 
7102 OperandMatchResultTy
7103 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7104   SMLoc S = getLoc();
7105   int64_t Imm = 0;
7106 
7107   if (trySkipId("offset")) {
7108 
7109     bool Ok = false;
7110     if (skipToken(AsmToken::Colon, "expected a colon")) {
7111       if (trySkipId("swizzle")) {
7112         Ok = parseSwizzleMacro(Imm);
7113       } else {
7114         Ok = parseSwizzleOffset(Imm);
7115       }
7116     }
7117 
7118     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7119 
7120     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7121   } else {
7122     // Swizzle "offset" operand is optional.
7123     // If it is omitted, try parsing other optional operands.
7124     return parseOptionalOpr(Operands);
7125   }
7126 }
7127 
7128 bool
7129 AMDGPUOperand::isSwizzle() const {
7130   return isImmTy(ImmTySwizzle);
7131 }
7132 
7133 //===----------------------------------------------------------------------===//
7134 // VGPR Index Mode
7135 //===----------------------------------------------------------------------===//
7136 
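// Parse the body of a gpr_idx operand: either an empty list, which means
// the index mode is off, or a comma-separated list of VGPR index modes.
// The opening parenthesis has already been consumed by the caller.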
7137 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7138 
7139   using namespace llvm::AMDGPU::VGPRIndexMode;
7140 
7141   if (trySkipToken(AsmToken::RParen)) {
7142     return OFF;
7143   }
7144 
7145   int64_t Imm = 0;
7146 
7147   while (true) {
7148     unsigned Mode = 0;
7149     SMLoc S = getLoc();
7150 
7151     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7152       if (trySkipId(IdSymbolic[ModeId])) {
7153         Mode = 1 << ModeId;
7154         break;
7155       }
7156     }
7157 
7158     if (Mode == 0) {
7159       Error(S, (Imm == 0)?
7160                "expected a VGPR index mode or a closing parenthesis" :
7161                "expected a VGPR index mode");
7162       return UNDEF;
7163     }
7164 
7165     if (Imm & Mode) {
7166       Error(S, "duplicate VGPR index mode");
7167       return UNDEF;
7168     }
7169     Imm |= Mode;
7170 
7171     if (trySkipToken(AsmToken::RParen))
7172       break;
7173     if (!skipToken(AsmToken::Comma,
7174                    "expected a comma or a closing parenthesis"))
7175       return UNDEF;
7176   }
7177 
7178   return Imm;
7179 }
7180 
7181 OperandMatchResultTy
7182 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7183 
7184   using namespace llvm::AMDGPU::VGPRIndexMode;
7185 
7186   int64_t Imm = 0;
7187   SMLoc S = getLoc();
7188 
7189   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7190     Imm = parseGPRIdxMacro();
7191     if (Imm == UNDEF)
7192       return MatchOperand_ParseFail;
7193   } else {
7194     if (getParser().parseAbsoluteExpression(Imm))
7195       return MatchOperand_ParseFail;
7196     if (Imm < 0 || !isUInt<4>(Imm)) {
7197       Error(S, "invalid immediate: only 4-bit values are legal");
7198       return MatchOperand_ParseFail;
7199     }
7200   }
7201 
7202   Operands.push_back(
7203       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7204   return MatchOperand_Success;
7205 }
7206 
7207 bool AMDGPUOperand::isGPRIdxMode() const {
7208   return isImmTy(ImmTyGprIdxMode);
7209 }
7210 
7211 //===----------------------------------------------------------------------===//
7212 // sopp branch targets
7213 //===----------------------------------------------------------------------===//
7214 
7215 OperandMatchResultTy
7216 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7217 
7218   // Make sure we are not parsing something
7219   // that looks like a label or an expression but is not.
7220   // This will improve error messages.
7221   if (isRegister() || isModifier())
7222     return MatchOperand_NoMatch;
7223 
7224   if (!parseExpr(Operands))
7225     return MatchOperand_ParseFail;
7226 
7227   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7228   assert(Opr.isImm() || Opr.isExpr());
7229   SMLoc Loc = Opr.getStartLoc();
7230 
7231   // Currently we do not support arbitrary expressions as branch targets.
7232   // Only labels and absolute expressions are accepted.
7233   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7234     Error(Loc, "expected an absolute expression or a label");
7235   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7236     Error(Loc, "expected a 16-bit signed jump offset");
7237   }
7238 
7239   return MatchOperand_Success;
7240 }
7241 
7242 //===----------------------------------------------------------------------===//
7243 // Boolean holding registers
7244 //===----------------------------------------------------------------------===//
7245 
7246 OperandMatchResultTy
7247 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7248   return parseReg(Operands);
7249 }
7250 
7251 //===----------------------------------------------------------------------===//
7252 // mubuf
7253 //===----------------------------------------------------------------------===//
7254 
7255 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7256   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7257 }
7258 
7259 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7260                                    const OperandVector &Operands,
7261                                    bool IsAtomic,
7262                                    bool IsLds) {
7263   bool IsLdsOpcode = IsLds;
7264   bool HasLdsModifier = false;
7265   OptionalImmIndexMap OptionalIdx;
7266   unsigned FirstOperandIdx = 1;
7267   bool IsAtomicReturn = false;
7268 
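  // For atomics, check whether the 'glc' policy bit was specified; if not,
  // switch to the no-return variant of the opcode before adding operands.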
7269   if (IsAtomic) {
7270     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7271       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7272       if (!Op.isCPol())
7273         continue;
7274       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7275       break;
7276     }
7277 
7278     if (!IsAtomicReturn) {
7279       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7280       if (NewOpc != -1)
7281         Inst.setOpcode(NewOpc);
7282     }
7283 
7284     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7285                       SIInstrFlags::IsAtomicRet;
7286   }
7287 
7288   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7289     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7290 
7291     // Add the register arguments
7292     if (Op.isReg()) {
7293       Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on the correct number of MC operands.
7297       if (IsAtomicReturn && i == FirstOperandIdx)
7298         Op.addRegOperands(Inst, 1);
7299       continue;
7300     }
7301 
7302     // Handle the case where soffset is an immediate
7303     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7304       Op.addImmOperands(Inst, 1);
7305       continue;
7306     }
7307 
7308     HasLdsModifier |= Op.isLDS();
7309 
7310     // Handle tokens like 'offen' which are sometimes hard-coded into the
7311     // asm string.  There are no MCInst operands for these.
7312     if (Op.isToken()) {
7313       continue;
7314     }
7315     assert(Op.isImm());
7316 
7317     // Handle optional arguments
7318     OptionalIdx[Op.getImmTy()] = i;
7319   }
7320 
  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // optional modifiers and the llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of an opcode may be selected even if it has no 'lds' modifier.
7328   if (IsLdsOpcode && !HasLdsModifier) {
7329     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7330     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7331       Inst.setOpcode(NoLdsOpcode);
7332       IsLdsOpcode = false;
7333     }
7334   }
7335 
7336   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7337   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7338 
7339   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7340     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7341   }
7342   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7343 }
7344 
7345 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7346   OptionalImmIndexMap OptionalIdx;
7347 
7348   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7349     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7350 
7351     // Add the register arguments
7352     if (Op.isReg()) {
7353       Op.addRegOperands(Inst, 1);
7354       continue;
7355     }
7356 
7357     // Handle the case where soffset is an immediate
7358     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7359       Op.addImmOperands(Inst, 1);
7360       continue;
7361     }
7362 
7363     // Handle tokens like 'offen' which are sometimes hard-coded into the
7364     // asm string.  There are no MCInst operands for these.
7365     if (Op.isToken()) {
7366       continue;
7367     }
7368     assert(Op.isImm());
7369 
7370     // Handle optional arguments
7371     OptionalIdx[Op.getImmTy()] = i;
7372   }
7373 
7374   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7375                         AMDGPUOperand::ImmTyOffset);
7376   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7377   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7378   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7379   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7380 }
7381 
7382 //===----------------------------------------------------------------------===//
7383 // mimg
7384 //===----------------------------------------------------------------------===//
7385 
7386 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7387                               bool IsAtomic) {
7388   unsigned I = 1;
7389   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7390   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7391     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7392   }
7393 
7394   if (IsAtomic) {
7395     // Add src, same as dst
7396     assert(Desc.getNumDefs() == 1);
7397     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7398   }
7399 
7400   OptionalImmIndexMap OptionalIdx;
7401 
7402   for (unsigned E = Operands.size(); I != E; ++I) {
7403     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7404 
7405     // Add the register arguments
7406     if (Op.isReg()) {
7407       Op.addRegOperands(Inst, 1);
7408     } else if (Op.isImmModifier()) {
7409       OptionalIdx[Op.getImmTy()] = I;
7410     } else if (!Op.isToken()) {
7411       llvm_unreachable("unexpected operand type");
7412     }
7413   }
7414 
7415   bool IsGFX10Plus = isGFX10Plus();
7416 
7417   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7418   if (IsGFX10Plus)
7419     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7420   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7421   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7422   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7423   if (IsGFX10Plus)
7424     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7425   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7426     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7427   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7428   if (!IsGFX10Plus)
7429     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7430   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7431 }
7432 
7433 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7434   cvtMIMG(Inst, Operands, true);
7435 }
7436 
7437 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7438   OptionalImmIndexMap OptionalIdx;
7439   bool IsAtomicReturn = false;
7440 
7441   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7442     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7443     if (!Op.isCPol())
7444       continue;
7445     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7446     break;
7447   }
7448 
7449   if (!IsAtomicReturn) {
7450     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7451     if (NewOpc != -1)
7452       Inst.setOpcode(NewOpc);
7453   }
7454 
7455   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7456                     SIInstrFlags::IsAtomicRet;
7457 
7458   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7459     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7460 
7461     // Add the register arguments
7462     if (Op.isReg()) {
7463       Op.addRegOperands(Inst, 1);
7464       if (IsAtomicReturn && i == 1)
7465         Op.addRegOperands(Inst, 1);
7466       continue;
7467     }
7468 
7469     // Handle the case where soffset is an immediate
7470     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7471       Op.addImmOperands(Inst, 1);
7472       continue;
7473     }
7474 
7475     // Handle tokens like 'offen' which are sometimes hard-coded into the
7476     // asm string.  There are no MCInst operands for these.
7477     if (Op.isToken()) {
7478       continue;
7479     }
7480     assert(Op.isImm());
7481 
7482     // Handle optional arguments
7483     OptionalIdx[Op.getImmTy()] = i;
7484   }
7485 
7486   if ((int)Inst.getNumOperands() <=
7487       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7488     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7489   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7490 }
7491 
7492 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7493                                       const OperandVector &Operands) {
7494   for (unsigned I = 1; I < Operands.size(); ++I) {
7495     auto &Operand = (AMDGPUOperand &)*Operands[I];
7496     if (Operand.isReg())
7497       Operand.addRegOperands(Inst, 1);
7498   }
7499 
7500   Inst.addOperand(MCOperand::createImm(1)); // a16
7501 }
7502 
7503 //===----------------------------------------------------------------------===//
7504 // smrd
7505 //===----------------------------------------------------------------------===//
7506 
7507 bool AMDGPUOperand::isSMRDOffset8() const {
7508   return isImm() && isUInt<8>(getImm());
7509 }
7510 
7511 bool AMDGPUOperand::isSMEMOffset() const {
7512   return isImm(); // Offset range is checked later by validator.
7513 }
7514 
7515 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7516   // 32-bit literals are only supported on CI and we only want to use them
7517   // when the offset is > 8-bits.
7518   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7519 }
7520 
7521 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7522   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7523 }
7524 
7525 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7526   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7527 }
7528 
7529 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7530   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7531 }
7532 
7533 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7534   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7535 }
7536 
7537 //===----------------------------------------------------------------------===//
7538 // vop3
7539 //===----------------------------------------------------------------------===//
7540 
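// Map the omod (output modifier) syntax onto its encoding:
// mul:1 -> 0, mul:2 -> 1, mul:4 -> 2 and div:1 -> 0, div:2 -> 3.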
7541 static bool ConvertOmodMul(int64_t &Mul) {
7542   if (Mul != 1 && Mul != 2 && Mul != 4)
7543     return false;
7544 
7545   Mul >>= 1;
7546   return true;
7547 }
7548 
7549 static bool ConvertOmodDiv(int64_t &Div) {
7550   if (Div == 1) {
7551     Div = 0;
7552     return true;
7553   }
7554 
7555   if (Div == 2) {
7556     Div = 3;
7557     return true;
7558   }
7559 
7560   return false;
7561 }
7562 
7563 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7564 // This is intentional and ensures compatibility with sp3.
7565 // See bug 35397 for details.
7566 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7567   if (BoundCtrl == 0 || BoundCtrl == 1) {
7568     BoundCtrl = 1;
7569     return true;
7570   }
7571   return false;
7572 }
7573 
7574 // Note: the order in this table matches the order of operands in AsmString.
7575 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7576   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7577   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7578   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7579   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7580   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7581   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7582   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7583   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7584   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7585   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7586   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7587   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7588   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7589   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7590   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7591   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7592   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7593   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7594   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7595   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7596   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7597   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7598   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7599   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7600   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7601   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7602   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7603   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7604   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7605   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7606   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7607   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7608   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7609   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7610   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7611   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7612   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7613   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7614   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7615   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7616   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7617 };
7618 
7619 void AMDGPUAsmParser::onBeginOfFile() {
7620   if (!getParser().getStreamer().getTargetStreamer() ||
7621       getSTI().getTargetTriple().getArch() == Triple::r600)
7622     return;
7623 
7624   if (!getTargetStreamer().getTargetID())
7625     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7626 
7627   if (isHsaAbiVersion3AndAbove(&getSTI()))
7628     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7629 }
7630 
7631 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7632 
7633   OperandMatchResultTy res = parseOptionalOpr(Operands);
7634 
7635   // This is a hack to enable hardcoded mandatory operands which follow
7636   // optional operands.
7637   //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementations of some
  // instructions violate this rule (see e.g. flat/global atomics, which have
  // hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
7645 
7646   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7647     if (res != MatchOperand_Success ||
7648         isToken(AsmToken::EndOfStatement))
7649       break;
7650 
7651     trySkipToken(AsmToken::Comma);
7652     res = parseOptionalOpr(Operands);
7653   }
7654 
7655   return res;
7656 }
7657 
7658 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7659   OperandMatchResultTy res;
7660   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7661     // try to parse any optional operand here
7662     if (Op.IsBit) {
7663       res = parseNamedBit(Op.Name, Operands, Op.Type);
7664     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7665       res = parseOModOperand(Operands);
7666     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7667                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7668                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7669       res = parseSDWASel(Operands, Op.Name, Op.Type);
7670     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7671       res = parseSDWADstUnused(Operands);
7672     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7673                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7674                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7675                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7676       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7677                                         Op.ConvertResult);
7678     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7679       res = parseDim(Operands);
7680     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7681       res = parseCPol(Operands);
7682     } else {
7683       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7684       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7685         res = parseOperandArrayWithPrefix("neg", Operands,
7686                                           AMDGPUOperand::ImmTyBLGP,
7687                                           nullptr);
7688       }
7689     }
7690     if (res != MatchOperand_NoMatch) {
7691       return res;
7692     }
7693   }
7694   return MatchOperand_NoMatch;
7695 }
7696 
7697 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7698   StringRef Name = getTokenStr();
7699   if (Name == "mul") {
7700     return parseIntWithPrefix("mul", Operands,
7701                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7702   }
7703 
7704   if (Name == "div") {
7705     return parseIntWithPrefix("div", Operands,
7706                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7707   }
7708 
7709   return MatchOperand_NoMatch;
7710 }
7711 
7712 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7713   cvtVOP3P(Inst, Operands);
7714 
7715   int Opc = Inst.getOpcode();
7716 
7717   int SrcNum;
7718   const int Ops[] = { AMDGPU::OpName::src0,
7719                       AMDGPU::OpName::src1,
7720                       AMDGPU::OpName::src2 };
7721   for (SrcNum = 0;
7722        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7723        ++SrcNum);
7724   assert(SrcNum > 0);
7725 
7726   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7727   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7728 
7729   if ((OpSel & (1 << SrcNum)) != 0) {
7730     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7731     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7732     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7733   }
7734 }
7735 
7736 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand holds input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand is a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next register is not tied to any other operand
7744       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7745 }
7746 
7747 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7748 {
7749   OptionalImmIndexMap OptionalIdx;
7750   unsigned Opc = Inst.getOpcode();
7751 
7752   unsigned I = 1;
7753   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7754   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7755     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7756   }
7757 
7758   for (unsigned E = Operands.size(); I != E; ++I) {
7759     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7760     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7761       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7762     } else if (Op.isInterpSlot() ||
7763                Op.isInterpAttr() ||
7764                Op.isAttrChan()) {
7765       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7766     } else if (Op.isImmModifier()) {
7767       OptionalIdx[Op.getImmTy()] = I;
7768     } else {
7769       llvm_unreachable("unhandled operand type");
7770     }
7771   }
7772 
7773   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7774     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7775   }
7776 
7777   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7778     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7779   }
7780 
7781   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7782     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7783   }
7784 }
7785 
7786 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7787                               OptionalImmIndexMap &OptionalIdx) {
7788   unsigned Opc = Inst.getOpcode();
7789 
7790   unsigned I = 1;
7791   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7792   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7793     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7794   }
7795 
7796   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7797     // This instruction has src modifiers
7798     for (unsigned E = Operands.size(); I != E; ++I) {
7799       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7800       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7801         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7802       } else if (Op.isImmModifier()) {
7803         OptionalIdx[Op.getImmTy()] = I;
7804       } else if (Op.isRegOrImm()) {
7805         Op.addRegOrImmOperands(Inst, 1);
7806       } else {
7807         llvm_unreachable("unhandled operand type");
7808       }
7809     }
7810   } else {
7811     // No src modifiers
7812     for (unsigned E = Operands.size(); I != E; ++I) {
7813       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7814       if (Op.isMod()) {
7815         OptionalIdx[Op.getImmTy()] = I;
7816       } else {
7817         Op.addRegOrImmOperands(Inst, 1);
7818       }
7819     }
7820   }
7821 
7822   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7823     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7824   }
7825 
7826   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7827     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7828   }
7829 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // The assembler does not allow modifiers for this operand, so src2_modifiers
  // must be 0.
7834   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7835       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7836       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7837       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7838       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7839       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7840       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7841       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7842       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7843       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7844       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7845     auto it = Inst.begin();
7846     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7847     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7848     ++it;
7849     // Copy the operand to ensure it's not invalidated when Inst grows.
7850     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7851   }
7852 }
7853 
7854 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7855   OptionalImmIndexMap OptionalIdx;
7856   cvtVOP3(Inst, Operands, OptionalIdx);
7857 }
7858 
7859 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7860                                OptionalImmIndexMap &OptIdx) {
7861   const int Opc = Inst.getOpcode();
7862   const MCInstrDesc &Desc = MII.get(Opc);
7863 
7864   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7865 
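  // If the opcode has a vdst_in operand (tied to vdst), duplicate the parsed
  // dst register for it.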
7866   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7867     assert(!IsPacked);
7868     Inst.addOperand(Inst.getOperand(0));
7869   }
7870 
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
7873 
7874   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7875   if (OpSelIdx != -1) {
7876     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7877   }
7878 
7879   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7880   if (OpSelHiIdx != -1) {
7881     int DefaultVal = IsPacked ? -1 : 0;
7882     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7883                           DefaultVal);
7884   }
7885 
7886   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7887   if (NegLoIdx != -1) {
7888     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7889     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7890   }
7891 
7892   const int Ops[] = { AMDGPU::OpName::src0,
7893                       AMDGPU::OpName::src1,
7894                       AMDGPU::OpName::src2 };
7895   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7896                          AMDGPU::OpName::src1_modifiers,
7897                          AMDGPU::OpName::src2_modifiers };
7898 
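  // Gather the packed op_sel/op_sel_hi/neg_lo/neg_hi immediates so that their
  // per-source bits can be folded into the src*_modifiers operands below.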
7899   unsigned OpSel = 0;
7900   unsigned OpSelHi = 0;
7901   unsigned NegLo = 0;
7902   unsigned NegHi = 0;
7903 
7904   if (OpSelIdx != -1)
7905     OpSel = Inst.getOperand(OpSelIdx).getImm();
7906 
7907   if (OpSelHiIdx != -1)
7908     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7909 
7910   if (NegLoIdx != -1) {
7911     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7912     NegLo = Inst.getOperand(NegLoIdx).getImm();
7913     NegHi = Inst.getOperand(NegHiIdx).getImm();
7914   }
7915 
7916   for (int J = 0; J < 3; ++J) {
7917     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7918     if (OpIdx == -1)
7919       break;
7920 
7921     uint32_t ModVal = 0;
7922 
7923     if ((OpSel & (1 << J)) != 0)
7924       ModVal |= SISrcMods::OP_SEL_0;
7925 
7926     if ((OpSelHi & (1 << J)) != 0)
7927       ModVal |= SISrcMods::OP_SEL_1;
7928 
7929     if ((NegLo & (1 << J)) != 0)
7930       ModVal |= SISrcMods::NEG;
7931 
7932     if ((NegHi & (1 << J)) != 0)
7933       ModVal |= SISrcMods::NEG_HI;
7934 
7935     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7936 
7937     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7938   }
7939 }
7940 
7941 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7942   OptionalImmIndexMap OptIdx;
7943   cvtVOP3(Inst, Operands, OptIdx);
7944   cvtVOP3P(Inst, Operands, OptIdx);
7945 }
7946 
7947 //===----------------------------------------------------------------------===//
7948 // dpp
7949 //===----------------------------------------------------------------------===//
7950 
7951 bool AMDGPUOperand::isDPP8() const {
7952   return isImmTy(ImmTyDPP8);
7953 }
7954 
7955 bool AMDGPUOperand::isDPPCtrl() const {
7956   using namespace AMDGPU::DPP;
7957 
7958   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7959   if (result) {
7960     int64_t Imm = getImm();
7961     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7962            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7963            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7964            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7965            (Imm == DppCtrl::WAVE_SHL1) ||
7966            (Imm == DppCtrl::WAVE_ROL1) ||
7967            (Imm == DppCtrl::WAVE_SHR1) ||
7968            (Imm == DppCtrl::WAVE_ROR1) ||
7969            (Imm == DppCtrl::ROW_MIRROR) ||
7970            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7971            (Imm == DppCtrl::BCAST15) ||
7972            (Imm == DppCtrl::BCAST31) ||
7973            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7974            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7975   }
7976   return false;
7977 }
7978 
7979 //===----------------------------------------------------------------------===//
7980 // mAI
7981 //===----------------------------------------------------------------------===//
7982 
7983 bool AMDGPUOperand::isBLGP() const {
7984   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7985 }
7986 
7987 bool AMDGPUOperand::isCBSZ() const {
7988   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7989 }
7990 
7991 bool AMDGPUOperand::isABID() const {
7992   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7993 }
7994 
7995 bool AMDGPUOperand::isS16Imm() const {
7996   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7997 }
7998 
7999 bool AMDGPUOperand::isU16Imm() const {
8000   return isImm() && isUInt<16>(getImm());
8001 }
8002 
8003 //===----------------------------------------------------------------------===//
8004 // dim
8005 //===----------------------------------------------------------------------===//
8006 
8007 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8008   // We want to allow "dim:1D" etc.,
8009   // but the initial 1 is tokenized as an integer.
8010   std::string Token;
8011   if (isToken(AsmToken::Integer)) {
8012     SMLoc Loc = getToken().getEndLoc();
8013     Token = std::string(getTokenStr());
8014     lex();
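    // The suffix must start exactly where the integer ended, i.e. there must
    // be no whitespace between them.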
8015     if (getLoc() != Loc)
8016       return false;
8017   }
8018 
8019   StringRef Suffix;
8020   if (!parseId(Suffix))
8021     return false;
8022   Token += Suffix;
8023 
8024   StringRef DimId = Token;
8025   if (DimId.startswith("SQ_RSRC_IMG_"))
8026     DimId = DimId.drop_front(12);
8027 
8028   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8029   if (!DimInfo)
8030     return false;
8031 
8032   Encoding = DimInfo->Encoding;
8033   return true;
8034 }
8035 
8036 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8037   if (!isGFX10Plus())
8038     return MatchOperand_NoMatch;
8039 
8040   SMLoc S = getLoc();
8041 
8042   if (!trySkipId("dim", AsmToken::Colon))
8043     return MatchOperand_NoMatch;
8044 
8045   unsigned Encoding;
8046   SMLoc Loc = getLoc();
8047   if (!parseDimId(Encoding)) {
8048     Error(Loc, "invalid dim value");
8049     return MatchOperand_ParseFail;
8050   }
8051 
8052   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8053                                               AMDGPUOperand::ImmTyDim));
8054   return MatchOperand_Success;
8055 }
8056 
8057 //===----------------------------------------------------------------------===//
8058 // dpp
8059 //===----------------------------------------------------------------------===//
8060 
8061 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8062   SMLoc S = getLoc();
8063 
8064   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8065     return MatchOperand_NoMatch;
8066 
8067   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8068 
8069   int64_t Sels[8];
8070 
8071   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8072     return MatchOperand_ParseFail;
8073 
8074   for (size_t i = 0; i < 8; ++i) {
8075     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8076       return MatchOperand_ParseFail;
8077 
8078     SMLoc Loc = getLoc();
8079     if (getParser().parseAbsoluteExpression(Sels[i]))
8080       return MatchOperand_ParseFail;
    if (Sels[i] < 0 || Sels[i] > 7) {
8082       Error(Loc, "expected a 3-bit value");
8083       return MatchOperand_ParseFail;
8084     }
8085   }
8086 
8087   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8088     return MatchOperand_ParseFail;
8089 
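  // Pack the eight 3-bit lane selects into a single immediate; lane i occupies
  // bits [3*i+2 : 3*i].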
8090   unsigned DPP8 = 0;
8091   for (size_t i = 0; i < 8; ++i)
8092     DPP8 |= (Sels[i] << (i * 3));
8093 
8094   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8095   return MatchOperand_Success;
8096 }
8097 
8098 bool
8099 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8100                                     const OperandVector &Operands) {
8101   if (Ctrl == "row_newbcast")
8102     return isGFX90A();
8103 
8104   if (Ctrl == "row_share" ||
8105       Ctrl == "row_xmask")
8106     return isGFX10Plus();
8107 
8108   if (Ctrl == "wave_shl" ||
8109       Ctrl == "wave_shr" ||
8110       Ctrl == "wave_rol" ||
8111       Ctrl == "wave_ror" ||
8112       Ctrl == "row_bcast")
8113     return isVI() || isGFX9();
8114 
8115   return Ctrl == "row_mirror" ||
8116          Ctrl == "row_half_mirror" ||
8117          Ctrl == "quad_perm" ||
8118          Ctrl == "row_shl" ||
8119          Ctrl == "row_shr" ||
8120          Ctrl == "row_ror";
8121 }
8122 
8123 int64_t
8124 AMDGPUAsmParser::parseDPPCtrlPerm() {
8125   // quad_perm:[%d,%d,%d,%d]
8126 
8127   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8128     return -1;
8129 
8130   int64_t Val = 0;
8131   for (int i = 0; i < 4; ++i) {
8132     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8133       return -1;
8134 
8135     int64_t Temp;
8136     SMLoc Loc = getLoc();
8137     if (getParser().parseAbsoluteExpression(Temp))
8138       return -1;
8139     if (Temp < 0 || Temp > 3) {
8140       Error(Loc, "expected a 2-bit value");
8141       return -1;
8142     }
8143 
    Val += (Temp << (i * 2));
8145   }
8146 
8147   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8148     return -1;
8149 
8150   return Val;
8151 }
8152 
8153 int64_t
8154 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8155   using namespace AMDGPU::DPP;
8156 
8157   // sel:%d
8158 
8159   int64_t Val;
8160   SMLoc Loc = getLoc();
8161 
8162   if (getParser().parseAbsoluteExpression(Val))
8163     return -1;
8164 
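  // Map each dpp ctrl prefix to its base encoding and the range of values it
  // accepts.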
8165   struct DppCtrlCheck {
8166     int64_t Ctrl;
8167     int Lo;
8168     int Hi;
8169   };
8170 
8171   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8172     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8173     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8174     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8175     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8176     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8177     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8178     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8179     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8180     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8181     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8182     .Default({-1, 0, 0});
8183 
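  // row_bcast has no table entry; it only accepts 15 or 31, which map to
  // BCAST15/BCAST31.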
8184   bool Valid;
8185   if (Check.Ctrl == -1) {
8186     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8188   } else {
8189     Valid = Check.Lo <= Val && Val <= Check.Hi;
8190     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8191   }
8192 
8193   if (!Valid) {
8194     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8195     return -1;
8196   }
8197 
8198   return Val;
8199 }
8200 
8201 OperandMatchResultTy
8202 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8203   using namespace AMDGPU::DPP;
8204 
8205   if (!isToken(AsmToken::Identifier) ||
8206       !isSupportedDPPCtrl(getTokenStr(), Operands))
8207     return MatchOperand_NoMatch;
8208 
8209   SMLoc S = getLoc();
8210   int64_t Val = -1;
8211   StringRef Ctrl;
8212 
8213   parseId(Ctrl);
8214 
8215   if (Ctrl == "row_mirror") {
8216     Val = DppCtrl::ROW_MIRROR;
8217   } else if (Ctrl == "row_half_mirror") {
8218     Val = DppCtrl::ROW_HALF_MIRROR;
8219   } else {
8220     if (skipToken(AsmToken::Colon, "expected a colon")) {
8221       if (Ctrl == "quad_perm") {
8222         Val = parseDPPCtrlPerm();
8223       } else {
8224         Val = parseDPPCtrlSel(Ctrl);
8225       }
8226     }
8227   }
8228 
8229   if (Val == -1)
8230     return MatchOperand_ParseFail;
8231 
8232   Operands.push_back(
8233     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8234   return MatchOperand_Success;
8235 }
8236 
8237 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8238   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8239 }
8240 
8241 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8242   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8243 }
8244 
8245 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8246   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8247 }
8248 
8249 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8250   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8251 }
8252 
8253 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8254   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8255 }
8256 
8257 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8258   OptionalImmIndexMap OptionalIdx;
8259 
8260   unsigned Opc = Inst.getOpcode();
8261   bool HasModifiers =
8262       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8263   unsigned I = 1;
8264   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8265   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8266     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8267   }
8268 
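  // For dpp8, the fi modifier is collected here and appended as an immediate
  // after all explicit operands.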
8269   int Fi = 0;
8270   for (unsigned E = Operands.size(); I != E; ++I) {
8271     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8272                                             MCOI::TIED_TO);
8273     if (TiedTo != -1) {
8274       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or src2 operand for MAC instructions.
8276       Inst.addOperand(Inst.getOperand(TiedTo));
8277     }
8278     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8279     // Add the register arguments
8280     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
      // Skip it.
8283       continue;
8284     }
8285 
8286     if (IsDPP8) {
8287       if (Op.isDPP8()) {
8288         Op.addImmOperands(Inst, 1);
8289       } else if (HasModifiers &&
8290                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8291         Op.addRegWithFPInputModsOperands(Inst, 2);
8292       } else if (Op.isFI()) {
8293         Fi = Op.getImm();
8294       } else if (Op.isReg()) {
8295         Op.addRegOperands(Inst, 1);
8296       } else {
8297         llvm_unreachable("Invalid operand type");
8298       }
8299     } else {
8300       if (HasModifiers &&
8301           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8302         Op.addRegWithFPInputModsOperands(Inst, 2);
8303       } else if (Op.isReg()) {
8304         Op.addRegOperands(Inst, 1);
8305       } else if (Op.isDPPCtrl()) {
8306         Op.addImmOperands(Inst, 1);
8307       } else if (Op.isImm()) {
8308         // Handle optional arguments
8309         OptionalIdx[Op.getImmTy()] = I;
8310       } else {
8311         llvm_unreachable("Invalid operand type");
8312       }
8313     }
8314   }
8315 
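  // dpp8 takes only the fi immediate; classic dpp gets the optional row_mask,
  // bank_mask, bound_ctrl and (where present) fi operands.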
8316   if (IsDPP8) {
8317     using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8319   } else {
8320     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8321     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8322     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8323     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8324       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8325     }
8326   }
8327 }
8328 
8329 //===----------------------------------------------------------------------===//
8330 // sdwa
8331 //===----------------------------------------------------------------------===//
8332 
8333 OperandMatchResultTy
8334 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8335                               AMDGPUOperand::ImmTy Type) {
8336   using namespace llvm::AMDGPU::SDWA;
8337 
8338   SMLoc S = getLoc();
8339   StringRef Value;
8340   OperandMatchResultTy res;
8341 
8342   SMLoc StringLoc;
8343   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8344   if (res != MatchOperand_Success) {
8345     return res;
8346   }
8347 
8348   int64_t Int;
8349   Int = StringSwitch<int64_t>(Value)
8350         .Case("BYTE_0", SdwaSel::BYTE_0)
8351         .Case("BYTE_1", SdwaSel::BYTE_1)
8352         .Case("BYTE_2", SdwaSel::BYTE_2)
8353         .Case("BYTE_3", SdwaSel::BYTE_3)
8354         .Case("WORD_0", SdwaSel::WORD_0)
8355         .Case("WORD_1", SdwaSel::WORD_1)
8356         .Case("DWORD", SdwaSel::DWORD)
8357         .Default(0xffffffff);
8358 
8359   if (Int == 0xffffffff) {
8360     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8361     return MatchOperand_ParseFail;
8362   }
8363 
8364   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8365   return MatchOperand_Success;
8366 }
8367 
8368 OperandMatchResultTy
8369 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8370   using namespace llvm::AMDGPU::SDWA;
8371 
8372   SMLoc S = getLoc();
8373   StringRef Value;
8374   OperandMatchResultTy res;
8375 
8376   SMLoc StringLoc;
8377   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8378   if (res != MatchOperand_Success) {
8379     return res;
8380   }
8381 
8382   int64_t Int;
8383   Int = StringSwitch<int64_t>(Value)
8384         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8385         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8386         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8387         .Default(0xffffffff);
8388 
8389   if (Int == 0xffffffff) {
8390     Error(StringLoc, "invalid dst_unused value");
8391     return MatchOperand_ParseFail;
8392   }
8393 
8394   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8395   return MatchOperand_Success;
8396 }
8397 
8398 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8399   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8400 }
8401 
8402 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8403   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8404 }
8405 
8406 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8407   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8408 }
8409 
8410 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8411   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8412 }
8413 
8414 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8415   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8416 }
8417 
8418 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8419                               uint64_t BasicInstType,
8420                               bool SkipDstVcc,
8421                               bool SkipSrcVcc) {
8422   using namespace llvm::AMDGPU::SDWA;
8423 
8424   OptionalImmIndexMap OptionalIdx;
8425   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8426   bool SkippedVcc = false;
8427 
8428   unsigned I = 1;
8429   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8430   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8431     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8432   }
8433 
8434   for (unsigned E = Operands.size(); I != E; ++I) {
8435     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8436     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8437         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
8439       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8440       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8441       // Skip VCC only if we didn't skip it on previous iteration.
8442       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8443       if (BasicInstType == SIInstrFlags::VOP2 &&
8444           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8445            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8446         SkippedVcc = true;
8447         continue;
8448       } else if (BasicInstType == SIInstrFlags::VOPC &&
8449                  Inst.getNumOperands() == 0) {
8450         SkippedVcc = true;
8451         continue;
8452       }
8453     }
8454     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8455       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8456     } else if (Op.isImm()) {
8457       // Handle optional arguments
8458       OptionalIdx[Op.getImmTy()] = I;
8459     } else {
8460       llvm_unreachable("Invalid operand type");
8461     }
8462     SkippedVcc = false;
8463   }
8464 
8465   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8466       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8467       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
8469     switch (BasicInstType) {
8470     case SIInstrFlags::VOP1:
8471       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8472       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8473         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8474       }
8475       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8476       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8477       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8478       break;
8479 
8480     case SIInstrFlags::VOP2:
8481       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8482       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8483         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8484       }
8485       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8486       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8487       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8488       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8489       break;
8490 
8491     case SIInstrFlags::VOPC:
8492       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8493         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8494       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8495       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8496       break;
8497 
8498     default:
8499       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8500     }
8501   }
8502 
  // Special case v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
8505   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8506       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8507     auto it = Inst.begin();
8508     std::advance(
8509       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8510     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8511   }
8512 }
8513 
8514 //===----------------------------------------------------------------------===//
8515 // mAI
8516 //===----------------------------------------------------------------------===//
8517 
8518 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8519   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8520 }
8521 
8522 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8523   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8524 }
8525 
8526 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8527   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8528 }
8529 
8530 /// Force static initialization.
8531 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8532   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8533   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8534 }
8535 
8536 #define GET_REGISTER_MATCHER
8537 #define GET_MATCHER_IMPLEMENTATION
8538 #define GET_MNEMONIC_SPELL_CHECKER
8539 #define GET_MNEMONIC_CHECKER
8540 #include "AMDGPUGenAsmMatcher.inc"
8541 
// This function should be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
8544 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8545                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected to get the corresponding token.
8550   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8551   switch (Kind) {
8552   case MCK_addr64:
8553     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8554   case MCK_gds:
8555     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8556   case MCK_lds:
8557     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8558   case MCK_idxen:
8559     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8560   case MCK_offen:
8561     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8562   case MCK_SSrcB32:
8563     // When operands have expression values, they will return true for isToken,
8564     // because it is not possible to distinguish between a token and an
8565     // expression at parse time. MatchInstructionImpl() will always try to
8566     // match an operand as a token, when isToken returns true, and when the
8567     // name of the expression is not a valid token, the match will fail,
8568     // so we need to handle it here.
8569     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8570   case MCK_SSrcF32:
8571     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8572   case MCK_SoppBrTarget:
8573     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8574   case MCK_VReg32OrOff:
8575     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8576   case MCK_InterpSlot:
8577     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8578   case MCK_Attr:
8579     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8580   case MCK_AttrChan:
8581     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8582   case MCK_ImmSMEMOffset:
8583     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8584   case MCK_SReg_64:
8585   case MCK_SReg_64_XEXEC:
8586     // Null is defined as a 32-bit register but
8587     // it should also be enabled with 64-bit operands.
8588     // The following code enables it for SReg_64 operands
8589     // used as source and destination. Remaining source
8590     // operands are handled in isInlinableImm.
8591     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8592   default:
8593     return Match_InvalidOperand;
8594   }
8595 }
8596 
8597 //===----------------------------------------------------------------------===//
8598 // endpgm
8599 //===----------------------------------------------------------------------===//
8600 
8601 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8602   SMLoc S = getLoc();
8603   int64_t Imm = 0;
8604 
8605   if (!parseExpr(Imm)) {
8606     // The operand is optional, if not present default to 0
8607     Imm = 0;
8608   }
8609 
8610   if (!isUInt<16>(Imm)) {
8611     Error(S, "expected a 16-bit value");
8612     return MatchOperand_ParseFail;
8613   }
8614 
8615   Operands.push_back(
8616       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8617   return MatchOperand_Success;
8618 }
8619 
8620 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8621