1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/TargetParser.h"
40 
41 using namespace llvm;
42 using namespace llvm::AMDGPU;
43 using namespace llvm::amdhsa;
44 
45 namespace {
46 
47 class AMDGPUAsmParser;
48 
49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
50 
51 //===----------------------------------------------------------------------===//
52 // Operand
53 //===----------------------------------------------------------------------===//
54 
55 class AMDGPUOperand : public MCParsedAsmOperand {
56   enum KindTy {
57     Token,
58     Immediate,
59     Register,
60     Expression
61   } Kind;
62 
63   SMLoc StartLoc, EndLoc;
64   const AMDGPUAsmParser *AsmParser;
65 
66 public:
67   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
68       : Kind(Kind_), AsmParser(AsmParser_) {}
69 
70   using Ptr = std::unique_ptr<AMDGPUOperand>;
71 
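  // Source operand modifiers (abs/neg for floating-point sources, sext for
  // integer sources) that may be attached to a register or immediate operand.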
72   struct Modifiers {
73     bool Abs = false;
74     bool Neg = false;
75     bool Sext = false;
76 
77     bool hasFPModifiers() const { return Abs || Neg; }
78     bool hasIntModifiers() const { return Sext; }
79     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
80 
81     int64_t getFPModifiersOperand() const {
82       int64_t Operand = 0;
83       Operand |= Abs ? SISrcMods::ABS : 0u;
84       Operand |= Neg ? SISrcMods::NEG : 0u;
85       return Operand;
86     }
87 
88     int64_t getIntModifiersOperand() const {
89       int64_t Operand = 0;
90       Operand |= Sext ? SISrcMods::SEXT : 0u;
91       return Operand;
92     }
93 
94     int64_t getModifiersOperand() const {
95       assert(!(hasFPModifiers() && hasIntModifiers())
96            && "fp and int modifiers should not be used simultaneously");
97       if (hasFPModifiers()) {
98         return getFPModifiersOperand();
99       } else if (hasIntModifiers()) {
100         return getIntModifiersOperand();
101       } else {
102         return 0;
103       }
104     }
105 
106     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107   };
108 
109   enum ImmTy {
110     ImmTyNone,
111     ImmTyGDS,
112     ImmTyLDS,
113     ImmTyOffen,
114     ImmTyIdxen,
115     ImmTyAddr64,
116     ImmTyOffset,
117     ImmTyInstOffset,
118     ImmTyOffset0,
119     ImmTyOffset1,
120     ImmTyCPol,
121     ImmTySWZ,
122     ImmTyTFE,
123     ImmTyD16,
124     ImmTyClampSI,
125     ImmTyOModSI,
126     ImmTyDPP8,
127     ImmTyDppCtrl,
128     ImmTyDppRowMask,
129     ImmTyDppBankMask,
130     ImmTyDppBoundCtrl,
131     ImmTyDppFi,
132     ImmTySdwaDstSel,
133     ImmTySdwaSrc0Sel,
134     ImmTySdwaSrc1Sel,
135     ImmTySdwaDstUnused,
136     ImmTyDMask,
137     ImmTyDim,
138     ImmTyUNorm,
139     ImmTyDA,
140     ImmTyR128A16,
141     ImmTyA16,
142     ImmTyLWE,
143     ImmTyExpTgt,
144     ImmTyExpCompr,
145     ImmTyExpVM,
146     ImmTyFORMAT,
147     ImmTyHwreg,
148     ImmTyOff,
149     ImmTySendMsg,
150     ImmTyInterpSlot,
151     ImmTyInterpAttr,
152     ImmTyAttrChan,
153     ImmTyOpSel,
154     ImmTyOpSelHi,
155     ImmTyNegLo,
156     ImmTyNegHi,
157     ImmTySwizzle,
158     ImmTyGprIdxMode,
159     ImmTyHigh,
160     ImmTyBLGP,
161     ImmTyCBSZ,
162     ImmTyABID,
163     ImmTyEndpgm,
164   };
165 
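  // Records how an immediate operand was (or will be) encoded: undecided,
  // as a literal constant, or as an inline constant.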
166   enum ImmKindTy {
167     ImmKindTyNone,
168     ImmKindTyLiteral,
169     ImmKindTyConst,
170   };
171 
172 private:
173   struct TokOp {
174     const char *Data;
175     unsigned Length;
176   };
177 
178   struct ImmOp {
179     int64_t Val;
180     ImmTy Type;
181     bool IsFPImm;
182     mutable ImmKindTy Kind;
183     Modifiers Mods;
184   };
185 
186   struct RegOp {
187     unsigned RegNo;
188     Modifiers Mods;
189   };
190 
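  // Operand payload; the active member is selected by Kind.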
191   union {
192     TokOp Tok;
193     ImmOp Imm;
194     RegOp Reg;
195     const MCExpr *Expr;
196   };
197 
198 public:
199   bool isToken() const override {
200     if (Kind == Token)
201       return true;
202 
203     // When parsing operands, we can't always tell if something was meant to be
204     // a token, like 'gds', or an expression that references a global variable.
205     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
207     return isSymbolRefExpr();
208   }
209 
210   bool isSymbolRefExpr() const {
211     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
212   }
213 
214   bool isImm() const override {
215     return Kind == Immediate;
216   }
217 
218   void setImmKindNone() const {
219     assert(isImm());
220     Imm.Kind = ImmKindTyNone;
221   }
222 
223   void setImmKindLiteral() const {
224     assert(isImm());
225     Imm.Kind = ImmKindTyLiteral;
226   }
227 
228   void setImmKindConst() const {
229     assert(isImm());
230     Imm.Kind = ImmKindTyConst;
231   }
232 
233   bool IsImmKindLiteral() const {
234     return isImm() && Imm.Kind == ImmKindTyLiteral;
235   }
236 
237   bool isImmKindConst() const {
238     return isImm() && Imm.Kind == ImmKindTyConst;
239   }
240 
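  // An immediate is "inlinable" if it can be encoded as one of the hardware
  // inline constants; otherwise it has to be emitted as a separate literal.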
241   bool isInlinableImm(MVT type) const;
242   bool isLiteralImm(MVT type) const;
243 
244   bool isRegKind() const {
245     return Kind == Register;
246   }
247 
248   bool isReg() const override {
249     return isRegKind() && !hasModifiers();
250   }
251 
252   bool isRegOrInline(unsigned RCID, MVT type) const {
253     return isRegClass(RCID) || isInlinableImm(type);
254   }
255 
256   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
257     return isRegOrInline(RCID, type) || isLiteralImm(type);
258   }
259 
260   bool isRegOrImmWithInt16InputMods() const {
261     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
262   }
263 
264   bool isRegOrImmWithInt32InputMods() const {
265     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
266   }
267 
268   bool isRegOrImmWithInt64InputMods() const {
269     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
270   }
271 
272   bool isRegOrImmWithFP16InputMods() const {
273     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
274   }
275 
276   bool isRegOrImmWithFP32InputMods() const {
277     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
278   }
279 
280   bool isRegOrImmWithFP64InputMods() const {
281     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
282   }
283 
284   bool isVReg() const {
285     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
286            isRegClass(AMDGPU::VReg_64RegClassID) ||
287            isRegClass(AMDGPU::VReg_96RegClassID) ||
288            isRegClass(AMDGPU::VReg_128RegClassID) ||
289            isRegClass(AMDGPU::VReg_160RegClassID) ||
290            isRegClass(AMDGPU::VReg_192RegClassID) ||
291            isRegClass(AMDGPU::VReg_256RegClassID) ||
292            isRegClass(AMDGPU::VReg_512RegClassID) ||
293            isRegClass(AMDGPU::VReg_1024RegClassID);
294   }
295 
296   bool isVReg32() const {
297     return isRegClass(AMDGPU::VGPR_32RegClassID);
298   }
299 
300   bool isVReg32OrOff() const {
301     return isOff() || isVReg32();
302   }
303 
304   bool isNull() const {
305     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
306   }
307 
308   bool isVRegWithInputMods() const;
309 
310   bool isSDWAOperand(MVT type) const;
311   bool isSDWAFP16Operand() const;
312   bool isSDWAFP32Operand() const;
313   bool isSDWAInt16Operand() const;
314   bool isSDWAInt32Operand() const;
315 
316   bool isImmTy(ImmTy ImmT) const {
317     return isImm() && Imm.Type == ImmT;
318   }
319 
320   bool isImmModifier() const {
321     return isImm() && Imm.Type != ImmTyNone;
322   }
323 
324   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
325   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
326   bool isDMask() const { return isImmTy(ImmTyDMask); }
327   bool isDim() const { return isImmTy(ImmTyDim); }
328   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
329   bool isDA() const { return isImmTy(ImmTyDA); }
330   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
331   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
332   bool isLWE() const { return isImmTy(ImmTyLWE); }
333   bool isOff() const { return isImmTy(ImmTyOff); }
334   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
335   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
336   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
337   bool isOffen() const { return isImmTy(ImmTyOffen); }
338   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
339   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
340   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
341   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
342   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
343 
344   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
345   bool isGDS() const { return isImmTy(ImmTyGDS); }
346   bool isLDS() const { return isImmTy(ImmTyLDS); }
347   bool isCPol() const { return isImmTy(ImmTyCPol); }
348   bool isSWZ() const { return isImmTy(ImmTySWZ); }
349   bool isTFE() const { return isImmTy(ImmTyTFE); }
350   bool isD16() const { return isImmTy(ImmTyD16); }
351   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
352   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
353   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
354   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
355   bool isFI() const { return isImmTy(ImmTyDppFi); }
356   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
357   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
358   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
359   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
360   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
361   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
362   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
363   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
364   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
365   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
366   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
367   bool isHigh() const { return isImmTy(ImmTyHigh); }
368 
369   bool isMod() const {
370     return isClampSI() || isOModSI();
371   }
372 
373   bool isRegOrImm() const {
374     return isReg() || isImm();
375   }
376 
377   bool isRegClass(unsigned RCID) const;
378 
379   bool isInlineValue() const;
380 
381   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
382     return isRegOrInline(RCID, type) && !hasModifiers();
383   }
384 
385   bool isSCSrcB16() const {
386     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
387   }
388 
389   bool isSCSrcV2B16() const {
390     return isSCSrcB16();
391   }
392 
393   bool isSCSrcB32() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
395   }
396 
397   bool isSCSrcB64() const {
398     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
399   }
400 
401   bool isBoolReg() const;
402 
403   bool isSCSrcF16() const {
404     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
405   }
406 
407   bool isSCSrcV2F16() const {
408     return isSCSrcF16();
409   }
410 
411   bool isSCSrcF32() const {
412     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
413   }
414 
415   bool isSCSrcF64() const {
416     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
417   }
418 
419   bool isSSrcB32() const {
420     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
421   }
422 
423   bool isSSrcB16() const {
424     return isSCSrcB16() || isLiteralImm(MVT::i16);
425   }
426 
427   bool isSSrcV2B16() const {
428     llvm_unreachable("cannot happen");
429     return isSSrcB16();
430   }
431 
432   bool isSSrcB64() const {
433     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
434     // See isVSrc64().
435     return isSCSrcB64() || isLiteralImm(MVT::i64);
436   }
437 
438   bool isSSrcF32() const {
439     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
440   }
441 
442   bool isSSrcF64() const {
443     return isSCSrcB64() || isLiteralImm(MVT::f64);
444   }
445 
446   bool isSSrcF16() const {
447     return isSCSrcB16() || isLiteralImm(MVT::f16);
448   }
449 
450   bool isSSrcV2F16() const {
451     llvm_unreachable("cannot happen");
452     return isSSrcF16();
453   }
454 
455   bool isSSrcV2FP32() const {
456     llvm_unreachable("cannot happen");
457     return isSSrcF32();
458   }
459 
460   bool isSCSrcV2FP32() const {
461     llvm_unreachable("cannot happen");
462     return isSCSrcF32();
463   }
464 
465   bool isSSrcV2INT32() const {
466     llvm_unreachable("cannot happen");
467     return isSSrcB32();
468   }
469 
470   bool isSCSrcV2INT32() const {
471     llvm_unreachable("cannot happen");
472     return isSCSrcB32();
473   }
474 
475   bool isSSrcOrLdsB32() const {
476     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
477            isLiteralImm(MVT::i32) || isExpr();
478   }
479 
480   bool isVCSrcB32() const {
481     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
482   }
483 
484   bool isVCSrcB64() const {
485     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
486   }
487 
488   bool isVCSrcB16() const {
489     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
490   }
491 
492   bool isVCSrcV2B16() const {
493     return isVCSrcB16();
494   }
495 
496   bool isVCSrcF32() const {
497     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
498   }
499 
500   bool isVCSrcF64() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
502   }
503 
504   bool isVCSrcF16() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
506   }
507 
508   bool isVCSrcV2F16() const {
509     return isVCSrcF16();
510   }
511 
512   bool isVSrcB32() const {
513     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
514   }
515 
516   bool isVSrcB64() const {
517     return isVCSrcF64() || isLiteralImm(MVT::i64);
518   }
519 
520   bool isVSrcB16() const {
521     return isVCSrcB16() || isLiteralImm(MVT::i16);
522   }
523 
524   bool isVSrcV2B16() const {
525     return isVSrcB16() || isLiteralImm(MVT::v2i16);
526   }
527 
528   bool isVCSrcV2FP32() const {
529     return isVCSrcF64();
530   }
531 
532   bool isVSrcV2FP32() const {
533     return isVSrcF64() || isLiteralImm(MVT::v2f32);
534   }
535 
536   bool isVCSrcV2INT32() const {
537     return isVCSrcB64();
538   }
539 
540   bool isVSrcV2INT32() const {
541     return isVSrcB64() || isLiteralImm(MVT::v2i32);
542   }
543 
544   bool isVSrcF32() const {
545     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
546   }
547 
548   bool isVSrcF64() const {
549     return isVCSrcF64() || isLiteralImm(MVT::f64);
550   }
551 
552   bool isVSrcF16() const {
553     return isVCSrcF16() || isLiteralImm(MVT::f16);
554   }
555 
556   bool isVSrcV2F16() const {
557     return isVSrcF16() || isLiteralImm(MVT::v2f16);
558   }
559 
560   bool isVISrcB32() const {
561     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
562   }
563 
564   bool isVISrcB16() const {
565     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
566   }
567 
568   bool isVISrcV2B16() const {
569     return isVISrcB16();
570   }
571 
572   bool isVISrcF32() const {
573     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
574   }
575 
576   bool isVISrcF16() const {
577     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
578   }
579 
580   bool isVISrcV2F16() const {
581     return isVISrcF16() || isVISrcB32();
582   }
583 
584   bool isVISrc_64B64() const {
585     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
586   }
587 
588   bool isVISrc_64F64() const {
589     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
590   }
591 
592   bool isVISrc_64V2FP32() const {
593     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
594   }
595 
596   bool isVISrc_64V2INT32() const {
597     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
598   }
599 
600   bool isVISrc_256B64() const {
601     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
602   }
603 
604   bool isVISrc_256F64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
606   }
607 
608   bool isVISrc_128B16() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
610   }
611 
612   bool isVISrc_128V2B16() const {
613     return isVISrc_128B16();
614   }
615 
616   bool isVISrc_128B32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_128F32() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
622   }
623 
624   bool isVISrc_256V2FP32() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
626   }
627 
628   bool isVISrc_256V2INT32() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
630   }
631 
632   bool isVISrc_512B32() const {
633     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
634   }
635 
636   bool isVISrc_512B16() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
638   }
639 
640   bool isVISrc_512V2B16() const {
641     return isVISrc_512B16();
642   }
643 
644   bool isVISrc_512F32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_512F16() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
650   }
651 
652   bool isVISrc_512V2F16() const {
653     return isVISrc_512F16() || isVISrc_512B32();
654   }
655 
656   bool isVISrc_1024B32() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
658   }
659 
660   bool isVISrc_1024B16() const {
661     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
662   }
663 
664   bool isVISrc_1024V2B16() const {
665     return isVISrc_1024B16();
666   }
667 
668   bool isVISrc_1024F32() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
670   }
671 
672   bool isVISrc_1024F16() const {
673     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
674   }
675 
676   bool isVISrc_1024V2F16() const {
677     return isVISrc_1024F16() || isVISrc_1024B32();
678   }
679 
680   bool isAISrcB32() const {
681     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
682   }
683 
684   bool isAISrcB16() const {
685     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
686   }
687 
688   bool isAISrcV2B16() const {
689     return isAISrcB16();
690   }
691 
692   bool isAISrcF32() const {
693     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
694   }
695 
696   bool isAISrcF16() const {
697     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
698   }
699 
700   bool isAISrcV2F16() const {
701     return isAISrcF16() || isAISrcB32();
702   }
703 
704   bool isAISrc_64B64() const {
705     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
706   }
707 
708   bool isAISrc_64F64() const {
709     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
710   }
711 
712   bool isAISrc_128B32() const {
713     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
714   }
715 
716   bool isAISrc_128B16() const {
717     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
718   }
719 
720   bool isAISrc_128V2B16() const {
721     return isAISrc_128B16();
722   }
723 
724   bool isAISrc_128F32() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
726   }
727 
728   bool isAISrc_128F16() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
730   }
731 
732   bool isAISrc_128V2F16() const {
733     return isAISrc_128F16() || isAISrc_128B32();
734   }
735 
736   bool isVISrc_128F16() const {
737     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
738   }
739 
740   bool isVISrc_128V2F16() const {
741     return isVISrc_128F16() || isVISrc_128B32();
742   }
743 
744   bool isAISrc_256B64() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
746   }
747 
748   bool isAISrc_256F64() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
750   }
751 
752   bool isAISrc_512B32() const {
753     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
754   }
755 
756   bool isAISrc_512B16() const {
757     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
758   }
759 
760   bool isAISrc_512V2B16() const {
761     return isAISrc_512B16();
762   }
763 
764   bool isAISrc_512F32() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
766   }
767 
768   bool isAISrc_512F16() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
770   }
771 
772   bool isAISrc_512V2F16() const {
773     return isAISrc_512F16() || isAISrc_512B32();
774   }
775 
776   bool isAISrc_1024B32() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
778   }
779 
780   bool isAISrc_1024B16() const {
781     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
782   }
783 
784   bool isAISrc_1024V2B16() const {
785     return isAISrc_1024B16();
786   }
787 
788   bool isAISrc_1024F32() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
790   }
791 
792   bool isAISrc_1024F16() const {
793     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
794   }
795 
796   bool isAISrc_1024V2F16() const {
797     return isAISrc_1024F16() || isAISrc_1024B32();
798   }
799 
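  // KImm operands are mandatory literal constants carried by the instruction
  // itself rather than read from a register or an inline constant.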
800   bool isKImmFP32() const {
801     return isLiteralImm(MVT::f32);
802   }
803 
804   bool isKImmFP16() const {
805     return isLiteralImm(MVT::f16);
806   }
807 
808   bool isMem() const override {
809     return false;
810   }
811 
812   bool isExpr() const {
813     return Kind == Expression;
814   }
815 
816   bool isSoppBrTarget() const {
817     return isExpr() || isImm();
818   }
819 
820   bool isSWaitCnt() const;
821   bool isDepCtr() const;
822   bool isHwreg() const;
823   bool isSendMsg() const;
824   bool isSwizzle() const;
825   bool isSMRDOffset8() const;
826   bool isSMEMOffset() const;
827   bool isSMRDLiteralOffset() const;
828   bool isDPP8() const;
829   bool isDPPCtrl() const;
830   bool isBLGP() const;
831   bool isCBSZ() const;
832   bool isABID() const;
833   bool isGPRIdxMode() const;
834   bool isS16Imm() const;
835   bool isU16Imm() const;
836   bool isEndpgm() const;
837 
838   StringRef getExpressionAsToken() const {
839     assert(isExpr());
840     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
841     return S->getSymbol().getName();
842   }
843 
844   StringRef getToken() const {
845     assert(isToken());
846 
847     if (Kind == Expression)
848       return getExpressionAsToken();
849 
850     return StringRef(Tok.Data, Tok.Length);
851   }
852 
853   int64_t getImm() const {
854     assert(isImm());
855     return Imm.Val;
856   }
857 
858   void setImm(int64_t Val) {
859     assert(isImm());
860     Imm.Val = Val;
861   }
862 
863   ImmTy getImmTy() const {
864     assert(isImm());
865     return Imm.Type;
866   }
867 
868   unsigned getReg() const override {
869     assert(isRegKind());
870     return Reg.RegNo;
871   }
872 
873   SMLoc getStartLoc() const override {
874     return StartLoc;
875   }
876 
877   SMLoc getEndLoc() const override {
878     return EndLoc;
879   }
880 
881   SMRange getLocRange() const {
882     return SMRange(StartLoc, EndLoc);
883   }
884 
885   Modifiers getModifiers() const {
886     assert(isRegKind() || isImmTy(ImmTyNone));
887     return isRegKind() ? Reg.Mods : Imm.Mods;
888   }
889 
890   void setModifiers(Modifiers Mods) {
891     assert(isRegKind() || isImmTy(ImmTyNone));
892     if (isRegKind())
893       Reg.Mods = Mods;
894     else
895       Imm.Mods = Mods;
896   }
897 
898   bool hasModifiers() const {
899     return getModifiers().hasModifiers();
900   }
901 
902   bool hasFPModifiers() const {
903     return getModifiers().hasFPModifiers();
904   }
905 
906   bool hasIntModifiers() const {
907     return getModifiers().hasIntModifiers();
908   }
909 
910   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
911 
912   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
913 
914   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
915 
916   template <unsigned Bitwidth>
917   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
918 
919   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
920     addKImmFPOperands<16>(Inst, N);
921   }
922 
923   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
924     addKImmFPOperands<32>(Inst, N);
925   }
926 
927   void addRegOperands(MCInst &Inst, unsigned N) const;
928 
929   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
930     addRegOperands(Inst, N);
931   }
932 
933   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
934     if (isRegKind())
935       addRegOperands(Inst, N);
936     else if (isExpr())
937       Inst.addOperand(MCOperand::createExpr(Expr));
938     else
939       addImmOperands(Inst, N);
940   }
941 
942   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
943     Modifiers Mods = getModifiers();
944     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
945     if (isRegKind()) {
946       addRegOperands(Inst, N);
947     } else {
948       addImmOperands(Inst, N, false);
949     }
950   }
951 
952   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
953     assert(!hasIntModifiers());
954     addRegOrImmWithInputModsOperands(Inst, N);
955   }
956 
957   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
958     assert(!hasFPModifiers());
959     addRegOrImmWithInputModsOperands(Inst, N);
960   }
961 
962   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
963     Modifiers Mods = getModifiers();
964     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
965     assert(isRegKind());
966     addRegOperands(Inst, N);
967   }
968 
969   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
970     assert(!hasIntModifiers());
971     addRegWithInputModsOperands(Inst, N);
972   }
973 
974   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
975     assert(!hasFPModifiers());
976     addRegWithInputModsOperands(Inst, N);
977   }
978 
979   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
980     if (isImm())
981       addImmOperands(Inst, N);
982     else {
983       assert(isExpr());
984       Inst.addOperand(MCOperand::createExpr(Expr));
985     }
986   }
987 
988   static void printImmTy(raw_ostream& OS, ImmTy Type) {
989     switch (Type) {
990     case ImmTyNone: OS << "None"; break;
991     case ImmTyGDS: OS << "GDS"; break;
992     case ImmTyLDS: OS << "LDS"; break;
993     case ImmTyOffen: OS << "Offen"; break;
994     case ImmTyIdxen: OS << "Idxen"; break;
995     case ImmTyAddr64: OS << "Addr64"; break;
996     case ImmTyOffset: OS << "Offset"; break;
997     case ImmTyInstOffset: OS << "InstOffset"; break;
998     case ImmTyOffset0: OS << "Offset0"; break;
999     case ImmTyOffset1: OS << "Offset1"; break;
1000     case ImmTyCPol: OS << "CPol"; break;
1001     case ImmTySWZ: OS << "SWZ"; break;
1002     case ImmTyTFE: OS << "TFE"; break;
1003     case ImmTyD16: OS << "D16"; break;
1004     case ImmTyFORMAT: OS << "FORMAT"; break;
1005     case ImmTyClampSI: OS << "ClampSI"; break;
1006     case ImmTyOModSI: OS << "OModSI"; break;
1007     case ImmTyDPP8: OS << "DPP8"; break;
1008     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1009     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1010     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1011     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1012     case ImmTyDppFi: OS << "FI"; break;
1013     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1014     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1015     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1016     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1017     case ImmTyDMask: OS << "DMask"; break;
1018     case ImmTyDim: OS << "Dim"; break;
1019     case ImmTyUNorm: OS << "UNorm"; break;
1020     case ImmTyDA: OS << "DA"; break;
1021     case ImmTyR128A16: OS << "R128A16"; break;
1022     case ImmTyA16: OS << "A16"; break;
1023     case ImmTyLWE: OS << "LWE"; break;
1024     case ImmTyOff: OS << "Off"; break;
1025     case ImmTyExpTgt: OS << "ExpTgt"; break;
1026     case ImmTyExpCompr: OS << "ExpCompr"; break;
1027     case ImmTyExpVM: OS << "ExpVM"; break;
1028     case ImmTyHwreg: OS << "Hwreg"; break;
1029     case ImmTySendMsg: OS << "SendMsg"; break;
1030     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1031     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1032     case ImmTyAttrChan: OS << "AttrChan"; break;
1033     case ImmTyOpSel: OS << "OpSel"; break;
1034     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1035     case ImmTyNegLo: OS << "NegLo"; break;
1036     case ImmTyNegHi: OS << "NegHi"; break;
1037     case ImmTySwizzle: OS << "Swizzle"; break;
1038     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1039     case ImmTyHigh: OS << "High"; break;
1040     case ImmTyBLGP: OS << "BLGP"; break;
1041     case ImmTyCBSZ: OS << "CBSZ"; break;
1042     case ImmTyABID: OS << "ABID"; break;
1043     case ImmTyEndpgm: OS << "Endpgm"; break;
1044     }
1045   }
1046 
1047   void print(raw_ostream &OS) const override {
1048     switch (Kind) {
1049     case Register:
1050       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1051       break;
1052     case Immediate:
1053       OS << '<' << getImm();
1054       if (getImmTy() != ImmTyNone) {
1055         OS << " type: "; printImmTy(OS, getImmTy());
1056       }
1057       OS << " mods: " << Imm.Mods << '>';
1058       break;
1059     case Token:
1060       OS << '\'' << getToken() << '\'';
1061       break;
1062     case Expression:
1063       OS << "<expr " << *Expr << '>';
1064       break;
1065     }
1066   }
1067 
1068   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1069                                       int64_t Val, SMLoc Loc,
1070                                       ImmTy Type = ImmTyNone,
1071                                       bool IsFPImm = false) {
1072     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1073     Op->Imm.Val = Val;
1074     Op->Imm.IsFPImm = IsFPImm;
1075     Op->Imm.Kind = ImmKindTyNone;
1076     Op->Imm.Type = Type;
1077     Op->Imm.Mods = Modifiers();
1078     Op->StartLoc = Loc;
1079     Op->EndLoc = Loc;
1080     return Op;
1081   }
1082 
1083   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1084                                         StringRef Str, SMLoc Loc,
1085                                         bool HasExplicitEncodingSize = true) {
1086     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1087     Res->Tok.Data = Str.data();
1088     Res->Tok.Length = Str.size();
1089     Res->StartLoc = Loc;
1090     Res->EndLoc = Loc;
1091     return Res;
1092   }
1093 
1094   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1095                                       unsigned RegNo, SMLoc S,
1096                                       SMLoc E) {
1097     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1098     Op->Reg.RegNo = RegNo;
1099     Op->Reg.Mods = Modifiers();
1100     Op->StartLoc = S;
1101     Op->EndLoc = E;
1102     return Op;
1103   }
1104 
1105   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1106                                        const class MCExpr *Expr, SMLoc S) {
1107     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1108     Op->Expr = Expr;
1109     Op->StartLoc = S;
1110     Op->EndLoc = S;
1111     return Op;
1112   }
1113 };
1114 
1115 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1117   return OS;
1118 }
1119 
1120 //===----------------------------------------------------------------------===//
1121 // AsmParser
1122 //===----------------------------------------------------------------------===//
1123 
// Holds info related to the current kernel, e.g. the count of SGPRs used.
// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
1127 class KernelScopeInfo {
1128   int SgprIndexUnusedMin = -1;
1129   int VgprIndexUnusedMin = -1;
1130   int AgprIndexUnusedMin = -1;
1131   MCContext *Ctx = nullptr;
1132   MCSubtargetInfo const *MSTI = nullptr;
1133 
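  // Record a use of SGPR #i: raise the first-unused index and refresh the
  // .kernel.sgpr_count symbol.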
1134   void usesSgprAt(int i) {
1135     if (i >= SgprIndexUnusedMin) {
1136       SgprIndexUnusedMin = ++i;
1137       if (Ctx) {
1138         MCSymbol* const Sym =
1139           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1140         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1141       }
1142     }
1143   }
1144 
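  // Record a use of VGPR #i. The .kernel.vgpr_count symbol is derived from
  // both the VGPR and AGPR high-water marks via getTotalNumVGPRs().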
1145   void usesVgprAt(int i) {
1146     if (i >= VgprIndexUnusedMin) {
1147       VgprIndexUnusedMin = ++i;
1148       if (Ctx) {
1149         MCSymbol* const Sym =
1150           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1151         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1152                                          VgprIndexUnusedMin);
1153         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1154       }
1155     }
1156   }
1157 
1158   void usesAgprAt(int i) {
    // The instruction will be rejected later, in
    // AMDGPUAsmParser::MatchAndEmitInstruction.
1160     if (!hasMAIInsts(*MSTI))
1161       return;
1162 
1163     if (i >= AgprIndexUnusedMin) {
1164       AgprIndexUnusedMin = ++i;
1165       if (Ctx) {
1166         MCSymbol* const Sym =
1167           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1168         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1169 
1170         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1171         MCSymbol* const vSym =
1172           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1173         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1174                                          VgprIndexUnusedMin);
1175         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1176       }
1177     }
1178   }
1179 
1180 public:
1181   KernelScopeInfo() = default;
1182 
1183   void initialize(MCContext &Context) {
1184     Ctx = &Context;
1185     MSTI = Ctx->getSubtargetInfo();
1186 
1187     usesSgprAt(SgprIndexUnusedMin = -1);
1188     usesVgprAt(VgprIndexUnusedMin = -1);
1189     if (hasMAIInsts(*MSTI)) {
1190       usesAgprAt(AgprIndexUnusedMin = -1);
1191     }
1192   }
1193 
1194   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1195                     unsigned RegWidth) {
1196     switch (RegKind) {
1197     case IS_SGPR:
1198       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1199       break;
1200     case IS_AGPR:
1201       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1202       break;
1203     case IS_VGPR:
1204       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1205       break;
1206     default:
1207       break;
1208     }
1209   }
1210 };
1211 
1212 class AMDGPUAsmParser : public MCTargetAsmParser {
1213   MCAsmParser &Parser;
1214 
  // Maximum number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
1217   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1218 
1219   unsigned ForcedEncodingSize = 0;
1220   bool ForcedDPP = false;
1221   bool ForcedSDWA = false;
1222   KernelScopeInfo KernelScope;
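  // Cache policy (CPol) modifier bits already parsed for the current
  // instruction; used to detect duplicate modifiers.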
1223   unsigned CPolSeen;
1224 
1225   /// @name Auto-generated Match Functions
1226   /// {
1227 
1228 #define GET_ASSEMBLER_HEADER
1229 #include "AMDGPUGenAsmMatcher.inc"
1230 
1231   /// }
1232 
1233 private:
1234   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1235   bool OutOfRangeError(SMRange Range);
  /// Calculate the VGPR/SGPR blocks required for the given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
1238   ///
1239   /// \param Features [in] Target features, used for bug corrections.
1240   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1241   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1242   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1243   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1244   /// descriptor field, if valid.
1245   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1246   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1247   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1248   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1249   /// \param VGPRBlocks [out] Result VGPR block count.
1250   /// \param SGPRBlocks [out] Result SGPR block count.
1251   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1252                           bool FlatScrUsed, bool XNACKUsed,
1253                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1254                           SMRange VGPRRange, unsigned NextFreeSGPR,
1255                           SMRange SGPRRange, unsigned &VGPRBlocks,
1256                           unsigned &SGPRBlocks);
1257   bool ParseDirectiveAMDGCNTarget();
1258   bool ParseDirectiveAMDHSAKernel();
1259   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1260   bool ParseDirectiveHSACodeObjectVersion();
1261   bool ParseDirectiveHSACodeObjectISA();
1262   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1263   bool ParseDirectiveAMDKernelCodeT();
1264   // TODO: Possibly make subtargetHasRegister const.
1265   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1266   bool ParseDirectiveAMDGPUHsaKernel();
1267 
1268   bool ParseDirectiveISAVersion();
1269   bool ParseDirectiveHSAMetadata();
1270   bool ParseDirectivePALMetadataBegin();
1271   bool ParseDirectivePALMetadata();
1272   bool ParseDirectiveAMDGPULDS();
1273 
1274   /// Common code to parse out a block of text (typically YAML) between start and
1275   /// end directives.
1276   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1277                            const char *AssemblerDirectiveEnd,
1278                            std::string &CollectString);
1279 
1280   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1281                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1282   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1283                            unsigned &RegNum, unsigned &RegWidth,
1284                            bool RestoreOnFailure = false);
1285   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1286                            unsigned &RegNum, unsigned &RegWidth,
1287                            SmallVectorImpl<AsmToken> &Tokens);
1288   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1289                            unsigned &RegWidth,
1290                            SmallVectorImpl<AsmToken> &Tokens);
1291   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1292                            unsigned &RegWidth,
1293                            SmallVectorImpl<AsmToken> &Tokens);
1294   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1295                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1296   bool ParseRegRange(unsigned& Num, unsigned& Width);
1297   unsigned getRegularReg(RegisterKind RegKind,
1298                          unsigned RegNum,
1299                          unsigned RegWidth,
1300                          SMLoc Loc);
1301 
1302   bool isRegister();
1303   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1304   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1305   void initializeGprCountSymbol(RegisterKind RegKind);
1306   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1307                              unsigned RegWidth);
1308   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1309                     bool IsAtomic, bool IsLds = false);
1310   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1311                  bool IsGdsHardcoded);
1312 
1313 public:
1314   enum AMDGPUMatchResultTy {
1315     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1316   };
1317   enum OperandMode {
1318     OperandMode_Default,
1319     OperandMode_NSA,
1320   };
1321 
1322   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1323 
1324   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1325                const MCInstrInfo &MII,
1326                const MCTargetOptions &Options)
1327       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1328     MCAsmParserExtension::Initialize(Parser);
1329 
1330     if (getFeatureBits().none()) {
1331       // Set default features.
1332       copySTI().ToggleFeature("southern-islands");
1333     }
1334 
1335     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1336 
1337     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
1342       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1343       MCContext &Ctx = getContext();
1344       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1345         MCSymbol *Sym =
1346             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1347         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1348         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1349         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1350         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1351         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1352       } else {
1353         MCSymbol *Sym =
1354             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1355         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1356         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1357         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1358         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1359         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1360       }
1361       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1362         initializeGprCountSymbol(IS_VGPR);
1363         initializeGprCountSymbol(IS_SGPR);
1364       } else
1365         KernelScope.initialize(getContext());
1366     }
1367   }
1368 
1369   bool hasMIMG_R128() const {
1370     return AMDGPU::hasMIMG_R128(getSTI());
1371   }
1372 
1373   bool hasPackedD16() const {
1374     return AMDGPU::hasPackedD16(getSTI());
1375   }
1376 
1377   bool hasGFX10A16() const {
1378     return AMDGPU::hasGFX10A16(getSTI());
1379   }
1380 
1381   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1382 
1383   bool isSI() const {
1384     return AMDGPU::isSI(getSTI());
1385   }
1386 
1387   bool isCI() const {
1388     return AMDGPU::isCI(getSTI());
1389   }
1390 
1391   bool isVI() const {
1392     return AMDGPU::isVI(getSTI());
1393   }
1394 
1395   bool isGFX9() const {
1396     return AMDGPU::isGFX9(getSTI());
1397   }
1398 
  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1400   bool isGFX90A() const {
1401     return AMDGPU::isGFX90A(getSTI());
1402   }
1403 
1404   bool isGFX940() const {
1405     return AMDGPU::isGFX940(getSTI());
1406   }
1407 
1408   bool isGFX9Plus() const {
1409     return AMDGPU::isGFX9Plus(getSTI());
1410   }
1411 
1412   bool isGFX10() const {
1413     return AMDGPU::isGFX10(getSTI());
1414   }
1415 
1416   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1417 
1418   bool isGFX10_BEncoding() const {
1419     return AMDGPU::isGFX10_BEncoding(getSTI());
1420   }
1421 
1422   bool hasInv2PiInlineImm() const {
1423     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1424   }
1425 
1426   bool hasFlatOffsets() const {
1427     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1428   }
1429 
1430   bool hasArchitectedFlatScratch() const {
1431     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1432   }
1433 
1434   bool hasSGPR102_SGPR103() const {
1435     return !isVI() && !isGFX9();
1436   }
1437 
1438   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1439 
1440   bool hasIntClamp() const {
1441     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1442   }
1443 
1444   AMDGPUTargetStreamer &getTargetStreamer() {
1445     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1446     return static_cast<AMDGPUTargetStreamer &>(TS);
1447   }
1448 
1449   const MCRegisterInfo *getMRI() const {
1450     // We need this const_cast because for some reason getContext() is not const
1451     // in MCAsmParser.
1452     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1453   }
1454 
1455   const MCInstrInfo *getMII() const {
1456     return &MII;
1457   }
1458 
1459   const FeatureBitset &getFeatureBits() const {
1460     return getSTI().getFeatureBits();
1461   }
1462 
1463   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1464   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1465   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1466 
1467   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1468   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1469   bool isForcedDPP() const { return ForcedDPP; }
1470   bool isForcedSDWA() const { return ForcedSDWA; }
1471   ArrayRef<unsigned> getMatchedVariants() const;
1472   StringRef getMatchedVariantName() const;
1473 
1474   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1475   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1476                      bool RestoreOnFailure);
1477   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1478   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1479                                         SMLoc &EndLoc) override;
1480   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1481   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1482                                       unsigned Kind) override;
1483   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1484                                OperandVector &Operands, MCStreamer &Out,
1485                                uint64_t &ErrorInfo,
1486                                bool MatchingInlineAsm) override;
1487   bool ParseDirective(AsmToken DirectiveID) override;
1488   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1489                                     OperandMode Mode = OperandMode_Default);
1490   StringRef parseMnemonicSuffix(StringRef Name);
1491   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1492                         SMLoc NameLoc, OperandVector &Operands) override;
1493   //bool ProcessInstruction(MCInst &Inst);
1494 
1495   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1496 
1497   OperandMatchResultTy
1498   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1499                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1500                      bool (*ConvertResult)(int64_t &) = nullptr);
1501 
1502   OperandMatchResultTy
1503   parseOperandArrayWithPrefix(const char *Prefix,
1504                               OperandVector &Operands,
1505                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1506                               bool (*ConvertResult)(int64_t&) = nullptr);
1507 
1508   OperandMatchResultTy
1509   parseNamedBit(StringRef Name, OperandVector &Operands,
1510                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1511   OperandMatchResultTy parseCPol(OperandVector &Operands);
1512   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1513                                              StringRef &Value,
1514                                              SMLoc &StringLoc);
1515 
1516   bool isModifier();
1517   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1518   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1519   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1520   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1521   bool parseSP3NegModifier();
1522   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1523   OperandMatchResultTy parseReg(OperandVector &Operands);
1524   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1525   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1526   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1527   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1528   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1529   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1530   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1531   OperandMatchResultTy parseUfmt(int64_t &Format);
1532   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1533   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1534   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1535   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1536   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1537   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1538   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1539 
1540   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1541   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1542   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1543   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1544 
1545   bool parseCnt(int64_t &IntVal);
1546   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1547 
1548   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1549   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1550   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1551 
1552   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1553 
1554 private:
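  // A parsed field of a composite operand (e.g. a hwreg or sendmsg
  // component): its value, source location, and whether it was given
  // symbolically.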
1555   struct OperandInfoTy {
1556     SMLoc Loc;
1557     int64_t Id;
1558     bool IsSymbolic = false;
1559     bool IsDefined = false;
1560 
1561     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1562   };
1563 
1564   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1565   bool validateSendMsg(const OperandInfoTy &Msg,
1566                        const OperandInfoTy &Op,
1567                        const OperandInfoTy &Stream);
1568 
1569   bool parseHwregBody(OperandInfoTy &HwReg,
1570                       OperandInfoTy &Offset,
1571                       OperandInfoTy &Width);
1572   bool validateHwreg(const OperandInfoTy &HwReg,
1573                      const OperandInfoTy &Offset,
1574                      const OperandInfoTy &Width);
1575 
1576   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1577   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1578   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1579 
1580   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1581                       const OperandVector &Operands) const;
1582   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1583   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1584   SMLoc getLitLoc(const OperandVector &Operands) const;
1585   SMLoc getConstLoc(const OperandVector &Operands) const;
1586 
1587   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1588   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1589   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1590   bool validateSOPLiteral(const MCInst &Inst) const;
1591   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1592   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1593   bool validateIntClampSupported(const MCInst &Inst);
1594   bool validateMIMGAtomicDMask(const MCInst &Inst);
1595   bool validateMIMGGatherDMask(const MCInst &Inst);
1596   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1597   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1598   bool validateMIMGAddrSize(const MCInst &Inst);
1599   bool validateMIMGD16(const MCInst &Inst);
1600   bool validateMIMGDim(const MCInst &Inst);
1601   bool validateMIMGMSAA(const MCInst &Inst);
1602   bool validateOpSel(const MCInst &Inst);
1603   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1604   bool validateVccOperand(unsigned Reg) const;
1605   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1606   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1607   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1608   bool validateAGPRLdSt(const MCInst &Inst) const;
1609   bool validateVGPRAlign(const MCInst &Inst) const;
1610   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1611   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1612   bool validateDivScale(const MCInst &Inst);
1613   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1614                              const SMLoc &IDLoc);
1615   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1616   unsigned getConstantBusLimit(unsigned Opcode) const;
1617   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1618   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1619   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1620 
1621   bool isSupportedMnemo(StringRef Mnemo,
1622                         const FeatureBitset &FBS);
1623   bool isSupportedMnemo(StringRef Mnemo,
1624                         const FeatureBitset &FBS,
1625                         ArrayRef<unsigned> Variants);
1626   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1627 
1628   bool isId(const StringRef Id) const;
1629   bool isId(const AsmToken &Token, const StringRef Id) const;
1630   bool isToken(const AsmToken::TokenKind Kind) const;
1631   bool trySkipId(const StringRef Id);
1632   bool trySkipId(const StringRef Pref, const StringRef Id);
1633   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1634   bool trySkipToken(const AsmToken::TokenKind Kind);
1635   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1636   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1637   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1638 
1639   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1640   AsmToken::TokenKind getTokenKind() const;
1641   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1642   bool parseExpr(OperandVector &Operands);
1643   StringRef getTokenStr() const;
1644   AsmToken peekToken();
1645   AsmToken getToken() const;
1646   SMLoc getLoc() const;
1647   void lex();
1648 
1649 public:
1650   void onBeginOfFile() override;
1651 
1652   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1653   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1654 
1655   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1656   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1657   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1658   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1659   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1660   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1661 
1662   bool parseSwizzleOperand(int64_t &Op,
1663                            const unsigned MinVal,
1664                            const unsigned MaxVal,
1665                            const StringRef ErrMsg,
1666                            SMLoc &Loc);
1667   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1668                             const unsigned MinVal,
1669                             const unsigned MaxVal,
1670                             const StringRef ErrMsg);
1671   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1672   bool parseSwizzleOffset(int64_t &Imm);
1673   bool parseSwizzleMacro(int64_t &Imm);
1674   bool parseSwizzleQuadPerm(int64_t &Imm);
1675   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1676   bool parseSwizzleBroadcast(int64_t &Imm);
1677   bool parseSwizzleSwap(int64_t &Imm);
1678   bool parseSwizzleReverse(int64_t &Imm);
1679 
1680   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1681   int64_t parseGPRIdxMacro();
1682 
1683   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1684   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1685   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1686   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1687 
1688   AMDGPUOperand::Ptr defaultCPol() const;
1689 
1690   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1691   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1692   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1693   AMDGPUOperand::Ptr defaultFlatOffset() const;
1694 
1695   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1696 
1697   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1698                OptionalImmIndexMap &OptionalIdx);
1699   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1700   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1701   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1702   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1703                 OptionalImmIndexMap &OptionalIdx);
1704 
1705   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1706 
1707   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1708                bool IsAtomic = false);
1709   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1710   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1711 
1712   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1713 
1714   bool parseDimId(unsigned &Encoding);
1715   OperandMatchResultTy parseDim(OperandVector &Operands);
1716   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1717   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1718   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1719   int64_t parseDPPCtrlSel(StringRef Ctrl);
1720   int64_t parseDPPCtrlPerm();
1721   AMDGPUOperand::Ptr defaultRowMask() const;
1722   AMDGPUOperand::Ptr defaultBankMask() const;
1723   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1724   AMDGPUOperand::Ptr defaultFI() const;
1725   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1726   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1727 
1728   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1729                                     AMDGPUOperand::ImmTy Type);
1730   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1731   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1732   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1733   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1734   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1735   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1736   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1737                uint64_t BasicInstType,
1738                bool SkipDstVcc = false,
1739                bool SkipSrcVcc = false);
1740 
1741   AMDGPUOperand::Ptr defaultBLGP() const;
1742   AMDGPUOperand::Ptr defaultCBSZ() const;
1743   AMDGPUOperand::Ptr defaultABID() const;
1744 
1745   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1746   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1747 };
1748 
1749 struct OptionalOperand {
1750   const char *Name;
1751   AMDGPUOperand::ImmTy Type;
1752   bool IsBit;
1753   bool (*ConvertResult)(int64_t&);
1754 };
1755 
1756 } // end anonymous namespace
1757 
// May be called with an integer type of equivalent bitwidth.
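// For example, a 4-byte request covers both f32 and i32 operands and yields
// IEEEsingle; only 2-, 4- and 8-byte sizes are supported.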
1759 static const fltSemantics *getFltSemantics(unsigned Size) {
1760   switch (Size) {
1761   case 4:
1762     return &APFloat::IEEEsingle();
1763   case 8:
1764     return &APFloat::IEEEdouble();
1765   case 2:
1766     return &APFloat::IEEEhalf();
1767   default:
1768     llvm_unreachable("unsupported fp type");
1769   }
1770 }
1771 
1772 static const fltSemantics *getFltSemantics(MVT VT) {
1773   return getFltSemantics(VT.getSizeInBits() / 8);
1774 }
1775 
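// Select the FP semantics matching an operand type: 32-bit and packed 32-bit
// operands use IEEEsingle, 64-bit operands use IEEEdouble, and 16-bit and
// packed 16-bit operands use IEEEhalf.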
1776 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1777   switch (OperandType) {
1778   case AMDGPU::OPERAND_REG_IMM_INT32:
1779   case AMDGPU::OPERAND_REG_IMM_FP32:
1780   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1781   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1782   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1783   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1784   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1785   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1786   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1787   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1788   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1789   case AMDGPU::OPERAND_KIMM32:
1790     return &APFloat::IEEEsingle();
1791   case AMDGPU::OPERAND_REG_IMM_INT64:
1792   case AMDGPU::OPERAND_REG_IMM_FP64:
1793   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1794   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1795   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1796     return &APFloat::IEEEdouble();
1797   case AMDGPU::OPERAND_REG_IMM_INT16:
1798   case AMDGPU::OPERAND_REG_IMM_FP16:
1799   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1800   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1801   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1802   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1803   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1804   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1805   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1806   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1807   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1808   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1809   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1810   case AMDGPU::OPERAND_KIMM16:
1811     return &APFloat::IEEEhalf();
1812   default:
1813     llvm_unreachable("unsupported fp type");
1814   }
1815 }
1816 
1817 //===----------------------------------------------------------------------===//
1818 // Operand
1819 //===----------------------------------------------------------------------===//
1820 
1821 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1822   bool Lost;
1823 
  // Convert the literal to the FP semantics of the requested type.
1825   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1826                                                APFloat::rmNearestTiesToEven,
1827                                                &Lost);
  // We allow precision loss but not overflow or underflow.
1829   if (Status != APFloat::opOK &&
1830       Lost &&
1831       ((Status & APFloat::opOverflow)  != 0 ||
1832        (Status & APFloat::opUnderflow) != 0)) {
1833     return false;
1834   }
1835 
1836   return true;
1837 }
1838 
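// Return true if Val fits into Size bits when interpreted either as an
// unsigned or as a signed integer. For example, with Size == 16 both 0xFFFF
// and -1 are safe truncations, while 0x10000 is not.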
1839 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1840   return isUIntN(Size, Val) || isIntN(Size, Val);
1841 }
1842 
1843 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1844   if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken for i16 operands; allow only
    // inlinable integer literals.
1846     return isInlinableIntLiteral(Val);
1847   }
1848 
1849   // f16/v2f16 operands work correctly for all values.
1850   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1851 }
1852 
1853 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1854 
1855   // This is a hack to enable named inline values like
1856   // shared_base with both 32-bit and 64-bit operands.
1857   // Note that these values are defined as
1858   // 32-bit operands only.
1859   if (isInlineValue()) {
1860     return true;
1861   }
1862 
1863   if (!isImmTy(ImmTyNone)) {
1864     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1865     return false;
1866   }
  // TODO: We should avoid using host floats here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on MIPS hosts.
1870 
1871   APInt Literal(64, Imm.Val);
1872 
1873   if (Imm.IsFPImm) { // We got fp literal token
1874     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1875       return AMDGPU::isInlinableLiteral64(Imm.Val,
1876                                           AsmParser->hasInv2PiInlineImm());
1877     }
1878 
1879     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1880     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1881       return false;
1882 
1883     if (type.getScalarSizeInBits() == 16) {
1884       return isInlineableLiteralOp16(
1885         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1886         type, AsmParser->hasInv2PiInlineImm());
1887     }
1888 
1889     // Check if single precision literal is inlinable
1890     return AMDGPU::isInlinableLiteral32(
1891       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1892       AsmParser->hasInv2PiInlineImm());
1893   }
1894 
1895   // We got int literal token.
1896   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1897     return AMDGPU::isInlinableLiteral64(Imm.Val,
1898                                         AsmParser->hasInv2PiInlineImm());
1899   }
1900 
1901   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1902     return false;
1903   }
1904 
1905   if (type.getScalarSizeInBits() == 16) {
1906     return isInlineableLiteralOp16(
1907       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1908       type, AsmParser->hasInv2PiInlineImm());
1909   }
1910 
1911   return AMDGPU::isInlinableLiteral32(
1912     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1913     AsmParser->hasInv2PiInlineImm());
1914 }
1915 
1916 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1917   // Check that this immediate can be added as literal
1918   if (!isImmTy(ImmTyNone)) {
1919     return false;
1920   }
1921 
1922   if (!Imm.IsFPImm) {
1923     // We got int literal token.
1924 
1925     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3, because of integer truncation.
      // To avoid ambiguity, disable these cases.
1929       return false;
1930     }
1931 
1932     unsigned Size = type.getSizeInBits();
1933     if (Size == 64)
1934       Size = 32;
1935 
    // FIXME: 64-bit operands can zero-extend, sign-extend, or pad with
    // zeroes for FP types.
1938     return isSafeTruncation(Imm.Val, Size);
1939   }
1940 
1941   // We got fp literal token
1942   if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal will be set to zeroes, but such
    // literals are still accepted.
1944     return true;
1945   }
1946 
1947   if (type == MVT::i64) { // Expected 64-bit int operand
1948     // We don't allow fp literals in 64-bit integer instructions. It is
1949     // unclear how we should encode them.
1950     return false;
1951   }
1952 
1953   // We allow fp literals with f16x2 operands assuming that the specified
1954   // literal goes into the lower half and the upper half is zero. We also
1955   // require that the literal may be losslessly converted to f16.
1956   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1957                      (type == MVT::v2i16)? MVT::i16 :
1958                      (type == MVT::v2f32)? MVT::f32 : type;
1959 
1960   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1961   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1962 }
1963 
1964 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1965   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1966 }
1967 
1968 bool AMDGPUOperand::isVRegWithInputMods() const {
1969   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1970          // GFX90A allows DPP on 64-bit operands.
1971          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1972           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1973 }
1974 
1975 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1976   if (AsmParser->isVI())
1977     return isVReg32();
1978   else if (AsmParser->isGFX9Plus())
1979     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1980   else
1981     return false;
1982 }
1983 
1984 bool AMDGPUOperand::isSDWAFP16Operand() const {
1985   return isSDWAOperand(MVT::f16);
1986 }
1987 
1988 bool AMDGPUOperand::isSDWAFP32Operand() const {
1989   return isSDWAOperand(MVT::f32);
1990 }
1991 
1992 bool AMDGPUOperand::isSDWAInt16Operand() const {
1993   return isSDWAOperand(MVT::i16);
1994 }
1995 
1996 bool AMDGPUOperand::isSDWAInt32Operand() const {
1997   return isSDWAOperand(MVT::i32);
1998 }
1999 
2000 bool AMDGPUOperand::isBoolReg() const {
2001   auto FB = AsmParser->getFeatureBits();
2002   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2003                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2004 }
2005 
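// Apply the 'abs'/'neg' source modifiers directly to the bit pattern of an
// FP literal of the given size in bytes. For example, with Size == 4, 'neg'
// flips the sign bit and turns 0x3F800000 (1.0) into 0xBF800000 (-1.0).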
2006 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2007 {
2008   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2009   assert(Size == 2 || Size == 4 || Size == 8);
2010 
2011   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2012 
2013   if (Imm.Mods.Abs) {
2014     Val &= ~FpSignMask;
2015   }
2016   if (Imm.Mods.Neg) {
2017     Val ^= FpSignMask;
2018   }
2019 
2020   return Val;
2021 }
2022 
2023 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2024   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2025                              Inst.getNumOperands())) {
2026     addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
2028                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2029   } else {
2030     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2031     Inst.addOperand(MCOperand::createImm(Imm.Val));
2032     setImmKindNone();
2033   }
2034 }
2035 
2036 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2037   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2038   auto OpNum = Inst.getNumOperands();
2039   // Check that this operand accepts literals
2040   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2041 
2042   if (ApplyModifiers) {
2043     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2044     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2045     Val = applyInputFPModifiers(Val, Size);
2046   }
2047 
2048   APInt Literal(64, Val);
2049   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2050 
2051   if (Imm.IsFPImm) { // We got fp literal token
2052     switch (OpTy) {
2053     case AMDGPU::OPERAND_REG_IMM_INT64:
2054     case AMDGPU::OPERAND_REG_IMM_FP64:
2055     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2056     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2057     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2058       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2059                                        AsmParser->hasInv2PiInlineImm())) {
2060         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2061         setImmKindConst();
2062         return;
2063       }
2064 
2065       // Non-inlineable
2066       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2067         // For fp operands we check if low 32 bits are zeros
2068         if (Literal.getLoBits(32) != 0) {
2069           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2070           "Can't encode literal as exact 64-bit floating-point operand. "
2071           "Low 32-bits will be set to zero");
2072         }
2073 
2074         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2075         setImmKindLiteral();
2076         return;
2077       }
2078 
2079       // We don't allow fp literals in 64-bit integer instructions. It is
2080       // unclear how we should encode them. This case should be checked earlier
2081       // in predicate methods (isLiteralImm())
2082       llvm_unreachable("fp literal in 64-bit integer instruction.");
2083 
2084     case AMDGPU::OPERAND_REG_IMM_INT32:
2085     case AMDGPU::OPERAND_REG_IMM_FP32:
2086     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2087     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2088     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2089     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2090     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2091     case AMDGPU::OPERAND_REG_IMM_INT16:
2092     case AMDGPU::OPERAND_REG_IMM_FP16:
2093     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2094     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2095     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2096     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2097     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2098     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2099     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2100     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2101     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2102     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2103     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2104     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2105     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2106     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2107     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2108     case AMDGPU::OPERAND_KIMM32:
2109     case AMDGPU::OPERAND_KIMM16: {
2110       bool lost;
2111       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's FP semantics.
2113       FPLiteral.convert(*getOpFltSemantics(OpTy),
2114                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should
      // have been checked earlier in isLiteralImm().
2117 
2118       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2119       Inst.addOperand(MCOperand::createImm(ImmVal));
2120       setImmKindLiteral();
2121       return;
2122     }
2123     default:
2124       llvm_unreachable("invalid operand size");
2125     }
2126 
2127     return;
2128   }
2129 
2130   // We got int literal token.
2131   // Only sign extend inline immediates.
2132   switch (OpTy) {
2133   case AMDGPU::OPERAND_REG_IMM_INT32:
2134   case AMDGPU::OPERAND_REG_IMM_FP32:
2135   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2136   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2137   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2138   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2139   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2140   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2141   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2142   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2143   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2144   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2145   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2146     if (isSafeTruncation(Val, 32) &&
2147         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2148                                      AsmParser->hasInv2PiInlineImm())) {
2149       Inst.addOperand(MCOperand::createImm(Val));
2150       setImmKindConst();
2151       return;
2152     }
2153 
2154     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2155     setImmKindLiteral();
2156     return;
2157 
2158   case AMDGPU::OPERAND_REG_IMM_INT64:
2159   case AMDGPU::OPERAND_REG_IMM_FP64:
2160   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2161   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2162   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2163     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2164       Inst.addOperand(MCOperand::createImm(Val));
2165       setImmKindConst();
2166       return;
2167     }
2168 
2169     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2170     setImmKindLiteral();
2171     return;
2172 
2173   case AMDGPU::OPERAND_REG_IMM_INT16:
2174   case AMDGPU::OPERAND_REG_IMM_FP16:
2175   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2176   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2177   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2178   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2179   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2180     if (isSafeTruncation(Val, 16) &&
2181         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2182                                      AsmParser->hasInv2PiInlineImm())) {
2183       Inst.addOperand(MCOperand::createImm(Val));
2184       setImmKindConst();
2185       return;
2186     }
2187 
2188     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2189     setImmKindLiteral();
2190     return;
2191 
2192   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2193   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2194   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2195   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2196     assert(isSafeTruncation(Val, 16));
2197     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2198                                         AsmParser->hasInv2PiInlineImm()));
2199 
2200     Inst.addOperand(MCOperand::createImm(Val));
2201     return;
2202   }
2203   case AMDGPU::OPERAND_KIMM32:
2204     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2205     setImmKindNone();
2206     return;
2207   case AMDGPU::OPERAND_KIMM16:
2208     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2209     setImmKindNone();
2210     return;
2211   default:
2212     llvm_unreachable("invalid operand size");
2213   }
2214 }
2215 
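// Add an operand for a KImm (a literal constant embedded in the instruction)
// of the given bit width. Integer tokens are truncated to Bitwidth bits;
// FP tokens are first converted from double to the matching half or single
// precision format.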
2216 template <unsigned Bitwidth>
2217 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2218   APInt Literal(64, Imm.Val);
2219   setImmKindNone();
2220 
2221   if (!Imm.IsFPImm) {
2222     // We got int literal token.
2223     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2224     return;
2225   }
2226 
2227   bool Lost;
2228   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2229   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2230                     APFloat::rmNearestTiesToEven, &Lost);
2231   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2232 }
2233 
2234 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2235   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2236 }
2237 
2238 static bool isInlineValue(unsigned Reg) {
2239   switch (Reg) {
2240   case AMDGPU::SRC_SHARED_BASE:
2241   case AMDGPU::SRC_SHARED_LIMIT:
2242   case AMDGPU::SRC_PRIVATE_BASE:
2243   case AMDGPU::SRC_PRIVATE_LIMIT:
2244   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2245     return true;
2246   case AMDGPU::SRC_VCCZ:
2247   case AMDGPU::SRC_EXECZ:
2248   case AMDGPU::SRC_SCC:
2249     return true;
2250   case AMDGPU::SGPR_NULL:
2251     return true;
2252   default:
2253     return false;
2254   }
2255 }
2256 
2257 bool AMDGPUOperand::isInlineValue() const {
2258   return isRegKind() && ::isInlineValue(getReg());
2259 }
2260 
2261 //===----------------------------------------------------------------------===//
2262 // AsmParser
2263 //===----------------------------------------------------------------------===//
2264 
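// Map a register kind and a width in bits to the corresponding register
// class ID, or return -1 if no class of that width exists for the kind.
// For example, (IS_VGPR, 96) yields VReg_96RegClassID, while (IS_TTMP, 96)
// has no matching class and yields -1.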
2265 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2266   if (Is == IS_VGPR) {
2267     switch (RegWidth) {
2268       default: return -1;
2269       case 32:
2270         return AMDGPU::VGPR_32RegClassID;
2271       case 64:
2272         return AMDGPU::VReg_64RegClassID;
2273       case 96:
2274         return AMDGPU::VReg_96RegClassID;
2275       case 128:
2276         return AMDGPU::VReg_128RegClassID;
2277       case 160:
2278         return AMDGPU::VReg_160RegClassID;
2279       case 192:
2280         return AMDGPU::VReg_192RegClassID;
2281       case 224:
2282         return AMDGPU::VReg_224RegClassID;
2283       case 256:
2284         return AMDGPU::VReg_256RegClassID;
2285       case 512:
2286         return AMDGPU::VReg_512RegClassID;
2287       case 1024:
2288         return AMDGPU::VReg_1024RegClassID;
2289     }
2290   } else if (Is == IS_TTMP) {
2291     switch (RegWidth) {
2292       default: return -1;
2293       case 32:
2294         return AMDGPU::TTMP_32RegClassID;
2295       case 64:
2296         return AMDGPU::TTMP_64RegClassID;
2297       case 128:
2298         return AMDGPU::TTMP_128RegClassID;
2299       case 256:
2300         return AMDGPU::TTMP_256RegClassID;
2301       case 512:
2302         return AMDGPU::TTMP_512RegClassID;
2303     }
2304   } else if (Is == IS_SGPR) {
2305     switch (RegWidth) {
2306       default: return -1;
2307       case 32:
2308         return AMDGPU::SGPR_32RegClassID;
2309       case 64:
2310         return AMDGPU::SGPR_64RegClassID;
2311       case 96:
2312         return AMDGPU::SGPR_96RegClassID;
2313       case 128:
2314         return AMDGPU::SGPR_128RegClassID;
2315       case 160:
2316         return AMDGPU::SGPR_160RegClassID;
2317       case 192:
2318         return AMDGPU::SGPR_192RegClassID;
2319       case 224:
2320         return AMDGPU::SGPR_224RegClassID;
2321       case 256:
2322         return AMDGPU::SGPR_256RegClassID;
2323       case 512:
2324         return AMDGPU::SGPR_512RegClassID;
2325     }
2326   } else if (Is == IS_AGPR) {
2327     switch (RegWidth) {
2328       default: return -1;
2329       case 32:
2330         return AMDGPU::AGPR_32RegClassID;
2331       case 64:
2332         return AMDGPU::AReg_64RegClassID;
2333       case 96:
2334         return AMDGPU::AReg_96RegClassID;
2335       case 128:
2336         return AMDGPU::AReg_128RegClassID;
2337       case 160:
2338         return AMDGPU::AReg_160RegClassID;
2339       case 192:
2340         return AMDGPU::AReg_192RegClassID;
2341       case 224:
2342         return AMDGPU::AReg_224RegClassID;
2343       case 256:
2344         return AMDGPU::AReg_256RegClassID;
2345       case 512:
2346         return AMDGPU::AReg_512RegClassID;
2347       case 1024:
2348         return AMDGPU::AReg_1024RegClassID;
2349     }
2350   }
2351   return -1;
2352 }
2353 
2354 static unsigned getSpecialRegForName(StringRef RegName) {
2355   return StringSwitch<unsigned>(RegName)
2356     .Case("exec", AMDGPU::EXEC)
2357     .Case("vcc", AMDGPU::VCC)
2358     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2359     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2360     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2361     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2362     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2363     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2364     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2365     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2366     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2367     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2368     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2369     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2370     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2371     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2372     .Case("m0", AMDGPU::M0)
2373     .Case("vccz", AMDGPU::SRC_VCCZ)
2374     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2375     .Case("execz", AMDGPU::SRC_EXECZ)
2376     .Case("src_execz", AMDGPU::SRC_EXECZ)
2377     .Case("scc", AMDGPU::SRC_SCC)
2378     .Case("src_scc", AMDGPU::SRC_SCC)
2379     .Case("tba", AMDGPU::TBA)
2380     .Case("tma", AMDGPU::TMA)
2381     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2382     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2383     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2384     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2385     .Case("vcc_lo", AMDGPU::VCC_LO)
2386     .Case("vcc_hi", AMDGPU::VCC_HI)
2387     .Case("exec_lo", AMDGPU::EXEC_LO)
2388     .Case("exec_hi", AMDGPU::EXEC_HI)
2389     .Case("tma_lo", AMDGPU::TMA_LO)
2390     .Case("tma_hi", AMDGPU::TMA_HI)
2391     .Case("tba_lo", AMDGPU::TBA_LO)
2392     .Case("tba_hi", AMDGPU::TBA_HI)
2393     .Case("pc", AMDGPU::PC_REG)
2394     .Case("null", AMDGPU::SGPR_NULL)
2395     .Default(AMDGPU::NoRegister);
2396 }
2397 
2398 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2399                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2400   auto R = parseRegister();
2401   if (!R) return true;
2402   assert(R->isReg());
2403   RegNo = R->getReg();
2404   StartLoc = R->getStartLoc();
2405   EndLoc = R->getEndLoc();
2406   return false;
2407 }
2408 
2409 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2410                                     SMLoc &EndLoc) {
2411   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2412 }
2413 
2414 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2415                                                        SMLoc &StartLoc,
2416                                                        SMLoc &EndLoc) {
2417   bool Result =
2418       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2419   bool PendingErrors = getParser().hasPendingError();
2420   getParser().clearPendingErrors();
2421   if (PendingErrors)
2422     return MatchOperand_ParseFail;
2423   if (Result)
2424     return MatchOperand_NoMatch;
2425   return MatchOperand_Success;
2426 }
2427 
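// Extend a register list by one more 32-bit register (Reg1). Special
// register halves such as exec_lo/exec_hi are folded into their 64-bit
// counterparts; regular registers must have consecutive indices, so
// [v0,v1,v2] is accepted while [v0,v2] is rejected.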
2428 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2429                                             RegisterKind RegKind, unsigned Reg1,
2430                                             SMLoc Loc) {
2431   switch (RegKind) {
2432   case IS_SPECIAL:
2433     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2434       Reg = AMDGPU::EXEC;
2435       RegWidth = 64;
2436       return true;
2437     }
2438     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2439       Reg = AMDGPU::FLAT_SCR;
2440       RegWidth = 64;
2441       return true;
2442     }
2443     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2444       Reg = AMDGPU::XNACK_MASK;
2445       RegWidth = 64;
2446       return true;
2447     }
2448     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2449       Reg = AMDGPU::VCC;
2450       RegWidth = 64;
2451       return true;
2452     }
2453     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2454       Reg = AMDGPU::TBA;
2455       RegWidth = 64;
2456       return true;
2457     }
2458     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2459       Reg = AMDGPU::TMA;
2460       RegWidth = 64;
2461       return true;
2462     }
2463     Error(Loc, "register does not fit in the list");
2464     return false;
2465   case IS_VGPR:
2466   case IS_SGPR:
2467   case IS_AGPR:
2468   case IS_TTMP:
2469     if (Reg1 != Reg + RegWidth / 32) {
2470       Error(Loc, "registers in a list must have consecutive indices");
2471       return false;
2472     }
2473     RegWidth += 32;
2474     return true;
2475   default:
2476     llvm_unreachable("unexpected register kind");
2477   }
2478 }
2479 
2480 struct RegInfo {
2481   StringLiteral Name;
2482   RegisterKind Kind;
2483 };
2484 
2485 static constexpr RegInfo RegularRegisters[] = {
2486   {{"v"},    IS_VGPR},
2487   {{"s"},    IS_SGPR},
2488   {{"ttmp"}, IS_TTMP},
2489   {{"acc"},  IS_AGPR},
2490   {{"a"},    IS_AGPR},
2491 };
2492 
2493 static bool isRegularReg(RegisterKind Kind) {
2494   return Kind == IS_VGPR ||
2495          Kind == IS_SGPR ||
2496          Kind == IS_TTMP ||
2497          Kind == IS_AGPR;
2498 }
2499 
2500 static const RegInfo* getRegularRegInfo(StringRef Str) {
2501   for (const RegInfo &Reg : RegularRegisters)
2502     if (Str.startswith(Reg.Name))
2503       return &Reg;
2504   return nullptr;
2505 }
2506 
2507 static bool getRegNum(StringRef Str, unsigned& Num) {
2508   return !Str.getAsInteger(10, Num);
2509 }
2510 
2511 bool
2512 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2513                             const AsmToken &NextToken) const {
2514 
2515   // A list of consecutive registers: [s0,s1,s2,s3]
2516   if (Token.is(AsmToken::LBrac))
2517     return true;
2518 
2519   if (!Token.is(AsmToken::Identifier))
2520     return false;
2521 
2522   // A single register like s0 or a range of registers like s[0:1]
2523 
2524   StringRef Str = Token.getString();
2525   const RegInfo *Reg = getRegularRegInfo(Str);
2526   if (Reg) {
2527     StringRef RegName = Reg->Name;
2528     StringRef RegSuffix = Str.substr(RegName.size());
2529     if (!RegSuffix.empty()) {
2530       unsigned Num;
2531       // A single register with an index: rXX
2532       if (getRegNum(RegSuffix, Num))
2533         return true;
2534     } else {
2535       // A range of registers: r[XX:YY].
2536       if (NextToken.is(AsmToken::LBrac))
2537         return true;
2538     }
2539   }
2540 
2541   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2542 }
2543 
2544 bool
2545 AMDGPUAsmParser::isRegister()
2546 {
2547   return isRegister(getToken(), peekToken());
2548 }
2549 
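// Resolve a regular (v/s/ttmp/a) register from its kind, starting index and
// width in bits. SGPR and TTMP tuples must be index-aligned: for example,
// a 128-bit SGPR tuple may start at s4 (s[4:7]) but not at s2, which is
// diagnosed as an invalid register alignment.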
2550 unsigned
2551 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2552                                unsigned RegNum,
2553                                unsigned RegWidth,
2554                                SMLoc Loc) {
2555 
2556   assert(isRegularReg(RegKind));
2557 
2558   unsigned AlignSize = 1;
2559   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2560     // SGPR and TTMP registers must be aligned.
2561     // Max required alignment is 4 dwords.
2562     AlignSize = std::min(RegWidth / 32, 4u);
2563   }
2564 
2565   if (RegNum % AlignSize != 0) {
2566     Error(Loc, "invalid register alignment");
2567     return AMDGPU::NoRegister;
2568   }
2569 
2570   unsigned RegIdx = RegNum / AlignSize;
2571   int RCID = getRegClass(RegKind, RegWidth);
2572   if (RCID == -1) {
2573     Error(Loc, "invalid or unsupported register size");
2574     return AMDGPU::NoRegister;
2575   }
2576 
2577   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2578   const MCRegisterClass RC = TRI->getRegClass(RCID);
2579   if (RegIdx >= RC.getNumRegs()) {
2580     Error(Loc, "register index is out of range");
2581     return AMDGPU::NoRegister;
2582   }
2583 
2584   return RC.getRegister(RegIdx);
2585 }
2586 
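// Parse a bracketed register range such as "[2:5]" (or a single index like
// "[2]") into a starting index and a width in bits; "[2:5]" yields Num == 2
// and RegWidth == 128.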
2587 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2588   int64_t RegLo, RegHi;
2589   if (!skipToken(AsmToken::LBrac, "missing register index"))
2590     return false;
2591 
2592   SMLoc FirstIdxLoc = getLoc();
2593   SMLoc SecondIdxLoc;
2594 
2595   if (!parseExpr(RegLo))
2596     return false;
2597 
2598   if (trySkipToken(AsmToken::Colon)) {
2599     SecondIdxLoc = getLoc();
2600     if (!parseExpr(RegHi))
2601       return false;
2602   } else {
2603     RegHi = RegLo;
2604   }
2605 
2606   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2607     return false;
2608 
2609   if (!isUInt<32>(RegLo)) {
2610     Error(FirstIdxLoc, "invalid register index");
2611     return false;
2612   }
2613 
2614   if (!isUInt<32>(RegHi)) {
2615     Error(SecondIdxLoc, "invalid register index");
2616     return false;
2617   }
2618 
2619   if (RegLo > RegHi) {
2620     Error(FirstIdxLoc, "first register index should not exceed second index");
2621     return false;
2622   }
2623 
2624   Num = static_cast<unsigned>(RegLo);
2625   RegWidth = 32 * ((RegHi - RegLo) + 1);
2626   return true;
2627 }
2628 
2629 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2630                                           unsigned &RegNum, unsigned &RegWidth,
2631                                           SmallVectorImpl<AsmToken> &Tokens) {
2632   assert(isToken(AsmToken::Identifier));
2633   unsigned Reg = getSpecialRegForName(getTokenStr());
2634   if (Reg) {
2635     RegNum = 0;
2636     RegWidth = 32;
2637     RegKind = IS_SPECIAL;
2638     Tokens.push_back(getToken());
2639     lex(); // skip register name
2640   }
2641   return Reg;
2642 }
2643 
2644 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2645                                           unsigned &RegNum, unsigned &RegWidth,
2646                                           SmallVectorImpl<AsmToken> &Tokens) {
2647   assert(isToken(AsmToken::Identifier));
2648   StringRef RegName = getTokenStr();
2649   auto Loc = getLoc();
2650 
2651   const RegInfo *RI = getRegularRegInfo(RegName);
2652   if (!RI) {
2653     Error(Loc, "invalid register name");
2654     return AMDGPU::NoRegister;
2655   }
2656 
2657   Tokens.push_back(getToken());
2658   lex(); // skip register name
2659 
2660   RegKind = RI->Kind;
2661   StringRef RegSuffix = RegName.substr(RI->Name.size());
2662   if (!RegSuffix.empty()) {
2663     // Single 32-bit register: vXX.
2664     if (!getRegNum(RegSuffix, RegNum)) {
2665       Error(Loc, "invalid register index");
2666       return AMDGPU::NoRegister;
2667     }
2668     RegWidth = 32;
2669   } else {
2670     // Range of registers: v[XX:YY]. ":YY" is optional.
2671     if (!ParseRegRange(RegNum, RegWidth))
2672       return AMDGPU::NoRegister;
2673   }
2674 
2675   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2676 }
2677 
2678 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2679                                        unsigned &RegWidth,
2680                                        SmallVectorImpl<AsmToken> &Tokens) {
2681   unsigned Reg = AMDGPU::NoRegister;
2682   auto ListLoc = getLoc();
2683 
2684   if (!skipToken(AsmToken::LBrac,
2685                  "expected a register or a list of registers")) {
2686     return AMDGPU::NoRegister;
2687   }
2688 
2689   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2690 
2691   auto Loc = getLoc();
2692   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2693     return AMDGPU::NoRegister;
2694   if (RegWidth != 32) {
2695     Error(Loc, "expected a single 32-bit register");
2696     return AMDGPU::NoRegister;
2697   }
2698 
2699   for (; trySkipToken(AsmToken::Comma); ) {
2700     RegisterKind NextRegKind;
2701     unsigned NextReg, NextRegNum, NextRegWidth;
2702     Loc = getLoc();
2703 
2704     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2705                              NextRegNum, NextRegWidth,
2706                              Tokens)) {
2707       return AMDGPU::NoRegister;
2708     }
2709     if (NextRegWidth != 32) {
2710       Error(Loc, "expected a single 32-bit register");
2711       return AMDGPU::NoRegister;
2712     }
2713     if (NextRegKind != RegKind) {
2714       Error(Loc, "registers in a list must be of the same kind");
2715       return AMDGPU::NoRegister;
2716     }
2717     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2718       return AMDGPU::NoRegister;
2719   }
2720 
2721   if (!skipToken(AsmToken::RBrac,
2722                  "expected a comma or a closing square bracket")) {
2723     return AMDGPU::NoRegister;
2724   }
2725 
2726   if (isRegularReg(RegKind))
2727     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2728 
2729   return Reg;
2730 }
2731 
2732 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2733                                           unsigned &RegNum, unsigned &RegWidth,
2734                                           SmallVectorImpl<AsmToken> &Tokens) {
2735   auto Loc = getLoc();
2736   Reg = AMDGPU::NoRegister;
2737 
2738   if (isToken(AsmToken::Identifier)) {
2739     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2740     if (Reg == AMDGPU::NoRegister)
2741       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2742   } else {
2743     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2744   }
2745 
2746   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2747   if (Reg == AMDGPU::NoRegister) {
2748     assert(Parser.hasPendingError());
2749     return false;
2750   }
2751 
2752   if (!subtargetHasRegister(*TRI, Reg)) {
2753     if (Reg == AMDGPU::SGPR_NULL) {
2754       Error(Loc, "'null' operand is not supported on this GPU");
2755     } else {
2756       Error(Loc, "register not available on this GPU");
2757     }
2758     return false;
2759   }
2760 
2761   return true;
2762 }
2763 
2764 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2765                                           unsigned &RegNum, unsigned &RegWidth,
2766                                           bool RestoreOnFailure /*=false*/) {
2767   Reg = AMDGPU::NoRegister;
2768 
2769   SmallVector<AsmToken, 1> Tokens;
2770   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2771     if (RestoreOnFailure) {
2772       while (!Tokens.empty()) {
2773         getLexer().UnLex(Tokens.pop_back_val());
2774       }
2775     }
2776     return true;
2777   }
2778   return false;
2779 }
2780 
2781 Optional<StringRef>
2782 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2783   switch (RegKind) {
2784   case IS_VGPR:
2785     return StringRef(".amdgcn.next_free_vgpr");
2786   case IS_SGPR:
2787     return StringRef(".amdgcn.next_free_sgpr");
2788   default:
2789     return None;
2790   }
2791 }
2792 
2793 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2794   auto SymbolName = getGprCountSymbolName(RegKind);
2795   assert(SymbolName && "initializing invalid register kind");
2796   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2797   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2798 }
2799 
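// Keep the .amdgcn.next_free_{v,s}gpr symbols in sync with the registers
// actually used: if the register just parsed extends past the current value,
// bump the symbol to one past its highest dword index. For example, a use of
// v[6:7] raises .amdgcn.next_free_vgpr to at least 8.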
2800 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2801                                             unsigned DwordRegIndex,
2802                                             unsigned RegWidth) {
2803   // Symbols are only defined for GCN targets
2804   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2805     return true;
2806 
2807   auto SymbolName = getGprCountSymbolName(RegKind);
2808   if (!SymbolName)
2809     return true;
2810   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2811 
2812   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2813   int64_t OldCount;
2814 
2815   if (!Sym->isVariable())
2816     return !Error(getLoc(),
2817                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2818   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2819     return !Error(
2820         getLoc(),
2821         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2822 
2823   if (OldCount <= NewMax)
2824     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2825 
2826   return true;
2827 }
2828 
2829 std::unique_ptr<AMDGPUOperand>
2830 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2831   const auto &Tok = getToken();
2832   SMLoc StartLoc = Tok.getLoc();
2833   SMLoc EndLoc = Tok.getEndLoc();
2834   RegisterKind RegKind;
2835   unsigned Reg, RegNum, RegWidth;
2836 
2837   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2838     return nullptr;
2839   }
2840   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2841     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2842       return nullptr;
2843   } else
2844     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2845   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2846 }
2847 
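// Parse an immediate operand: either a floating-point literal with an
// optional leading '-', or an integer/symbolic MC expression. For example,
// "-1.5" is stored as the IEEE double bit pattern of -1.5, while "2+2" is
// folded to the absolute value 4.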
2848 OperandMatchResultTy
2849 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2850   // TODO: add syntactic sugar for 1/(2*PI)
2851 
2852   assert(!isRegister());
2853   assert(!isModifier());
2854 
2855   const auto& Tok = getToken();
2856   const auto& NextTok = peekToken();
2857   bool IsReal = Tok.is(AsmToken::Real);
2858   SMLoc S = getLoc();
2859   bool Negate = false;
2860 
2861   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2862     lex();
2863     IsReal = true;
2864     Negate = true;
2865   }
2866 
2867   if (IsReal) {
    // Floating-point expressions are not supported.
    // Only floating-point literals with an optional sign are allowed.
2871 
2872     StringRef Num = getTokenStr();
2873     lex();
2874 
2875     APFloat RealVal(APFloat::IEEEdouble());
2876     auto roundMode = APFloat::rmNearestTiesToEven;
2877     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2878       return MatchOperand_ParseFail;
2879     }
2880     if (Negate)
2881       RealVal.changeSign();
2882 
2883     Operands.push_back(
2884       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2885                                AMDGPUOperand::ImmTyNone, true));
2886 
2887     return MatchOperand_Success;
2888 
2889   } else {
2890     int64_t IntVal;
2891     const MCExpr *Expr;
2892     SMLoc S = getLoc();
2893 
2894     if (HasSP3AbsModifier) {
2895       // This is a workaround for handling expressions
2896       // as arguments of SP3 'abs' modifier, for example:
2897       //     |1.0|
2898       //     |-1|
2899       //     |1+x|
      // This syntax is not compatible with the syntax of standard
2901       // MC expressions (due to the trailing '|').
2902       SMLoc EndLoc;
2903       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2904         return MatchOperand_ParseFail;
2905     } else {
2906       if (Parser.parseExpression(Expr))
2907         return MatchOperand_ParseFail;
2908     }
2909 
2910     if (Expr->evaluateAsAbsolute(IntVal)) {
2911       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2912     } else {
2913       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2914     }
2915 
2916     return MatchOperand_Success;
2917   }
2918 
2919   return MatchOperand_NoMatch;
2920 }
2921 
2922 OperandMatchResultTy
2923 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2924   if (!isRegister())
2925     return MatchOperand_NoMatch;
2926 
2927   if (auto R = parseRegister()) {
2928     assert(R->isReg());
2929     Operands.push_back(std::move(R));
2930     return MatchOperand_Success;
2931   }
2932   return MatchOperand_ParseFail;
2933 }
2934 
2935 OperandMatchResultTy
2936 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2937   auto res = parseReg(Operands);
2938   if (res != MatchOperand_NoMatch) {
2939     return res;
2940   } else if (isModifier()) {
2941     return MatchOperand_NoMatch;
2942   } else {
2943     return parseImm(Operands, HasSP3AbsMod);
2944   }
2945 }
2946 
2947 bool
2948 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2949   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2950     const auto &str = Token.getString();
2951     return str == "abs" || str == "neg" || str == "sext";
2952   }
2953   return false;
2954 }
2955 
2956 bool
2957 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2958   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2959 }
2960 
2961 bool
2962 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2963   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2964 }
2965 
2966 bool
2967 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2968   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2969 }
2970 
2971 // Check if this is an operand modifier or an opcode modifier
2972 // which may look like an expression but it is not. We should
2973 // avoid parsing these modifiers as expressions. Currently
2974 // recognized sequences are:
2975 //   |...|
2976 //   abs(...)
2977 //   neg(...)
2978 //   sext(...)
2979 //   -reg
2980 //   -|...|
2981 //   -abs(...)
2982 //   name:...
2983 // Note that simple opcode modifiers like 'gds' may be parsed as
2984 // expressions; this is a special case. See getExpressionAsToken.
2985 //
2986 bool
2987 AMDGPUAsmParser::isModifier() {
2988 
2989   AsmToken Tok = getToken();
2990   AsmToken NextToken[2];
2991   peekTokens(NextToken);
2992 
2993   return isOperandModifier(Tok, NextToken[0]) ||
2994          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2995          isOpcodeModifierWithVal(Tok, NextToken[0]);
2996 }
2997 
2998 // Check if the current token is an SP3 'neg' modifier.
2999 // Currently this modifier is allowed in the following context:
3000 //
3001 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3002 // 2. Before an 'abs' modifier: -abs(...)
3003 // 3. Before an SP3 'abs' modifier: -|...|
3004 //
3005 // In all other cases "-" is handled as a part
3006 // of an expression that follows the sign.
3007 //
// Note: When "-" is followed by an integer literal N,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
3019 //
3020 bool
3021 AMDGPUAsmParser::parseSP3NegModifier() {
3022 
3023   AsmToken NextToken[2];
3024   peekTokens(NextToken);
3025 
3026   if (isToken(AsmToken::Minus) &&
3027       (isRegister(NextToken[0], NextToken[1]) ||
3028        NextToken[0].is(AsmToken::Pipe) ||
3029        isId(NextToken[0], "abs"))) {
3030     lex();
3031     return true;
3032   }
3033 
3034   return false;
3035 }
3036 
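// Parse a source operand together with optional FP input modifiers, either
// in named form (neg(...), abs(...)) or in SP3 form (-..., |...|). For
// example, "-|v0|" sets both the Neg and Abs modifiers on the parsed
// register operand.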
3037 OperandMatchResultTy
3038 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3039                                               bool AllowImm) {
3040   bool Neg, SP3Neg;
3041   bool Abs, SP3Abs;
3042   SMLoc Loc;
3043 
3044   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3045   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3046     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3047     return MatchOperand_ParseFail;
3048   }
3049 
3050   SP3Neg = parseSP3NegModifier();
3051 
3052   Loc = getLoc();
3053   Neg = trySkipId("neg");
3054   if (Neg && SP3Neg) {
3055     Error(Loc, "expected register or immediate");
3056     return MatchOperand_ParseFail;
3057   }
3058   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3059     return MatchOperand_ParseFail;
3060 
3061   Abs = trySkipId("abs");
3062   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3063     return MatchOperand_ParseFail;
3064 
3065   Loc = getLoc();
3066   SP3Abs = trySkipToken(AsmToken::Pipe);
3067   if (Abs && SP3Abs) {
3068     Error(Loc, "expected register or immediate");
3069     return MatchOperand_ParseFail;
3070   }
3071 
3072   OperandMatchResultTy Res;
3073   if (AllowImm) {
3074     Res = parseRegOrImm(Operands, SP3Abs);
3075   } else {
3076     Res = parseReg(Operands);
3077   }
3078   if (Res != MatchOperand_Success) {
3079     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3080   }
3081 
3082   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3083     return MatchOperand_ParseFail;
3084   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3085     return MatchOperand_ParseFail;
3086   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3087     return MatchOperand_ParseFail;
3088 
3089   AMDGPUOperand::Modifiers Mods;
3090   Mods.Abs = Abs || SP3Abs;
3091   Mods.Neg = Neg || SP3Neg;
3092 
3093   if (Mods.hasFPModifiers()) {
3094     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3095     if (Op.isExpr()) {
3096       Error(Op.getStartLoc(), "expected an absolute expression");
3097       return MatchOperand_ParseFail;
3098     }
3099     Op.setModifiers(Mods);
3100   }
3101   return MatchOperand_Success;
3102 }
3103 
3104 OperandMatchResultTy
3105 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3106                                                bool AllowImm) {
3107   bool Sext = trySkipId("sext");
3108   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3109     return MatchOperand_ParseFail;
3110 
3111   OperandMatchResultTy Res;
3112   if (AllowImm) {
3113     Res = parseRegOrImm(Operands);
3114   } else {
3115     Res = parseReg(Operands);
3116   }
3117   if (Res != MatchOperand_Success) {
3118     return Sext? MatchOperand_ParseFail : Res;
3119   }
3120 
3121   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3122     return MatchOperand_ParseFail;
3123 
3124   AMDGPUOperand::Modifiers Mods;
3125   Mods.Sext = Sext;
3126 
3127   if (Mods.hasIntModifiers()) {
3128     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3129     if (Op.isExpr()) {
3130       Error(Op.getStartLoc(), "expected an absolute expression");
3131       return MatchOperand_ParseFail;
3132     }
3133     Op.setModifiers(Mods);
3134   }
3135 
3136   return MatchOperand_Success;
3137 }
3138 
3139 OperandMatchResultTy
3140 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3141   return parseRegOrImmWithFPInputMods(Operands, false);
3142 }
3143 
3144 OperandMatchResultTy
3145 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3146   return parseRegOrImmWithIntInputMods(Operands, false);
3147 }
3148 
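// Parse an operand that is either a register or the literal token "off"
// (represented as an immediate of type ImmTyOff).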
3149 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3150   auto Loc = getLoc();
3151   if (trySkipId("off")) {
3152     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3153                                                 AMDGPUOperand::ImmTyOff, false));
3154     return MatchOperand_Success;
3155   }
3156 
3157   if (!isRegister())
3158     return MatchOperand_NoMatch;
3159 
3160   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3161   if (Reg) {
3162     Operands.push_back(std::move(Reg));
3163     return MatchOperand_Success;
3164   }
3165 
  return MatchOperand_ParseFail;
}
3169 
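// Reject matches that conflict with an encoding forced via an instruction
// suffix (_e32/_e64/_dpp/_sdwa), and prefer the e32 form for VOP3 opcodes
// marked VOPAsmPrefer32Bit unless the 64-bit encoding was requested
// explicitly.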
3170 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3171   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3172 
3173   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3174       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3175       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3176       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3177     return Match_InvalidOperand;
3178 
3179   if ((TSFlags & SIInstrFlags::VOP3) &&
3180       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3181       getForcedEncodingSize() != 64)
3182     return Match_PreferE32;
3183 
3184   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3185       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD.
3187     auto OpNum =
3188         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3189     const auto &Op = Inst.getOperand(OpNum);
3190     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3191       return Match_InvalidOperand;
3192     }
3193   }
3194 
3195   return Match_Success;
3196 }
3197 
3198 static ArrayRef<unsigned> getAllVariants() {
3199   static const unsigned Variants[] = {
3200     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3201     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3202   };
3203 
3204   return makeArrayRef(Variants);
3205 }
3206 
// Return the asm variants that should be checked when matching, honoring any
// forced encoding.
3208 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3209   if (getForcedEncodingSize() == 32) {
3210     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3211     return makeArrayRef(Variants);
3212   }
3213 
3214   if (isForcedVOP3()) {
3215     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3216     return makeArrayRef(Variants);
3217   }
3218 
3219   if (isForcedSDWA()) {
3220     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3221                                         AMDGPUAsmVariants::SDWA9};
3222     return makeArrayRef(Variants);
3223   }
3224 
3225   if (isForcedDPP()) {
3226     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3227     return makeArrayRef(Variants);
3228   }
3229 
3230   return getAllVariants();
3231 }
3232 
3233 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3234   if (getForcedEncodingSize() == 32)
3235     return "e32";
3236 
3237   if (isForcedVOP3())
3238     return "e64";
3239 
3240   if (isForcedSDWA())
3241     return "sdwa";
3242 
3243   if (isForcedDPP())
3244     return "dpp";
3245 
3246   return "";
3247 }
3248 
3249 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3250   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3251   const unsigned Num = Desc.getNumImplicitUses();
3252   for (unsigned i = 0; i < Num; ++i) {
3253     unsigned Reg = Desc.ImplicitUses[i];
3254     switch (Reg) {
3255     case AMDGPU::FLAT_SCR:
3256     case AMDGPU::VCC:
3257     case AMDGPU::VCC_LO:
3258     case AMDGPU::VCC_HI:
3259     case AMDGPU::M0:
3260       return Reg;
3261     default:
3262       break;
3263     }
3264   }
3265   return AMDGPU::NoRegister;
3266 }
3267 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
3272 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3273                                        unsigned OpIdx) const {
3274   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3275 
3276   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3277     return false;
3278   }
3279 
3280   const MCOperand &MO = Inst.getOperand(OpIdx);
3281 
3282   int64_t Val = MO.getImm();
3283   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3284 
3285   switch (OpSize) { // expected operand size
3286   case 8:
3287     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3288   case 4:
3289     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3290   case 2: {
3291     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3292     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3293         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3294         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3295       return AMDGPU::isInlinableIntLiteral(Val);
3296 
3297     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3298         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3299         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3300       return AMDGPU::isInlinableIntLiteralV216(Val);
3301 
3302     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3303         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3304         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3305       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3306 
3307     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3308   }
3309   default:
3310     llvm_unreachable("invalid operand size");
3311   }
3312 }
3313 
3314 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3315   if (!isGFX10Plus())
3316     return 1;
3317 
3318   switch (Opcode) {
3319   // 64-bit shift instructions can use only one scalar value input
3320   case AMDGPU::V_LSHLREV_B64_e64:
3321   case AMDGPU::V_LSHLREV_B64_gfx10:
3322   case AMDGPU::V_LSHRREV_B64_e64:
3323   case AMDGPU::V_LSHRREV_B64_gfx10:
3324   case AMDGPU::V_ASHRREV_I64_e64:
3325   case AMDGPU::V_ASHRREV_I64_gfx10:
3326   case AMDGPU::V_LSHL_B64_e64:
3327   case AMDGPU::V_LSHR_B64_e64:
3328   case AMDGPU::V_ASHR_I64_e64:
3329     return 1;
3330   default:
3331     return 2;
3332   }
3333 }
3334 
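// Return true if the operand at OpIdx occupies a slot on the constant bus:
// a non-inline immediate, an expression, or an SGPR other than null.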
3335 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3336   const MCOperand &MO = Inst.getOperand(OpIdx);
3337   if (MO.isImm()) {
3338     return !isInlineConstant(Inst, OpIdx);
3339   } else if (MO.isReg()) {
3340     auto Reg = MO.getReg();
3341     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3342     auto PReg = mc2PseudoReg(Reg);
3343     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3344   } else {
3345     return true;
3346   }
3347 }
3348 
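// Check that the instruction does not read more scalar values (SGPRs and
// literal constants) over the constant bus than the target allows. Identical
// SGPRs are counted once; a single literal counts once if all of its uses
// have the same operand size and twice otherwise.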
3349 bool
3350 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3351                                                 const OperandVector &Operands) {
3352   const unsigned Opcode = Inst.getOpcode();
3353   const MCInstrDesc &Desc = MII.get(Opcode);
3354   unsigned LastSGPR = AMDGPU::NoRegister;
3355   unsigned ConstantBusUseCount = 0;
3356   unsigned NumLiterals = 0;
3357   unsigned LiteralSize;
3358 
3359   if (Desc.TSFlags &
3360       (SIInstrFlags::VOPC |
3361        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3362        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3363        SIInstrFlags::SDWA)) {
3364     // Check special imm operands (used by madmk, etc)
3365     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3366       ++NumLiterals;
3367       LiteralSize = 4;
3368     }
3369 
3370     SmallDenseSet<unsigned> SGPRsUsed;
3371     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3372     if (SGPRUsed != AMDGPU::NoRegister) {
3373       SGPRsUsed.insert(SGPRUsed);
3374       ++ConstantBusUseCount;
3375     }
3376 
3377     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3378     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3379     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3380 
3381     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3382 
3383     for (int OpIdx : OpIndices) {
3384       if (OpIdx == -1) break;
3385 
3386       const MCOperand &MO = Inst.getOperand(OpIdx);
3387       if (usesConstantBus(Inst, OpIdx)) {
3388         if (MO.isReg()) {
3389           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection, such as
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but are disallowed anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction.
3396           if (!SGPRsUsed.count(LastSGPR)) {
3397             SGPRsUsed.insert(LastSGPR);
3398             ++ConstantBusUseCount;
3399           }
3400         } else { // Expression or a literal
3401 
3402           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3403             continue; // special operand like VINTERP attr_chan
3404 
3405           // An instruction may use only one literal.
3406           // This has been validated on the previous step.
3407           // See validateVOPLiteral.
3408           // This literal may be used as more than one operand.
3409           // If all these operands are of the same size,
3410           // this literal counts as one scalar value.
3411           // Otherwise it counts as 2 scalar values.
3412           // See "GFX10 Shader Programming", section 3.6.2.3.
3413 
3414           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3415           if (Size < 4) Size = 4;
3416 
3417           if (NumLiterals == 0) {
3418             NumLiterals = 1;
3419             LiteralSize = Size;
3420           } else if (LiteralSize != Size) {
3421             NumLiterals = 2;
3422           }
3423         }
3424       }
3425     }
3426   }
3427   ConstantBusUseCount += NumLiterals;
3428 
3429   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3430     return true;
3431 
3432   SMLoc LitLoc = getLitLoc(Operands);
3433   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3434   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3435   Error(Loc, "invalid operand (violates constant bus restrictions)");
3436   return false;
3437 }
3438 
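// For opcodes whose vdst operand is marked earlyclobber, the destination
// register must not overlap any source register.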
3439 bool
3440 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3441                                                  const OperandVector &Operands) {
3442   const unsigned Opcode = Inst.getOpcode();
3443   const MCInstrDesc &Desc = MII.get(Opcode);
3444 
3445   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3446   if (DstIdx == -1 ||
3447       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3448     return true;
3449   }
3450 
3451   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3452 
3453   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3454   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3455   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3456 
3457   assert(DstIdx != -1);
3458   const MCOperand &Dst = Inst.getOperand(DstIdx);
3459   assert(Dst.isReg());
3460 
3461   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3462 
3463   for (int SrcIdx : SrcIndices) {
3464     if (SrcIdx == -1) break;
3465     const MCOperand &Src = Inst.getOperand(SrcIdx);
3466     if (Src.isReg()) {
3467       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3468         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3469         Error(getRegLoc(SrcReg, Operands),
3470           "destination must be different than all sources");
3471         return false;
3472       }
3473     }
3474   }
3475 
3476   return true;
3477 }
3478 
3479 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3480 
3481   const unsigned Opc = Inst.getOpcode();
3482   const MCInstrDesc &Desc = MII.get(Opc);
3483 
3484   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3485     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3486     assert(ClampIdx != -1);
3487     return Inst.getOperand(ClampIdx).getImm() == 0;
3488   }
3489 
3490   return true;
3491 }
3492 
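// Check that the width of the MIMG vdata operand matches the number of
// channels enabled by dmask (plus one dword if tfe is set), taking packed
// d16 into account. Returns an error message on mismatch and None otherwise.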
3493 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3494 
3495   const unsigned Opc = Inst.getOpcode();
3496   const MCInstrDesc &Desc = MII.get(Opc);
3497 
3498   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3499     return None;
3500 
3501   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3502   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3503   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3504 
3505   assert(VDataIdx != -1);
3506 
3507   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3508     return None;
3509 
3510   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3511   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3512   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3513   if (DMask == 0)
3514     DMask = 1;
3515 
3516   bool isPackedD16 = false;
3517   unsigned DataSize =
3518     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3519   if (hasPackedD16()) {
3520     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3521     isPackedD16 = D16Idx >= 0;
3522     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3523       DataSize = (DataSize + 1) / 2;
3524   }
3525 
3526   if ((VDataSize / 4) == DataSize + TFESize)
3527     return None;
3528 
3529   return StringRef(isPackedD16
3530                        ? "image data size does not match dmask, d16 and tfe"
3531                        : "image data size does not match dmask and tfe");
3532 }
3533 
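// On GFX10+, check that the number of VGPRs used to hold the MIMG address
// matches the address size implied by dim, a16 and g16, allowing some
// oversized non-NSA vaddr tuples for backward compatibility.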
3534 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3535   const unsigned Opc = Inst.getOpcode();
3536   const MCInstrDesc &Desc = MII.get(Opc);
3537 
3538   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3539     return true;
3540 
3541   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3542 
3543   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3544       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3545   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3546   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3547   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3548   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3549 
3550   assert(VAddr0Idx != -1);
3551   assert(SrsrcIdx != -1);
3552   assert(SrsrcIdx > VAddr0Idx);
3553 
3554   if (DimIdx == -1)
3555     return true; // intersect_ray
3556 
3557   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3558   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3559   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3560   unsigned ActualAddrSize =
3561       IsNSA ? SrsrcIdx - VAddr0Idx
3562             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3563   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3564 
3565   unsigned ExpectedAddrSize =
3566       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3567 
3568   if (!IsNSA) {
3569     if (ExpectedAddrSize > 8)
3570       ExpectedAddrSize = 16;
3571 
3572     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3573     // This provides backward compatibility for assembly created
3574     // before 160b/192b/224b types were directly supported.
3575     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3576       return true;
3577   }
3578 
3579   return ActualAddrSize == ExpectedAddrSize;
3580 }
3581 
3582 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3583 
3584   const unsigned Opc = Inst.getOpcode();
3585   const MCInstrDesc &Desc = MII.get(Opc);
3586 
3587   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3588     return true;
3589   if (!Desc.mayLoad() || !Desc.mayStore())
3590     return true; // Not atomic
3591 
3592   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3593   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3594 
3595   // This is an incomplete check because image_atomic_cmpswap
3596   // may only use 0x3 and 0xf while other atomic operations
3597   // may use 0x1 and 0x3. However these limitations are
3598   // verified when we check that dmask matches dst size.
3599   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3600 }
3601 
3602 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3603 
3604   const unsigned Opc = Inst.getOpcode();
3605   const MCInstrDesc &Desc = MII.get(Opc);
3606 
3607   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3608     return true;
3609 
3610   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3611   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3612 
3613   // GATHER4 instructions use dmask in a different fashion compared to
3614   // other MIMG instructions. The only useful DMASK values are
3615   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3616   // (red,red,red,red) etc.) The ISA document doesn't mention
3617   // this.
3618   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3619 }
3620 
3621 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3622   const unsigned Opc = Inst.getOpcode();
3623   const MCInstrDesc &Desc = MII.get(Opc);
3624 
3625   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3626     return true;
3627 
3628   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3629   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3630       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3631 
3632   if (!BaseOpcode->MSAA)
3633     return true;
3634 
3635   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3636   assert(DimIdx != -1);
3637 
3638   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3639   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3640 
3641   return DimInfo->MSAA;
3642 }
3643 
3644 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3645 {
3646   switch (Opcode) {
3647   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3648   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3649   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3650     return true;
3651   default:
3652     return false;
3653   }
3654 }
3655 
// movrels* opcodes should only allow VGPRs as src0.
// This is specified in the .td description for vop1/vop3,
// but sdwa is handled differently. See isSDWAOperand.
3659 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3660                                       const OperandVector &Operands) {
3661 
3662   const unsigned Opc = Inst.getOpcode();
3663   const MCInstrDesc &Desc = MII.get(Opc);
3664 
3665   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3666     return true;
3667 
3668   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3669   assert(Src0Idx != -1);
3670 
3671   SMLoc ErrLoc;
3672   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3673   if (Src0.isReg()) {
3674     auto Reg = mc2PseudoReg(Src0.getReg());
3675     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3676     if (!isSGPR(Reg, TRI))
3677       return true;
3678     ErrLoc = getRegLoc(Reg, Operands);
3679   } else {
3680     ErrLoc = getConstLoc(Operands);
3681   }
3682 
3683   Error(ErrLoc, "source operand must be a VGPR");
3684   return false;
3685 }
3686 
3687 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3688                                           const OperandVector &Operands) {
3689 
3690   const unsigned Opc = Inst.getOpcode();
3691 
3692   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3693     return true;
3694 
3695   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3696   assert(Src0Idx != -1);
3697 
3698   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3699   if (!Src0.isReg())
3700     return true;
3701 
3702   auto Reg = mc2PseudoReg(Src0.getReg());
3703   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3704   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3705     Error(getRegLoc(Reg, Operands),
3706           "source operand must be either a VGPR or an inline constant");
3707     return false;
3708   }
3709 
3710   return true;
3711 }
3712 
3713 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3714                                    const OperandVector &Operands) {
3715   const unsigned Opc = Inst.getOpcode();
3716   const MCInstrDesc &Desc = MII.get(Opc);
3717 
3718   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3719     return true;
3720 
3721   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3722   if (Src2Idx == -1)
3723     return true;
3724 
3725   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3726   if (!Src2.isReg())
3727     return true;
3728 
3729   MCRegister Src2Reg = Src2.getReg();
3730   MCRegister DstReg = Inst.getOperand(0).getReg();
3731   if (Src2Reg == DstReg)
3732     return true;
3733 
3734   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3735   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3736     return true;
3737 
3738   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3739     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3740           "source 2 operand must not partially overlap with dst");
3741     return false;
3742   }
3743 
3744   return true;
3745 }
3746 
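// v_div_scale_* (VOP3B encoding) do not accept the abs modifier on their
// source operands.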
3747 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3748   switch (Inst.getOpcode()) {
3749   default:
3750     return true;
3751   case V_DIV_SCALE_F32_gfx6_gfx7:
3752   case V_DIV_SCALE_F32_vi:
3753   case V_DIV_SCALE_F32_gfx10:
3754   case V_DIV_SCALE_F64_gfx6_gfx7:
3755   case V_DIV_SCALE_F64_vi:
3756   case V_DIV_SCALE_F64_gfx10:
3757     break;
3758   }
3759 
3760   // TODO: Check that src0 = src1 or src2.
3761 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3765     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3766             .getImm() &
3767         SISrcMods::ABS) {
3768       return false;
3769     }
3770   }
3771 
3772   return true;
3773 }
3774 
3775 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3776 
3777   const unsigned Opc = Inst.getOpcode();
3778   const MCInstrDesc &Desc = MII.get(Opc);
3779 
3780   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3781     return true;
3782 
3783   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3784   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3785     if (isCI() || isSI())
3786       return false;
3787   }
3788 
3789   return true;
3790 }
3791 
3792 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3793   const unsigned Opc = Inst.getOpcode();
3794   const MCInstrDesc &Desc = MII.get(Opc);
3795 
3796   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3797     return true;
3798 
3799   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3800   if (DimIdx < 0)
3801     return true;
3802 
3803   long Imm = Inst.getOperand(DimIdx).getImm();
3804   if (Imm < 0 || Imm >= 8)
3805     return false;
3806 
3807   return true;
3808 }
3809 
3810 static bool IsRevOpcode(const unsigned Opcode)
3811 {
3812   switch (Opcode) {
3813   case AMDGPU::V_SUBREV_F32_e32:
3814   case AMDGPU::V_SUBREV_F32_e64:
3815   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3816   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3817   case AMDGPU::V_SUBREV_F32_e32_vi:
3818   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3819   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3820   case AMDGPU::V_SUBREV_F32_e64_vi:
3821 
3822   case AMDGPU::V_SUBREV_CO_U32_e32:
3823   case AMDGPU::V_SUBREV_CO_U32_e64:
3824   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3825   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3826 
3827   case AMDGPU::V_SUBBREV_U32_e32:
3828   case AMDGPU::V_SUBBREV_U32_e64:
3829   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3830   case AMDGPU::V_SUBBREV_U32_e32_vi:
3831   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3832   case AMDGPU::V_SUBBREV_U32_e64_vi:
3833 
3834   case AMDGPU::V_SUBREV_U32_e32:
3835   case AMDGPU::V_SUBREV_U32_e64:
3836   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3837   case AMDGPU::V_SUBREV_U32_e32_vi:
3838   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3839   case AMDGPU::V_SUBREV_U32_e64_vi:
3840 
3841   case AMDGPU::V_SUBREV_F16_e32:
3842   case AMDGPU::V_SUBREV_F16_e64:
3843   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3844   case AMDGPU::V_SUBREV_F16_e32_vi:
3845   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3846   case AMDGPU::V_SUBREV_F16_e64_vi:
3847 
3848   case AMDGPU::V_SUBREV_U16_e32:
3849   case AMDGPU::V_SUBREV_U16_e64:
3850   case AMDGPU::V_SUBREV_U16_e32_vi:
3851   case AMDGPU::V_SUBREV_U16_e64_vi:
3852 
3853   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3854   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3855   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3856 
3857   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3858   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3859 
3860   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3861   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3862 
3863   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3864   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3865 
3866   case AMDGPU::V_LSHRREV_B32_e32:
3867   case AMDGPU::V_LSHRREV_B32_e64:
3868   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3869   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3870   case AMDGPU::V_LSHRREV_B32_e32_vi:
3871   case AMDGPU::V_LSHRREV_B32_e64_vi:
3872   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3873   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3874 
3875   case AMDGPU::V_ASHRREV_I32_e32:
3876   case AMDGPU::V_ASHRREV_I32_e64:
3877   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3878   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3879   case AMDGPU::V_ASHRREV_I32_e32_vi:
3880   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3881   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3882   case AMDGPU::V_ASHRREV_I32_e64_vi:
3883 
3884   case AMDGPU::V_LSHLREV_B32_e32:
3885   case AMDGPU::V_LSHLREV_B32_e64:
3886   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3887   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3888   case AMDGPU::V_LSHLREV_B32_e32_vi:
3889   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3890   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3891   case AMDGPU::V_LSHLREV_B32_e64_vi:
3892 
3893   case AMDGPU::V_LSHLREV_B16_e32:
3894   case AMDGPU::V_LSHLREV_B16_e64:
3895   case AMDGPU::V_LSHLREV_B16_e32_vi:
3896   case AMDGPU::V_LSHLREV_B16_e64_vi:
3897   case AMDGPU::V_LSHLREV_B16_gfx10:
3898 
3899   case AMDGPU::V_LSHRREV_B16_e32:
3900   case AMDGPU::V_LSHRREV_B16_e64:
3901   case AMDGPU::V_LSHRREV_B16_e32_vi:
3902   case AMDGPU::V_LSHRREV_B16_e64_vi:
3903   case AMDGPU::V_LSHRREV_B16_gfx10:
3904 
3905   case AMDGPU::V_ASHRREV_I16_e32:
3906   case AMDGPU::V_ASHRREV_I16_e64:
3907   case AMDGPU::V_ASHRREV_I16_e32_vi:
3908   case AMDGPU::V_ASHRREV_I16_e64_vi:
3909   case AMDGPU::V_ASHRREV_I16_gfx10:
3910 
3911   case AMDGPU::V_LSHLREV_B64_e64:
3912   case AMDGPU::V_LSHLREV_B64_gfx10:
3913   case AMDGPU::V_LSHLREV_B64_vi:
3914 
3915   case AMDGPU::V_LSHRREV_B64_e64:
3916   case AMDGPU::V_LSHRREV_B64_gfx10:
3917   case AMDGPU::V_LSHRREV_B64_vi:
3918 
3919   case AMDGPU::V_ASHRREV_I64_e64:
3920   case AMDGPU::V_ASHRREV_I64_gfx10:
3921   case AMDGPU::V_ASHRREV_I64_vi:
3922 
3923   case AMDGPU::V_PK_LSHLREV_B16:
3924   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3925   case AMDGPU::V_PK_LSHLREV_B16_vi:
3926 
3927   case AMDGPU::V_PK_LSHRREV_B16:
3928   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3929   case AMDGPU::V_PK_LSHRREV_B16_vi:
3930   case AMDGPU::V_PK_ASHRREV_I16:
3931   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3932   case AMDGPU::V_PK_ASHRREV_I16_vi:
3933     return true;
3934   default:
3935     return false;
3936   }
3937 }
3938 
3939 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3940 
3941   using namespace SIInstrFlags;
3942   const unsigned Opcode = Inst.getOpcode();
3943   const MCInstrDesc &Desc = MII.get(Opcode);
3944 
  // The lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
3947   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3948   if ((Desc.TSFlags & Enc) == 0)
3949     return None;
3950 
3951   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3952     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3953     if (SrcIdx == -1)
3954       break;
3955     const auto &Src = Inst.getOperand(SrcIdx);
3956     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3957 
3958       if (isGFX90A())
3959         return StringRef("lds_direct is not supported on this GPU");
3960 
3961       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3962         return StringRef("lds_direct cannot be used with this instruction");
3963 
3964       if (SrcName != OpName::src0)
3965         return StringRef("lds_direct may be used as src0 only");
3966     }
3967   }
3968 
3969   return None;
3970 }
3971 
3972 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3973   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3974     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3975     if (Op.isFlatOffset())
3976       return Op.getStartLoc();
3977   }
3978   return getLoc();
3979 }
3980 
3981 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3982                                          const OperandVector &Operands) {
3983   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3984   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3985     return true;
3986 
3987   auto Opcode = Inst.getOpcode();
3988   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3989   assert(OpNum != -1);
3990 
3991   const auto &Op = Inst.getOperand(OpNum);
3992   if (!hasFlatOffsets() && Op.getImm() != 0) {
3993     Error(getFlatOffsetLoc(Operands),
3994           "flat offset modifier is not supported on this GPU");
3995     return false;
3996   }
3997 
3998   // For FLAT segment the offset must be positive;
3999   // MSB is ignored and forced to zero.
4000   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4001     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4002     if (!isIntN(OffsetSize, Op.getImm())) {
4003       Error(getFlatOffsetLoc(Operands),
4004             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4005       return false;
4006     }
4007   } else {
4008     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4009     if (!isUIntN(OffsetSize, Op.getImm())) {
4010       Error(getFlatOffsetLoc(Operands),
4011             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4012       return false;
4013     }
4014   }
4015 
4016   return true;
4017 }
4018 
4019 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4020   // Start with second operand because SMEM Offset cannot be dst or src0.
4021   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4022     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4023     if (Op.isSMEMOffset())
4024       return Op.getStartLoc();
4025   }
4026   return getLoc();
4027 }
4028 
4029 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4030                                          const OperandVector &Operands) {
4031   if (isCI() || isSI())
4032     return true;
4033 
4034   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4035   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4036     return true;
4037 
4038   auto Opcode = Inst.getOpcode();
4039   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4040   if (OpNum == -1)
4041     return true;
4042 
4043   const auto &Op = Inst.getOperand(OpNum);
4044   if (!Op.isImm())
4045     return true;
4046 
4047   uint64_t Offset = Op.getImm();
4048   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4049   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4050       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4051     return true;
4052 
4053   Error(getSMEMOffsetLoc(Operands),
4054         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4055                                "expected a 21-bit signed offset");
4056 
4057   return false;
4058 }
4059 
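// SOP2 and SOPC instructions may use at most one unique 32-bit literal or
// expression operand.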
4060 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4061   unsigned Opcode = Inst.getOpcode();
4062   const MCInstrDesc &Desc = MII.get(Opcode);
4063   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4064     return true;
4065 
4066   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4067   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4068 
4069   const int OpIndices[] = { Src0Idx, Src1Idx };
4070 
4071   unsigned NumExprs = 0;
4072   unsigned NumLiterals = 0;
4073   uint32_t LiteralValue;
4074 
4075   for (int OpIdx : OpIndices) {
4076     if (OpIdx == -1) break;
4077 
4078     const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (like the one used by s_set_gpr_idx_on).
4080     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4081       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4082         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4083         if (NumLiterals == 0 || LiteralValue != Value) {
4084           LiteralValue = Value;
4085           ++NumLiterals;
4086         }
4087       } else if (MO.isExpr()) {
4088         ++NumExprs;
4089       }
4090     }
4091   }
4092 
4093   return NumLiterals + NumExprs <= 1;
4094 }
4095 
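// For v_permlane16/v_permlanex16 only the two low op_sel bits may be set.
// On GFX940, DOT instructions must leave op_sel clear and keep op_sel_hi at
// its all-ones default.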
4096 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4097   const unsigned Opc = Inst.getOpcode();
4098   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4099       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4100     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4101     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4102 
4103     if (OpSel & ~3)
4104       return false;
4105   }
4106 
4107   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4108     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4109     if (OpSelIdx != -1) {
4110       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4111         return false;
4112     }
4113     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4114     if (OpSelHiIdx != -1) {
4115       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4116         return false;
4117     }
4118   }
4119 
4120   return true;
4121 }
4122 
4123 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4124                                   const OperandVector &Operands) {
4125   const unsigned Opc = Inst.getOpcode();
4126   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4127   if (DppCtrlIdx < 0)
4128     return true;
4129   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4130 
4131   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4132     // DPP64 is supported for row_newbcast only.
4133     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4134     if (Src0Idx >= 0 &&
4135         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4136       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4137       Error(S, "64 bit dpp only supports row_newbcast");
4138       return false;
4139     }
4140   }
4141 
4142   return true;
4143 }
4144 
4145 // Check if VCC register matches wavefront size
4146 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4147   auto FB = getFeatureBits();
4148   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4149     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4150 }
4151 
// Only one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
4153 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4154                                          const OperandVector &Operands) {
4155   unsigned Opcode = Inst.getOpcode();
4156   const MCInstrDesc &Desc = MII.get(Opcode);
4157   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4158   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4159       ImmIdx == -1)
4160     return true;
4161 
4162   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4163   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4164   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4165 
4166   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4167 
4168   unsigned NumExprs = 0;
4169   unsigned NumLiterals = 0;
4170   uint32_t LiteralValue;
4171 
4172   for (int OpIdx : OpIndices) {
4173     if (OpIdx == -1)
4174       continue;
4175 
4176     const MCOperand &MO = Inst.getOperand(OpIdx);
4177     if (!MO.isImm() && !MO.isExpr())
4178       continue;
4179     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4180       continue;
4181 
4182     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4183         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4184       Error(getConstLoc(Operands),
4185             "inline constants are not allowed for this operand");
4186       return false;
4187     }
4188 
4189     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4190       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4191       if (NumLiterals == 0 || LiteralValue != Value) {
4192         LiteralValue = Value;
4193         ++NumLiterals;
4194       }
4195     } else if (MO.isExpr()) {
4196       ++NumExprs;
4197     }
4198   }
4199   NumLiterals += NumExprs;
4200 
4201   if (!NumLiterals)
4202     return true;
4203 
4204   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4205     Error(getLitLoc(Operands), "literal operands are not supported");
4206     return false;
4207   }
4208 
4209   if (NumLiterals > 1) {
4210     Error(getLitLoc(Operands), "only one literal operand is allowed");
4211     return false;
4212   }
4213 
4214   return true;
4215 }
4216 
4217 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4218 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4219                          const MCRegisterInfo *MRI) {
4220   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4221   if (OpIdx < 0)
4222     return -1;
4223 
4224   const MCOperand &Op = Inst.getOperand(OpIdx);
4225   if (!Op.isReg())
4226     return -1;
4227 
4228   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4229   auto Reg = Sub ? Sub : Op.getReg();
4230   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4231   return AGPR32.contains(Reg) ? 1 : 0;
4232 }
4233 
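// For FLAT, MUBUF, MTBUF, MIMG and DS memory instructions, check that the
// data and destination operands are consistently VGPRs or consistently
// AGPRs. AGPR operands are only accepted on targets with gfx90a
// instructions.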
4234 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4235   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4236   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4237                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4238                   SIInstrFlags::DS)) == 0)
4239     return true;
4240 
4241   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4242                                                       : AMDGPU::OpName::vdata;
4243 
4244   const MCRegisterInfo *MRI = getMRI();
4245   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4246   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4247 
4248   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4249     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4250     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4251       return false;
4252   }
4253 
4254   auto FB = getFeatureBits();
4255   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4256     if (DataAreg < 0 || DstAreg < 0)
4257       return true;
4258     return DstAreg == DataAreg;
4259   }
4260 
4261   return DstAreg < 1 && DataAreg < 1;
4262 }
4263 
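// On targets with gfx90a instructions, register tuples must be 64-bit
// aligned, i.e. start at an even VGPR or AGPR.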
4264 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4265   auto FB = getFeatureBits();
4266   if (!FB[AMDGPU::FeatureGFX90AInsts])
4267     return true;
4268 
4269   const MCRegisterInfo *MRI = getMRI();
4270   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4271   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4272   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4273     const MCOperand &Op = Inst.getOperand(I);
4274     if (!Op.isReg())
4275       continue;
4276 
4277     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4278     if (!Sub)
4279       continue;
4280 
4281     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4282       return false;
4283     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4284       return false;
4285   }
4286 
4287   return true;
4288 }
4289 
4290 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4291   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4292     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4293     if (Op.isBLGP())
4294       return Op.getStartLoc();
4295   }
4296   return SMLoc();
4297 }
4298 
4299 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4300                                    const OperandVector &Operands) {
4301   unsigned Opc = Inst.getOpcode();
4302   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4303   if (BlgpIdx == -1)
4304     return true;
4305   SMLoc BLGPLoc = getBLGPLoc(Operands);
4306   if (!BLGPLoc.isValid())
4307     return true;
4308   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4309   auto FB = getFeatureBits();
4310   bool UsesNeg = false;
4311   if (FB[AMDGPU::FeatureGFX940Insts]) {
4312     switch (Opc) {
4313     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4314     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4315     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4316     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4317       UsesNeg = true;
4318     }
4319   }
4320 
4321   if (IsNeg == UsesNeg)
4322     return true;
4323 
4324   Error(BLGPLoc,
4325         UsesNeg ? "invalid modifier: blgp is not supported"
4326                 : "invalid modifier: neg is not supported");
4327 
4328   return false;
4329 }
4330 
4331 // gfx90a has an undocumented limitation:
4332 // DS_GWS opcodes must use even aligned registers.
4333 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4334                                   const OperandVector &Operands) {
4335   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4336     return true;
4337 
4338   int Opc = Inst.getOpcode();
4339   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4340       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4341     return true;
4342 
4343   const MCRegisterInfo *MRI = getMRI();
4344   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4345   int Data0Pos =
4346       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4347   assert(Data0Pos != -1);
4348   auto Reg = Inst.getOperand(Data0Pos).getReg();
4349   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4350   if (RegIdx & 1) {
4351     SMLoc RegLoc = getRegLoc(Reg, Operands);
4352     Error(RegLoc, "vgpr must be even aligned");
4353     return false;
4354   }
4355 
4356   return true;
4357 }
4358 
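// Validate the cache policy operand: SMEM instructions accept only glc and
// dlc, scc is unavailable on gfx90a (excluding gfx940), non-MIMG returning
// atomics must set glc (sc0 on gfx940), and non-returning atomics must not.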
4359 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4360                                             const OperandVector &Operands,
4361                                             const SMLoc &IDLoc) {
4362   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4363                                            AMDGPU::OpName::cpol);
4364   if (CPolPos == -1)
4365     return true;
4366 
4367   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4368 
4369   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4370   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4371       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4372     Error(IDLoc, "invalid cache policy for SMRD instruction");
4373     return false;
4374   }
4375 
4376   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4377     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4378     StringRef CStr(S.getPointer());
4379     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4380     Error(S, "scc is not supported on this GPU");
4381     return false;
4382   }
4383 
4384   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4385     return true;
4386 
4387   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4388     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4389       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4390                               : "instruction must use glc");
4391       return false;
4392     }
4393   } else {
4394     if (CPol & CPol::GLC) {
4395       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4396       StringRef CStr(S.getPointer());
4397       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4398       Error(S, isGFX940() ? "instruction must not use sc0"
4399                           : "instruction must not use glc");
4400       return false;
4401     }
4402   }
4403 
4404   return true;
4405 }
4406 
4407 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4408                                           const SMLoc &IDLoc,
4409                                           const OperandVector &Operands) {
4410   if (auto ErrMsg = validateLdsDirect(Inst)) {
4411     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4412     return false;
4413   }
4414   if (!validateSOPLiteral(Inst)) {
4415     Error(getLitLoc(Operands),
4416       "only one literal operand is allowed");
4417     return false;
4418   }
4419   if (!validateVOPLiteral(Inst, Operands)) {
4420     return false;
4421   }
4422   if (!validateConstantBusLimitations(Inst, Operands)) {
4423     return false;
4424   }
4425   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4426     return false;
4427   }
4428   if (!validateIntClampSupported(Inst)) {
4429     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4430       "integer clamping is not supported on this GPU");
4431     return false;
4432   }
4433   if (!validateOpSel(Inst)) {
4434     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4435       "invalid op_sel operand");
4436     return false;
4437   }
4438   if (!validateDPP(Inst, Operands)) {
4439     return false;
4440   }
  // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
4442   if (!validateMIMGD16(Inst)) {
4443     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4444       "d16 modifier is not supported on this GPU");
4445     return false;
4446   }
4447   if (!validateMIMGDim(Inst)) {
4448     Error(IDLoc, "dim modifier is required on this GPU");
4449     return false;
4450   }
4451   if (!validateMIMGMSAA(Inst)) {
4452     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4453           "invalid dim; must be MSAA type");
4454     return false;
4455   }
4456   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4457     Error(IDLoc, *ErrMsg);
4458     return false;
4459   }
4460   if (!validateMIMGAddrSize(Inst)) {
4461     Error(IDLoc,
4462       "image address size does not match dim and a16");
4463     return false;
4464   }
4465   if (!validateMIMGAtomicDMask(Inst)) {
4466     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4467       "invalid atomic image dmask");
4468     return false;
4469   }
4470   if (!validateMIMGGatherDMask(Inst)) {
4471     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4472       "invalid image_gather dmask: only one bit must be set");
4473     return false;
4474   }
4475   if (!validateMovrels(Inst, Operands)) {
4476     return false;
4477   }
4478   if (!validateFlatOffset(Inst, Operands)) {
4479     return false;
4480   }
4481   if (!validateSMEMOffset(Inst, Operands)) {
4482     return false;
4483   }
4484   if (!validateMAIAccWrite(Inst, Operands)) {
4485     return false;
4486   }
4487   if (!validateMFMA(Inst, Operands)) {
4488     return false;
4489   }
4490   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4491     return false;
4492   }
4493 
4494   if (!validateAGPRLdSt(Inst)) {
4495     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4496     ? "invalid register class: data and dst should be all VGPR or AGPR"
4497     : "invalid register class: agpr loads and stores not supported on this GPU"
4498     );
4499     return false;
4500   }
4501   if (!validateVGPRAlign(Inst)) {
4502     Error(IDLoc,
4503       "invalid register class: vgpr tuples must be 64 bit aligned");
4504     return false;
4505   }
4506   if (!validateGWS(Inst, Operands)) {
4507     return false;
4508   }
4509 
4510   if (!validateBLGP(Inst, Operands)) {
4511     return false;
4512   }
4513 
4514   if (!validateDivScale(Inst)) {
4515     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4516     return false;
4517   }
4521 
4522   return true;
4523 }
4524 
4525 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4526                                             const FeatureBitset &FBS,
4527                                             unsigned VariantID = 0);
4528 
4529 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4530                                 const FeatureBitset &AvailableFeatures,
4531                                 unsigned VariantID);
4532 
4533 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4534                                        const FeatureBitset &FBS) {
4535   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4536 }
4537 
4538 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4539                                        const FeatureBitset &FBS,
4540                                        ArrayRef<unsigned> Variants) {
4541   for (auto Variant : Variants) {
4542     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4543       return true;
4544   }
4545 
4546   return false;
4547 }
4548 
4549 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4550                                                   const SMLoc &IDLoc) {
4551   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4552 
4553   // Check if requested instruction variant is supported.
4554   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4555     return false;
4556 
4557   // This instruction is not supported.
4558   // Clear any other pending errors because they are no longer relevant.
4559   getParser().clearPendingErrors();
4560 
4561   // Requested instruction variant is not supported.
4562   // Check if any other variants are supported.
4563   StringRef VariantName = getMatchedVariantName();
4564   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4565     return Error(IDLoc,
4566                  Twine(VariantName,
4567                        " variant of this instruction is not supported"));
4568   }
4569 
4570   // Finally check if this instruction is supported on any other GPU.
4571   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4572     return Error(IDLoc, "instruction not supported on this GPU");
4573   }
4574 
4575   // Instruction not supported on any GPU. Probably a typo.
4576   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4577   return Error(IDLoc, "invalid instruction" + Suggestion);
4578 }
4579 
4580 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4581                                               OperandVector &Operands,
4582                                               MCStreamer &Out,
4583                                               uint64_t &ErrorInfo,
4584                                               bool MatchingInlineAsm) {
4585   MCInst Inst;
4586   unsigned Result = Match_Success;
4587   for (auto Variant : getMatchedVariants()) {
4588     uint64_t EI;
4589     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4590                                   Variant);
    // We order match statuses from least to most specific and use the most
    // specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4594     if ((R == Match_Success) ||
4595         (R == Match_PreferE32) ||
4596         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4597         (R == Match_InvalidOperand && Result != Match_MissingFeature
4598                                    && Result != Match_PreferE32) ||
4599         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4600                                    && Result != Match_MissingFeature
4601                                    && Result != Match_PreferE32)) {
4602       Result = R;
4603       ErrorInfo = EI;
4604     }
4605     if (R == Match_Success)
4606       break;
4607   }
4608 
4609   if (Result == Match_Success) {
4610     if (!validateInstruction(Inst, IDLoc, Operands)) {
4611       return true;
4612     }
4613     Inst.setLoc(IDLoc);
4614     Out.emitInstruction(Inst, getSTI());
4615     return false;
4616   }
4617 
4618   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4619   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4620     return true;
4621   }
4622 
4623   switch (Result) {
4624   default: break;
4625   case Match_MissingFeature:
4626     // It has been verified that the specified instruction
4627     // mnemonic is valid. A match was found but it requires
4628     // features which are not supported on this GPU.
4629     return Error(IDLoc, "operands are not valid for this GPU or mode");
4630 
4631   case Match_InvalidOperand: {
4632     SMLoc ErrorLoc = IDLoc;
4633     if (ErrorInfo != ~0ULL) {
4634       if (ErrorInfo >= Operands.size()) {
4635         return Error(IDLoc, "too few operands for instruction");
4636       }
4637       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4638       if (ErrorLoc == SMLoc())
4639         ErrorLoc = IDLoc;
4640     }
4641     return Error(ErrorLoc, "invalid operand for instruction");
4642   }
4643 
4644   case Match_PreferE32:
4645     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4646                         "should be encoded as e32");
4647   case Match_MnemonicFail:
4648     llvm_unreachable("Invalid instructions should have been handled already");
4649   }
4650   llvm_unreachable("Implement any new match types added!");
4651 }
4652 
4653 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4654   int64_t Tmp = -1;
4655   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4656     return true;
4657   }
4658   if (getParser().parseAbsoluteExpression(Tmp)) {
4659     return true;
4660   }
4661   Ret = static_cast<uint32_t>(Tmp);
4662   return false;
4663 }
4664 
4665 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4666                                                uint32_t &Minor) {
4667   if (ParseAsAbsoluteExpression(Major))
4668     return TokError("invalid major version");
4669 
4670   if (!trySkipToken(AsmToken::Comma))
4671     return TokError("minor version number required, comma expected");
4672 
4673   if (ParseAsAbsoluteExpression(Minor))
4674     return TokError("invalid minor version");
4675 
4676   return false;
4677 }
4678 
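// Parse the .amdgcn_target directive, e.g.
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx908"
// and verify that the quoted target id matches the target the assembler was
// configured for.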
4679 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4680   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4681     return TokError("directive only supported for amdgcn architecture");
4682 
4683   std::string TargetIDDirective;
4684   SMLoc TargetStart = getTok().getLoc();
4685   if (getParser().parseEscapedString(TargetIDDirective))
4686     return true;
4687 
4688   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4689   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4690     return getParser().Error(TargetRange.Start,
4691         (Twine(".amdgcn_target directive's target id ") +
4692          Twine(TargetIDDirective) +
4693          Twine(" does not match the specified target id ") +
4694          Twine(getTargetStreamer().getTargetID()->toString())).str());
4695 
4696   return false;
4697 }
4698 
4699 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4700   return Error(Range.Start, "value out of range", Range);
4701 }
4702 
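/// Compute the granulated VGPR/SGPR block counts that go into
/// COMPUTE_PGM_RSRC1, starting from the next-free register numbers and
/// accounting for VCC/FLAT_SCRATCH/XNACK usage and the SGPR init bug.
/// Returns true (after reporting an out-of-range error) if a register count
/// exceeds the addressable limit.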
4703 bool AMDGPUAsmParser::calculateGPRBlocks(
4704     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4705     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4706     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4707     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4708   // TODO(scott.linder): These calculations are duplicated from
4709   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4710   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4711 
4712   unsigned NumVGPRs = NextFreeVGPR;
4713   unsigned NumSGPRs = NextFreeSGPR;
4714 
4715   if (Version.Major >= 10)
4716     NumSGPRs = 0;
4717   else {
4718     unsigned MaxAddressableNumSGPRs =
4719         IsaInfo::getAddressableNumSGPRs(&getSTI());
4720 
4721     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4722         NumSGPRs > MaxAddressableNumSGPRs)
4723       return OutOfRangeError(SGPRRange);
4724 
4725     NumSGPRs +=
4726         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4727 
4728     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4729         NumSGPRs > MaxAddressableNumSGPRs)
4730       return OutOfRangeError(SGPRRange);
4731 
4732     if (Features.test(FeatureSGPRInitBug))
4733       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4734   }
4735 
4736   VGPRBlocks =
4737       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4738   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4739 
4740   return false;
4741 }
4742 
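/// ParseDirectiveAMDHSAKernel
///  ::= .amdhsa_kernel <name>
///      (.amdhsa_* <integer-expression>)*
///      .end_amdhsa_kernel
/// Each .amdhsa_* record may appear at most once; .amdhsa_next_free_vgpr and
/// .amdhsa_next_free_sgpr are mandatory.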
4743 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4744   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4745     return TokError("directive only supported for amdgcn architecture");
4746 
4747   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4748     return TokError("directive only supported for amdhsa OS");
4749 
4750   StringRef KernelName;
4751   if (getParser().parseIdentifier(KernelName))
4752     return true;
4753 
4754   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4755 
4756   StringSet<> Seen;
4757 
4758   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4759 
4760   SMRange VGPRRange;
4761   uint64_t NextFreeVGPR = 0;
4762   uint64_t AccumOffset = 0;
4763   uint64_t SharedVGPRCount = 0;
4764   SMRange SGPRRange;
4765   uint64_t NextFreeSGPR = 0;
4766 
4767   // Count the number of user SGPRs implied by the enabled feature bits.
4768   unsigned ImpliedUserSGPRCount = 0;
4769 
4770   // Track if the asm explicitly contains the directive for the user SGPR
4771   // count.
4772   Optional<unsigned> ExplicitUserSGPRCount;
4773   bool ReserveVCC = true;
4774   bool ReserveFlatScr = true;
4775   Optional<bool> EnableWavefrontSize32;
4776 
4777   while (true) {
4778     while (trySkipToken(AsmToken::EndOfStatement));
4779 
4780     StringRef ID;
4781     SMRange IDRange = getTok().getLocRange();
4782     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4783       return true;
4784 
4785     if (ID == ".end_amdhsa_kernel")
4786       break;
4787 
4788     if (Seen.find(ID) != Seen.end())
4789       return TokError(".amdhsa_ directives cannot be repeated");
4790     Seen.insert(ID);
4791 
4792     SMLoc ValStart = getLoc();
4793     int64_t IVal;
4794     if (getParser().parseAbsoluteExpression(IVal))
4795       return true;
4796     SMLoc ValEnd = getLoc();
4797     SMRange ValRange = SMRange(ValStart, ValEnd);
4798 
4799     if (IVal < 0)
4800       return OutOfRangeError(ValRange);
4801 
4802     uint64_t Val = IVal;
4803 
4804 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4805   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4806     return OutOfRangeError(RANGE);                                             \
4807   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4808 
4809     if (ID == ".amdhsa_group_segment_fixed_size") {
4810       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4811         return OutOfRangeError(ValRange);
4812       KD.group_segment_fixed_size = Val;
4813     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4814       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4815         return OutOfRangeError(ValRange);
4816       KD.private_segment_fixed_size = Val;
4817     } else if (ID == ".amdhsa_kernarg_size") {
4818       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4819         return OutOfRangeError(ValRange);
4820       KD.kernarg_size = Val;
4821     } else if (ID == ".amdhsa_user_sgpr_count") {
4822       ExplicitUserSGPRCount = Val;
4823     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4824       if (hasArchitectedFlatScratch())
4825         return Error(IDRange.Start,
4826                      "directive is not supported with architected flat scratch",
4827                      IDRange);
4828       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4829                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4830                        Val, ValRange);
4831       if (Val)
4832         ImpliedUserSGPRCount += 4;
4833     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4834       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4835                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4836                        ValRange);
4837       if (Val)
4838         ImpliedUserSGPRCount += 2;
4839     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4840       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4841                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4842                        ValRange);
4843       if (Val)
4844         ImpliedUserSGPRCount += 2;
4845     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4846       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4847                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4848                        Val, ValRange);
4849       if (Val)
4850         ImpliedUserSGPRCount += 2;
4851     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4852       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4853                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4854                        ValRange);
4855       if (Val)
4856         ImpliedUserSGPRCount += 2;
4857     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4858       if (hasArchitectedFlatScratch())
4859         return Error(IDRange.Start,
4860                      "directive is not supported with architected flat scratch",
4861                      IDRange);
4862       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4863                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4864                        ValRange);
4865       if (Val)
4866         ImpliedUserSGPRCount += 2;
4867     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4868       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4869                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4870                        Val, ValRange);
4871       if (Val)
4872         ImpliedUserSGPRCount += 1;
4873     } else if (ID == ".amdhsa_wavefront_size32") {
4874       if (IVersion.Major < 10)
4875         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4876       EnableWavefrontSize32 = Val;
4877       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4878                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4879                        Val, ValRange);
4880     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4881       if (hasArchitectedFlatScratch())
4882         return Error(IDRange.Start,
4883                      "directive is not supported with architected flat scratch",
4884                      IDRange);
4885       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4886                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4887     } else if (ID == ".amdhsa_enable_private_segment") {
4888       if (!hasArchitectedFlatScratch())
4889         return Error(
4890             IDRange.Start,
4891             "directive is not supported without architected flat scratch",
4892             IDRange);
4893       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4894                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4895     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4896       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4897                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4898                        ValRange);
4899     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4900       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4901                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4902                        ValRange);
4903     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4904       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4905                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4906                        ValRange);
4907     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4908       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4909                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4910                        ValRange);
4911     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4912       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4913                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4914                        ValRange);
4915     } else if (ID == ".amdhsa_next_free_vgpr") {
4916       VGPRRange = ValRange;
4917       NextFreeVGPR = Val;
4918     } else if (ID == ".amdhsa_next_free_sgpr") {
4919       SGPRRange = ValRange;
4920       NextFreeSGPR = Val;
4921     } else if (ID == ".amdhsa_accum_offset") {
4922       if (!isGFX90A())
4923         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4924       AccumOffset = Val;
4925     } else if (ID == ".amdhsa_reserve_vcc") {
4926       if (!isUInt<1>(Val))
4927         return OutOfRangeError(ValRange);
4928       ReserveVCC = Val;
4929     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4930       if (IVersion.Major < 7)
4931         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4932       if (hasArchitectedFlatScratch())
4933         return Error(IDRange.Start,
4934                      "directive is not supported with architected flat scratch",
4935                      IDRange);
4936       if (!isUInt<1>(Val))
4937         return OutOfRangeError(ValRange);
4938       ReserveFlatScr = Val;
4939     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4940       if (IVersion.Major < 8)
4941         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4942       if (!isUInt<1>(Val))
4943         return OutOfRangeError(ValRange);
4944       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4945         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4946                                  IDRange);
4947     } else if (ID == ".amdhsa_float_round_mode_32") {
4948       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4949                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4950     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4951       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4952                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4953     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4954       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4955                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4956     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4957       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4958                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4959                        ValRange);
4960     } else if (ID == ".amdhsa_dx10_clamp") {
4961       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4962                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4963     } else if (ID == ".amdhsa_ieee_mode") {
4964       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4965                        Val, ValRange);
4966     } else if (ID == ".amdhsa_fp16_overflow") {
4967       if (IVersion.Major < 9)
4968         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4969       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4970                        ValRange);
4971     } else if (ID == ".amdhsa_tg_split") {
4972       if (!isGFX90A())
4973         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4974       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4975                        ValRange);
4976     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4977       if (IVersion.Major < 10)
4978         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4979       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4980                        ValRange);
4981     } else if (ID == ".amdhsa_memory_ordered") {
4982       if (IVersion.Major < 10)
4983         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4984       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4985                        ValRange);
4986     } else if (ID == ".amdhsa_forward_progress") {
4987       if (IVersion.Major < 10)
4988         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4989       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4990                        ValRange);
4991     } else if (ID == ".amdhsa_shared_vgpr_count") {
4992       if (IVersion.Major < 10)
4993         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4994       SharedVGPRCount = Val;
4995       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
4996                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
4997                        ValRange);
4998     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4999       PARSE_BITS_ENTRY(
5000           KD.compute_pgm_rsrc2,
5001           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5002           ValRange);
5003     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5004       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5005                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5006                        Val, ValRange);
5007     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5008       PARSE_BITS_ENTRY(
5009           KD.compute_pgm_rsrc2,
5010           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5011           ValRange);
5012     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5013       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5014                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5015                        Val, ValRange);
5016     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5017       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5018                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5019                        Val, ValRange);
5020     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5021       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5022                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5023                        Val, ValRange);
5024     } else if (ID == ".amdhsa_exception_int_div_zero") {
5025       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5026                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5027                        Val, ValRange);
5028     } else {
5029       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5030     }
5031 
5032 #undef PARSE_BITS_ENTRY
5033   }
5034 
5035   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5036     return TokError(".amdhsa_next_free_vgpr directive is required");
5037 
5038   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5039     return TokError(".amdhsa_next_free_sgpr directive is required");
5040 
5041   unsigned VGPRBlocks;
5042   unsigned SGPRBlocks;
5043   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5044                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5045                          EnableWavefrontSize32, NextFreeVGPR,
5046                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5047                          SGPRBlocks))
5048     return true;
5049 
5050   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5051           VGPRBlocks))
5052     return OutOfRangeError(VGPRRange);
5053   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5054                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5055 
5056   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5057           SGPRBlocks))
5058     return OutOfRangeError(SGPRRange);
5059   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5060                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5061                   SGPRBlocks);
5062 
5063   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5064     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5065                     "enabled user SGPRs");
5066 
5067   unsigned UserSGPRCount =
5068       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5069 
5070   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5071     return TokError("too many user SGPRs enabled");
5072   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5073                   UserSGPRCount);
5074 
5075   if (isGFX90A()) {
5076     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5077       return TokError(".amdhsa_accum_offset directive is required");
5078     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5079       return TokError("accum_offset should be in range [4..256] in "
5080                       "increments of 4");
5081     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5082       return TokError("accum_offset exceeds total VGPR allocation");
5083     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5084                     (AccumOffset / 4 - 1));
5085   }
5086 
5087   if (IVersion.Major == 10) {
5088     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5089     if (SharedVGPRCount && EnableWavefrontSize32) {
5090       return TokError("shared_vgpr_count directive not valid on "
5091                       "wavefront size 32");
5092     }
5093     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5094       return TokError("shared_vgpr_count*2 + "
5095                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5096                       "exceed 63\n");
5097     }
5098   }
5099 
5100   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5101       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5102       ReserveFlatScr);
5103   return false;
5104 }
5105 
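/// ParseDirectiveHSACodeObjectVersion
///  ::= .hsa_code_object_version <major>, <minor>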
5106 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5107   uint32_t Major;
5108   uint32_t Minor;
5109 
5110   if (ParseDirectiveMajorMinor(Major, Minor))
5111     return true;
5112 
5113   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5114   return false;
5115 }
5116 
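/// ParseDirectiveHSACodeObjectISA
///  ::= .hsa_code_object_isa
///  ::= .hsa_code_object_isa <major>, <minor>, <stepping>, "<vendor>", "<arch>"
/// The argument-less form emits the ISA version of the targeted GPU with
/// vendor "AMD" and arch "AMDGPU".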
5117 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5118   uint32_t Major;
5119   uint32_t Minor;
5120   uint32_t Stepping;
5121   StringRef VendorName;
5122   StringRef ArchName;
5123 
5124   // If this directive has no arguments, then use the ISA version for the
5125   // targeted GPU.
5126   if (isToken(AsmToken::EndOfStatement)) {
5127     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5128     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5129                                                         ISA.Stepping,
5130                                                         "AMD", "AMDGPU");
5131     return false;
5132   }
5133 
5134   if (ParseDirectiveMajorMinor(Major, Minor))
5135     return true;
5136 
5137   if (!trySkipToken(AsmToken::Comma))
5138     return TokError("stepping version number required, comma expected");
5139 
5140   if (ParseAsAbsoluteExpression(Stepping))
5141     return TokError("invalid stepping version");
5142 
5143   if (!trySkipToken(AsmToken::Comma))
5144     return TokError("vendor name required, comma expected");
5145 
5146   if (!parseString(VendorName, "invalid vendor name"))
5147     return true;
5148 
5149   if (!trySkipToken(AsmToken::Comma))
5150     return TokError("arch name required, comma expected");
5151 
5152   if (!parseString(ArchName, "invalid arch name"))
5153     return true;
5154 
5155   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5156                                                       VendorName, ArchName);
5157   return false;
5158 }
5159 
5160 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5161                                                amd_kernel_code_t &Header) {
5162   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5163   // assembly for backwards compatibility.
5164   if (ID == "max_scratch_backing_memory_byte_size") {
5165     Parser.eatToEndOfStatement();
5166     return false;
5167   }
5168 
5169   SmallString<40> ErrStr;
5170   raw_svector_ostream Err(ErrStr);
5171   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5172     return TokError(Err.str());
5173   }
5174   Lex();
5175 
5176   if (ID == "enable_wavefront_size32") {
5177     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5178       if (!isGFX10Plus())
5179         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5180       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5181         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5182     } else {
5183       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5184         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5185     }
5186   }
5187 
5188   if (ID == "wavefront_size") {
5189     if (Header.wavefront_size == 5) {
5190       if (!isGFX10Plus())
5191         return TokError("wavefront_size=5 is only allowed on GFX10+");
5192       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5193         return TokError("wavefront_size=5 requires +WavefrontSize32");
5194     } else if (Header.wavefront_size == 6) {
5195       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5196         return TokError("wavefront_size=6 requires +WavefrontSize64");
5197     }
5198   }
5199 
5200   if (ID == "enable_wgp_mode") {
5201     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5202         !isGFX10Plus())
5203       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5204   }
5205 
5206   if (ID == "enable_mem_ordered") {
5207     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5208         !isGFX10Plus())
5209       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5210   }
5211 
5212   if (ID == "enable_fwd_progress") {
5213     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5214         !isGFX10Plus())
5215       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5216   }
5217 
5218   return false;
5219 }
5220 
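/// ParseDirectiveAMDKernelCodeT
///  ::= .amd_kernel_code_t
///      (<field> = <value>)*
///      .end_amd_kernel_code_t
/// Fields that are not specified keep the defaults chosen for the current
/// subtarget.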
5221 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5222   amd_kernel_code_t Header;
5223   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5224 
5225   while (true) {
5226     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5227     // will set the current token to EndOfStatement.
5228     while (trySkipToken(AsmToken::EndOfStatement));
5229 
5230     StringRef ID;
5231     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5232       return true;
5233 
5234     if (ID == ".end_amd_kernel_code_t")
5235       break;
5236 
5237     if (ParseAMDKernelCodeTValue(ID, Header))
5238       return true;
5239   }
5240 
5241   getTargetStreamer().EmitAMDKernelCodeT(Header);
5242 
5243   return false;
5244 }
5245 
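/// ParseDirectiveAMDGPUHsaKernel
///  ::= .amdgpu_hsa_kernel <symbol>
/// Marks <symbol> as an HSA kernel (STT_AMDGPU_HSA_KERNEL) and reinitializes
/// the per-kernel scope.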
5246 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5247   StringRef KernelName;
5248   if (!parseId(KernelName, "expected symbol name"))
5249     return true;
5250 
5251   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5252                                            ELF::STT_AMDGPU_HSA_KERNEL);
5253 
5254   KernelScope.initialize(getContext());
5255   return false;
5256 }
5257 
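/// ParseDirectiveISAVersion
///  ::= .amd_amdgpu_isa "<target-id>"
/// Only available on amdgcn targets; the quoted target id must match the one
/// the target streamer was configured with.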
5258 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5259   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5260     return Error(getLoc(),
5261                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5262                  "architectures");
5263   }
5264 
5265   auto TargetIDDirective = getLexer().getTok().getStringContents();
5266   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5267     return Error(getParser().getTok().getLoc(), "target id must match options");
5268 
5269   getTargetStreamer().EmitISAVersion();
5270   Lex();
5271 
5272   return false;
5273 }
5274 
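/// Parse the HSA metadata block between the ABI-version dependent begin and
/// end directives and pass the collected text to the target streamer as V2 or
/// V3 metadata.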
5275 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5276   const char *AssemblerDirectiveBegin;
5277   const char *AssemblerDirectiveEnd;
5278   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5279       isHsaAbiVersion3AndAbove(&getSTI())
5280           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5281                             HSAMD::V3::AssemblerDirectiveEnd)
5282           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5283                             HSAMD::AssemblerDirectiveEnd);
5284 
5285   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5286     return Error(getLoc(),
5287                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5288                  "not available on non-amdhsa OSes")).str());
5289   }
5290 
5291   std::string HSAMetadataString;
5292   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5293                           HSAMetadataString))
5294     return true;
5295 
5296   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5297     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5298       return Error(getLoc(), "invalid HSA metadata");
5299   } else {
5300     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5301       return Error(getLoc(), "invalid HSA metadata");
5302   }
5303 
5304   return false;
5305 }
5306 
5307 /// Common code to parse out a block of text (typically YAML) between start and
5308 /// end directives.
5309 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5310                                           const char *AssemblerDirectiveEnd,
5311                                           std::string &CollectString) {
5312 
5313   raw_string_ostream CollectStream(CollectString);
5314 
5315   getLexer().setSkipSpace(false);
5316 
5317   bool FoundEnd = false;
5318   while (!isToken(AsmToken::Eof)) {
5319     while (isToken(AsmToken::Space)) {
5320       CollectStream << getTokenStr();
5321       Lex();
5322     }
5323 
5324     if (trySkipId(AssemblerDirectiveEnd)) {
5325       FoundEnd = true;
5326       break;
5327     }
5328 
5329     CollectStream << Parser.parseStringToEndOfStatement()
5330                   << getContext().getAsmInfo()->getSeparatorString();
5331 
5332     Parser.eatToEndOfStatement();
5333   }
5334 
5335   getLexer().setSkipSpace(true);
5336 
5337   if (isToken(AsmToken::Eof) && !FoundEnd) {
5338     return TokError(Twine("expected directive ") +
5339                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5340   }
5341 
5342   CollectStream.flush();
5343   return false;
5344 }
5345 
5346 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5347 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5348   std::string String;
5349   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5350                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5351     return true;
5352 
5353   auto PALMetadata = getTargetStreamer().getPALMetadata();
5354   if (!PALMetadata->setFromString(String))
5355     return Error(getLoc(), "invalid PAL metadata");
5356   return false;
5357 }
5358 
5359 /// Parse the assembler directive for old linear-format PAL metadata.
5360 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5361   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5362     return Error(getLoc(),
5363                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5364                  "not available on non-amdpal OSes")).str());
5365   }
5366 
5367   auto PALMetadata = getTargetStreamer().getPALMetadata();
5368   PALMetadata->setLegacy();
5369   for (;;) {
5370     uint32_t Key, Value;
5371     if (ParseAsAbsoluteExpression(Key)) {
5372       return TokError(Twine("invalid value in ") +
5373                       Twine(PALMD::AssemblerDirective));
5374     }
5375     if (!trySkipToken(AsmToken::Comma)) {
5376       return TokError(Twine("expected an even number of values in ") +
5377                       Twine(PALMD::AssemblerDirective));
5378     }
5379     if (ParseAsAbsoluteExpression(Value)) {
5380       return TokError(Twine("invalid value in ") +
5381                       Twine(PALMD::AssemblerDirective));
5382     }
5383     PALMetadata->setRegister(Key, Value);
5384     if (!trySkipToken(AsmToken::Comma))
5385       break;
5386   }
5387   return false;
5388 }
5389 
5390 /// ParseDirectiveAMDGPULDS
5391 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5392 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5393   if (getParser().checkForValidSection())
5394     return true;
5395 
5396   StringRef Name;
5397   SMLoc NameLoc = getLoc();
5398   if (getParser().parseIdentifier(Name))
5399     return TokError("expected identifier in directive");
5400 
5401   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5402   if (parseToken(AsmToken::Comma, "expected ','"))
5403     return true;
5404 
5405   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5406 
5407   int64_t Size;
5408   SMLoc SizeLoc = getLoc();
5409   if (getParser().parseAbsoluteExpression(Size))
5410     return true;
5411   if (Size < 0)
5412     return Error(SizeLoc, "size must be non-negative");
5413   if (Size > LocalMemorySize)
5414     return Error(SizeLoc, "size is too large");
5415 
5416   int64_t Alignment = 4;
5417   if (trySkipToken(AsmToken::Comma)) {
5418     SMLoc AlignLoc = getLoc();
5419     if (getParser().parseAbsoluteExpression(Alignment))
5420       return true;
5421     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5422       return Error(AlignLoc, "alignment must be a power of two");
5423 
5424     // Alignment larger than the size of LDS is possible in theory, as long
5425     // as the linker manages to place the symbol at address 0, but we do want
5426     // to make sure the alignment fits nicely into a 32-bit integer.
5427     if (Alignment >= 1u << 31)
5428       return Error(AlignLoc, "alignment is too large");
5429   }
5430 
5431   if (parseToken(AsmToken::EndOfStatement,
5432                  "unexpected token in '.amdgpu_lds' directive"))
5433     return true;
5434 
5435   Symbol->redefineIfPossible();
5436   if (!Symbol->isUndefined())
5437     return Error(NameLoc, "invalid symbol redefinition");
5438 
5439   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5440   return false;
5441 }
5442 
5443 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5444   StringRef IDVal = DirectiveID.getString();
5445 
5446   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5447     if (IDVal == ".amdhsa_kernel")
5448      return ParseDirectiveAMDHSAKernel();
5449 
5450     // TODO: Restructure/combine with PAL metadata directive.
5451     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5452       return ParseDirectiveHSAMetadata();
5453   } else {
5454     if (IDVal == ".hsa_code_object_version")
5455       return ParseDirectiveHSACodeObjectVersion();
5456 
5457     if (IDVal == ".hsa_code_object_isa")
5458       return ParseDirectiveHSACodeObjectISA();
5459 
5460     if (IDVal == ".amd_kernel_code_t")
5461       return ParseDirectiveAMDKernelCodeT();
5462 
5463     if (IDVal == ".amdgpu_hsa_kernel")
5464       return ParseDirectiveAMDGPUHsaKernel();
5465 
5466     if (IDVal == ".amd_amdgpu_isa")
5467       return ParseDirectiveISAVersion();
5468 
5469     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5470       return ParseDirectiveHSAMetadata();
5471   }
5472 
5473   if (IDVal == ".amdgcn_target")
5474     return ParseDirectiveAMDGCNTarget();
5475 
5476   if (IDVal == ".amdgpu_lds")
5477     return ParseDirectiveAMDGPULDS();
5478 
5479   if (IDVal == PALMD::AssemblerDirectiveBegin)
5480     return ParseDirectivePALMetadataBegin();
5481 
5482   if (IDVal == PALMD::AssemblerDirective)
5483     return ParseDirectivePALMetadata();
5484 
5485   return true;
5486 }
5487 
5488 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5489                                            unsigned RegNo) {
5490 
5491   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5492     return isGFX9Plus();
5493 
5494   // GFX10 has 2 more SGPRs 104 and 105.
5495   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5496     return hasSGPR104_SGPR105();
5497 
5498   switch (RegNo) {
5499   case AMDGPU::SRC_SHARED_BASE:
5500   case AMDGPU::SRC_SHARED_LIMIT:
5501   case AMDGPU::SRC_PRIVATE_BASE:
5502   case AMDGPU::SRC_PRIVATE_LIMIT:
5503   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5504     return isGFX9Plus();
5505   case AMDGPU::TBA:
5506   case AMDGPU::TBA_LO:
5507   case AMDGPU::TBA_HI:
5508   case AMDGPU::TMA:
5509   case AMDGPU::TMA_LO:
5510   case AMDGPU::TMA_HI:
5511     return !isGFX9Plus();
5512   case AMDGPU::XNACK_MASK:
5513   case AMDGPU::XNACK_MASK_LO:
5514   case AMDGPU::XNACK_MASK_HI:
5515     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5516   case AMDGPU::SGPR_NULL:
5517     return isGFX10Plus();
5518   default:
5519     break;
5520   }
5521 
5522   if (isCI())
5523     return true;
5524 
5525   if (isSI() || isGFX10Plus()) {
5526     // No flat_scr on SI.
5527     // On GFX10 flat scratch is not a valid register operand and can only be
5528     // accessed with s_setreg/s_getreg.
5529     switch (RegNo) {
5530     case AMDGPU::FLAT_SCR:
5531     case AMDGPU::FLAT_SCR_LO:
5532     case AMDGPU::FLAT_SCR_HI:
5533       return false;
5534     default:
5535       return true;
5536     }
5537   }
5538 
5539   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5540   // SI/CI have.
5541   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5542     return hasSGPR102_SGPR103();
5543 
5544   return true;
5545 }
5546 
5547 OperandMatchResultTy
5548 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5549                               OperandMode Mode) {
5550   // Try to parse with a custom parser
5551   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5552 
5553   // If we successfully parsed the operand or if there was an error parsing,
5554   // we are done.
5555   //
5556   // If we are parsing after we reach EndOfStatement then this means we
5557   // are appending default values to the Operands list.  This is only done
5558   // by a custom parser, so we shouldn't continue on to the generic parsing.
5559   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5560       isToken(AsmToken::EndOfStatement))
5561     return ResTy;
5562 
5563   SMLoc RBraceLoc;
5564   SMLoc LBraceLoc = getLoc();
5565   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5566     unsigned Prefix = Operands.size();
5567 
5568     for (;;) {
5569       auto Loc = getLoc();
5570       ResTy = parseReg(Operands);
5571       if (ResTy == MatchOperand_NoMatch)
5572         Error(Loc, "expected a register");
5573       if (ResTy != MatchOperand_Success)
5574         return MatchOperand_ParseFail;
5575 
5576       RBraceLoc = getLoc();
5577       if (trySkipToken(AsmToken::RBrac))
5578         break;
5579 
5580       if (!skipToken(AsmToken::Comma,
5581                      "expected a comma or a closing square bracket")) {
5582         return MatchOperand_ParseFail;
5583       }
5584     }
5585 
5586     if (Operands.size() - Prefix > 1) {
5587       Operands.insert(Operands.begin() + Prefix,
5588                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5589       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5590     }
5591 
5592     return MatchOperand_Success;
5593   }
5594 
5595   return parseRegOrImm(Operands);
5596 }
5597 
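/// Strip a forced-encoding suffix from the mnemonic and remember it, e.g.
/// "v_add_f32_e64" becomes "v_add_f32" with a forced 64-bit encoding;
/// "_e32", "_dpp" and "_sdwa" are handled analogously.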
5598 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5599   // Clear any forced encodings from the previous instruction.
5600   setForcedEncodingSize(0);
5601   setForcedDPP(false);
5602   setForcedSDWA(false);
5603 
5604   if (Name.endswith("_e64")) {
5605     setForcedEncodingSize(64);
5606     return Name.substr(0, Name.size() - 4);
5607   } else if (Name.endswith("_e32")) {
5608     setForcedEncodingSize(32);
5609     return Name.substr(0, Name.size() - 4);
5610   } else if (Name.endswith("_dpp")) {
5611     setForcedDPP(true);
5612     return Name.substr(0, Name.size() - 4);
5613   } else if (Name.endswith("_sdwa")) {
5614     setForcedSDWA(true);
5615     return Name.substr(0, Name.size() - 5);
5616   }
5617   return Name;
5618 }
5619 
5620 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5621                                        StringRef Name,
5622                                        SMLoc NameLoc, OperandVector &Operands) {
5623   // Add the instruction mnemonic
5624   Name = parseMnemonicSuffix(Name);
5625   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5626 
5627   bool IsMIMG = Name.startswith("image_");
5628 
5629   while (!trySkipToken(AsmToken::EndOfStatement)) {
5630     OperandMode Mode = OperandMode_Default;
5631     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5632       Mode = OperandMode_NSA;
5633     CPolSeen = 0;
5634     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5635 
5636     if (Res != MatchOperand_Success) {
5637       checkUnsupportedInstruction(Name, NameLoc);
5638       if (!Parser.hasPendingError()) {
5639         // FIXME: use real operand location rather than the current location.
5640         StringRef Msg =
5641           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5642                                             "not a valid operand.";
5643         Error(getLoc(), Msg);
5644       }
5645       while (!trySkipToken(AsmToken::EndOfStatement)) {
5646         lex();
5647       }
5648       return true;
5649     }
5650 
5651     // Eat the comma or space if there is one.
5652     trySkipToken(AsmToken::Comma);
5653   }
5654 
5655   return false;
5656 }
5657 
5658 //===----------------------------------------------------------------------===//
5659 // Utility functions
5660 //===----------------------------------------------------------------------===//
5661 
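/// Parse an integer operand written as <Prefix>:<expression>, e.g.
/// "offset:4095". Returns NoMatch if the prefix is not present.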
5662 OperandMatchResultTy
5663 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5664 
5665   if (!trySkipId(Prefix, AsmToken::Colon))
5666     return MatchOperand_NoMatch;
5667 
5668   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5669 }
5670 
5671 OperandMatchResultTy
5672 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5673                                     AMDGPUOperand::ImmTy ImmTy,
5674                                     bool (*ConvertResult)(int64_t&)) {
5675   SMLoc S = getLoc();
5676   int64_t Value = 0;
5677 
5678   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5679   if (Res != MatchOperand_Success)
5680     return Res;
5681 
5682   if (ConvertResult && !ConvertResult(Value)) {
5683     Error(S, "invalid " + StringRef(Prefix) + " value.");
5684   }
5685 
5686   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5687   return MatchOperand_Success;
5688 }
5689 
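/// Parse an operand written as <Prefix>:[<bit>,...], e.g. "op_sel:[0,1]".
/// Up to four 0/1 elements are packed, LSB first, into a single immediate.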
5690 OperandMatchResultTy
5691 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5692                                              OperandVector &Operands,
5693                                              AMDGPUOperand::ImmTy ImmTy,
5694                                              bool (*ConvertResult)(int64_t&)) {
5695   SMLoc S = getLoc();
5696   if (!trySkipId(Prefix, AsmToken::Colon))
5697     return MatchOperand_NoMatch;
5698 
5699   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5700     return MatchOperand_ParseFail;
5701 
5702   unsigned Val = 0;
5703   const unsigned MaxSize = 4;
5704 
5705   // FIXME: How to verify the number of elements matches the number of src
5706   // operands?
5707   for (int I = 0; ; ++I) {
5708     int64_t Op;
5709     SMLoc Loc = getLoc();
5710     if (!parseExpr(Op))
5711       return MatchOperand_ParseFail;
5712 
5713     if (Op != 0 && Op != 1) {
5714       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5715       return MatchOperand_ParseFail;
5716     }
5717 
5718     Val |= (Op << I);
5719 
5720     if (trySkipToken(AsmToken::RBrac))
5721       break;
5722 
5723     if (I + 1 == MaxSize) {
5724       Error(getLoc(), "expected a closing square bracket");
5725       return MatchOperand_ParseFail;
5726     }
5727 
5728     if (!skipToken(AsmToken::Comma, "expected a comma"))
5729       return MatchOperand_ParseFail;
5730   }
5731 
5732   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5733   return MatchOperand_Success;
5734 }
5735 
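/// Parse a named single-bit modifier: <Name> sets the bit and "no"<Name>
/// clears it, e.g. "gds" / "nogds". Returns NoMatch if neither spelling is
/// present.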
5736 OperandMatchResultTy
5737 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5738                                AMDGPUOperand::ImmTy ImmTy) {
5739   int64_t Bit;
5740   SMLoc S = getLoc();
5741 
5742   if (trySkipId(Name)) {
5743     Bit = 1;
5744   } else if (trySkipId("no", Name)) {
5745     Bit = 0;
5746   } else {
5747     return MatchOperand_NoMatch;
5748   }
5749 
5750   if (Name == "r128" && !hasMIMG_R128()) {
5751     Error(S, "r128 modifier is not supported on this GPU");
5752     return MatchOperand_ParseFail;
5753   }
5754   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5755     Error(S, "a16 modifier is not supported on this GPU");
5756     return MatchOperand_ParseFail;
5757   }
5758 
5759   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5760     ImmTy = AMDGPUOperand::ImmTyR128A16;
5761 
5762   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5763   return MatchOperand_Success;
5764 }
5765 
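/// Parse cache policy modifiers (glc/slc/dlc/scc and their "no" forms, or
/// sc0/sc1/nt on gfx940 non-scalar instructions) and fold them into a single
/// CPol immediate, rejecting duplicates and modifiers the subtarget does not
/// support.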
5766 OperandMatchResultTy
5767 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5768   unsigned CPolOn = 0;
5769   unsigned CPolOff = 0;
5770   SMLoc S = getLoc();
5771 
5772   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5773   if (isGFX940() && !Mnemo.startswith("s_")) {
5774     if (trySkipId("sc0"))
5775       CPolOn = AMDGPU::CPol::SC0;
5776     else if (trySkipId("nosc0"))
5777       CPolOff = AMDGPU::CPol::SC0;
5778     else if (trySkipId("nt"))
5779       CPolOn = AMDGPU::CPol::NT;
5780     else if (trySkipId("nont"))
5781       CPolOff = AMDGPU::CPol::NT;
5782     else if (trySkipId("sc1"))
5783       CPolOn = AMDGPU::CPol::SC1;
5784     else if (trySkipId("nosc1"))
5785       CPolOff = AMDGPU::CPol::SC1;
5786     else
5787       return MatchOperand_NoMatch;
5788   }
5789   else if (trySkipId("glc"))
5790     CPolOn = AMDGPU::CPol::GLC;
5791   else if (trySkipId("noglc"))
5792     CPolOff = AMDGPU::CPol::GLC;
5793   else if (trySkipId("slc"))
5794     CPolOn = AMDGPU::CPol::SLC;
5795   else if (trySkipId("noslc"))
5796     CPolOff = AMDGPU::CPol::SLC;
5797   else if (trySkipId("dlc"))
5798     CPolOn = AMDGPU::CPol::DLC;
5799   else if (trySkipId("nodlc"))
5800     CPolOff = AMDGPU::CPol::DLC;
5801   else if (trySkipId("scc"))
5802     CPolOn = AMDGPU::CPol::SCC;
5803   else if (trySkipId("noscc"))
5804     CPolOff = AMDGPU::CPol::SCC;
5805   else
5806     return MatchOperand_NoMatch;
5807 
5808   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5809     Error(S, "dlc modifier is not supported on this GPU");
5810     return MatchOperand_ParseFail;
5811   }
5812 
5813   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5814     Error(S, "scc modifier is not supported on this GPU");
5815     return MatchOperand_ParseFail;
5816   }
5817 
5818   if (CPolSeen & (CPolOn | CPolOff)) {
5819     Error(S, "duplicate cache policy modifier");
5820     return MatchOperand_ParseFail;
5821   }
5822 
5823   CPolSeen |= (CPolOn | CPolOff);
5824 
5825   for (unsigned I = 1; I != Operands.size(); ++I) {
5826     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5827     if (Op.isCPol()) {
5828       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5829       return MatchOperand_Success;
5830     }
5831   }
5832 
5833   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5834                                               AMDGPUOperand::ImmTyCPol));
5835 
5836   return MatchOperand_Success;
5837 }
5838 
5839 static void addOptionalImmOperand(
5840   MCInst& Inst, const OperandVector& Operands,
5841   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5842   AMDGPUOperand::ImmTy ImmT,
5843   int64_t Default = 0) {
5844   auto i = OptionalIdx.find(ImmT);
5845   if (i != OptionalIdx.end()) {
5846     unsigned Idx = i->second;
5847     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5848   } else {
5849     Inst.addOperand(MCOperand::createImm(Default));
5850   }
5851 }
5852 
5853 OperandMatchResultTy
5854 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5855                                        StringRef &Value,
5856                                        SMLoc &StringLoc) {
5857   if (!trySkipId(Prefix, AsmToken::Colon))
5858     return MatchOperand_NoMatch;
5859 
5860   StringLoc = getLoc();
5861   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5862                                                   : MatchOperand_ParseFail;
5863 }
5864 
5865 //===----------------------------------------------------------------------===//
5866 // MTBUF format
5867 //===----------------------------------------------------------------------===//
5868 
5869 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5870                                   int64_t MaxVal,
5871                                   int64_t &Fmt) {
5872   int64_t Val;
5873   SMLoc Loc = getLoc();
5874 
5875   auto Res = parseIntWithPrefix(Pref, Val);
5876   if (Res == MatchOperand_ParseFail)
5877     return false;
5878   if (Res == MatchOperand_NoMatch)
5879     return true;
5880 
5881   if (Val < 0 || Val > MaxVal) {
5882     Error(Loc, Twine("out of range ", StringRef(Pref)));
5883     return false;
5884   }
5885 
5886   Fmt = Val;
5887   return true;
5888 }
5889 
5890 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5891 // values to live in a joint format operand in the MCInst encoding.
5892 OperandMatchResultTy
5893 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5894   using namespace llvm::AMDGPU::MTBUFFormat;
5895 
5896   int64_t Dfmt = DFMT_UNDEF;
5897   int64_t Nfmt = NFMT_UNDEF;
5898 
5899   // dfmt and nfmt can appear in either order, and each is optional.
5900   for (int I = 0; I < 2; ++I) {
5901     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5902       return MatchOperand_ParseFail;
5903 
5904     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5905       return MatchOperand_ParseFail;
5906     }
5907     // Skip optional comma between dfmt/nfmt
5908     // but guard against 2 commas following each other.
5909     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5910         !peekToken().is(AsmToken::Comma)) {
5911       trySkipToken(AsmToken::Comma);
5912     }
5913   }
5914 
5915   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5916     return MatchOperand_NoMatch;
5917 
5918   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5919   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5920 
5921   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5922   return MatchOperand_Success;
5923 }
5924 
5925 OperandMatchResultTy
5926 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5927   using namespace llvm::AMDGPU::MTBUFFormat;
5928 
5929   int64_t Fmt = UFMT_UNDEF;
5930 
5931   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5932     return MatchOperand_ParseFail;
5933 
5934   if (Fmt == UFMT_UNDEF)
5935     return MatchOperand_NoMatch;
5936 
5937   Format = Fmt;
5938   return MatchOperand_Success;
5939 }
5940 
5941 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5942                                     int64_t &Nfmt,
5943                                     StringRef FormatStr,
5944                                     SMLoc Loc) {
5945   using namespace llvm::AMDGPU::MTBUFFormat;
5946   int64_t Format;
5947 
5948   Format = getDfmt(FormatStr);
5949   if (Format != DFMT_UNDEF) {
5950     Dfmt = Format;
5951     return true;
5952   }
5953 
5954   Format = getNfmt(FormatStr, getSTI());
5955   if (Format != NFMT_UNDEF) {
5956     Nfmt = Format;
5957     return true;
5958   }
5959 
5960   Error(Loc, "unsupported format");
5961   return false;
5962 }
5963 
5964 OperandMatchResultTy
5965 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5966                                           SMLoc FormatLoc,
5967                                           int64_t &Format) {
5968   using namespace llvm::AMDGPU::MTBUFFormat;
5969 
5970   int64_t Dfmt = DFMT_UNDEF;
5971   int64_t Nfmt = NFMT_UNDEF;
5972   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5973     return MatchOperand_ParseFail;
5974 
5975   if (trySkipToken(AsmToken::Comma)) {
5976     StringRef Str;
5977     SMLoc Loc = getLoc();
5978     if (!parseId(Str, "expected a format string") ||
5979         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5980       return MatchOperand_ParseFail;
5981     }
5982     if (Dfmt == DFMT_UNDEF) {
5983       Error(Loc, "duplicate numeric format");
5984       return MatchOperand_ParseFail;
5985     } else if (Nfmt == NFMT_UNDEF) {
5986       Error(Loc, "duplicate data format");
5987       return MatchOperand_ParseFail;
5988     }
5989   }
5990 
5991   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5992   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5993 
5994   if (isGFX10Plus()) {
5995     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5996     if (Ufmt == UFMT_UNDEF) {
5997       Error(FormatLoc, "unsupported format");
5998       return MatchOperand_ParseFail;
5999     }
6000     Format = Ufmt;
6001   } else {
6002     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6003   }
6004 
6005   return MatchOperand_Success;
6006 }
6007 
6008 OperandMatchResultTy
6009 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6010                                             SMLoc Loc,
6011                                             int64_t &Format) {
6012   using namespace llvm::AMDGPU::MTBUFFormat;
6013 
6014   auto Id = getUnifiedFormat(FormatStr);
6015   if (Id == UFMT_UNDEF)
6016     return MatchOperand_NoMatch;
6017 
6018   if (!isGFX10Plus()) {
6019     Error(Loc, "unified format is not supported on this GPU");
6020     return MatchOperand_ParseFail;
6021   }
6022 
6023   Format = Id;
6024   return MatchOperand_Success;
6025 }
6026 
6027 OperandMatchResultTy
6028 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6029   using namespace llvm::AMDGPU::MTBUFFormat;
6030   SMLoc Loc = getLoc();
6031 
6032   if (!parseExpr(Format))
6033     return MatchOperand_ParseFail;
6034   if (!isValidFormatEncoding(Format, getSTI())) {
6035     Error(Loc, "out of range format");
6036     return MatchOperand_ParseFail;
6037   }
6038 
6039   return MatchOperand_Success;
6040 }
6041 
6042 OperandMatchResultTy
6043 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6044   using namespace llvm::AMDGPU::MTBUFFormat;
6045 
6046   if (!trySkipId("format", AsmToken::Colon))
6047     return MatchOperand_NoMatch;
6048 
6049   if (trySkipToken(AsmToken::LBrac)) {
6050     StringRef FormatStr;
6051     SMLoc Loc = getLoc();
6052     if (!parseId(FormatStr, "expected a format string"))
6053       return MatchOperand_ParseFail;
6054 
6055     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6056     if (Res == MatchOperand_NoMatch)
6057       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6058     if (Res != MatchOperand_Success)
6059       return Res;
6060 
6061     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6062       return MatchOperand_ParseFail;
6063 
6064     return MatchOperand_Success;
6065   }
6066 
6067   return parseNumericFormat(Format);
6068 }
6069 
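/// Parse the MTBUF format operand. The legacy dfmt/nfmt syntax or the gfx10+
/// unified format may appear either before or after the soffset operand; if
/// no format is given, the subtarget's default encoding is used.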
6070 OperandMatchResultTy
6071 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6072   using namespace llvm::AMDGPU::MTBUFFormat;
6073 
6074   int64_t Format = getDefaultFormatEncoding(getSTI());
6075   OperandMatchResultTy Res;
6076   SMLoc Loc = getLoc();
6077 
6078   // Parse legacy format syntax.
6079   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6080   if (Res == MatchOperand_ParseFail)
6081     return Res;
6082 
6083   bool FormatFound = (Res == MatchOperand_Success);
6084 
6085   Operands.push_back(
6086     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6087 
6088   if (FormatFound)
6089     trySkipToken(AsmToken::Comma);
6090 
6091   if (isToken(AsmToken::EndOfStatement)) {
6092     // We are expecting an soffset operand,
6093     // but let the matcher handle the error.
6094     return MatchOperand_Success;
6095   }
6096 
6097   // Parse soffset.
6098   Res = parseRegOrImm(Operands);
6099   if (Res != MatchOperand_Success)
6100     return Res;
6101 
6102   trySkipToken(AsmToken::Comma);
6103 
6104   if (!FormatFound) {
6105     Res = parseSymbolicOrNumericFormat(Format);
6106     if (Res == MatchOperand_ParseFail)
6107       return Res;
6108     if (Res == MatchOperand_Success) {
6109       auto Size = Operands.size();
6110       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6111       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6112       Op.setImm(Format);
6113     }
6114     return MatchOperand_Success;
6115   }
6116 
6117   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6118     Error(getLoc(), "duplicate format");
6119     return MatchOperand_ParseFail;
6120   }
6121   return MatchOperand_Success;
6122 }
6123 
6124 //===----------------------------------------------------------------------===//
6125 // ds
6126 //===----------------------------------------------------------------------===//
6127 
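/// Convert a DS instruction that uses split offset0/offset1 operands:
/// registers are added in order, then the optional offset0, offset1 and gds
/// immediates, and finally the implicit m0 register.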
6128 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6129                                     const OperandVector &Operands) {
6130   OptionalImmIndexMap OptionalIdx;
6131 
6132   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6133     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6134 
6135     // Add the register arguments
6136     if (Op.isReg()) {
6137       Op.addRegOperands(Inst, 1);
6138       continue;
6139     }
6140 
6141     // Handle optional arguments
6142     OptionalIdx[Op.getImmTy()] = i;
6143   }
6144 
6145   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6146   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6147   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6148 
6149   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6150 }
6151 
6152 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6153                                 bool IsGdsHardcoded) {
6154   OptionalImmIndexMap OptionalIdx;
6155 
6156   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6157     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6158 
6159     // Add the register arguments
6160     if (Op.isReg()) {
6161       Op.addRegOperands(Inst, 1);
6162       continue;
6163     }
6164 
6165     if (Op.isToken() && Op.getToken() == "gds") {
6166       IsGdsHardcoded = true;
6167       continue;
6168     }
6169 
6170     // Handle optional arguments
6171     OptionalIdx[Op.getImmTy()] = i;
6172   }
6173 
6174   AMDGPUOperand::ImmTy OffsetType =
6175     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6176      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6177      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6178                                                       AMDGPUOperand::ImmTyOffset;
6179 
6180   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6181 
6182   if (!IsGdsHardcoded) {
6183     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6184   }
6185   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6186 }
6187 
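// Convert parsed EXP operands into an MCInst. The "en" bitmask is derived
// from which sources are registers rather than "off"; when "compr" is
// present, the first two sources are packed and enable bits are set in pairs.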
6188 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6189   OptionalImmIndexMap OptionalIdx;
6190 
6191   unsigned OperandIdx[4];
6192   unsigned EnMask = 0;
6193   int SrcIdx = 0;
6194 
6195   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6196     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6197 
6198     // Add the register arguments
6199     if (Op.isReg()) {
6200       assert(SrcIdx < 4);
6201       OperandIdx[SrcIdx] = Inst.size();
6202       Op.addRegOperands(Inst, 1);
6203       ++SrcIdx;
6204       continue;
6205     }
6206 
6207     if (Op.isOff()) {
6208       assert(SrcIdx < 4);
6209       OperandIdx[SrcIdx] = Inst.size();
6210       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6211       ++SrcIdx;
6212       continue;
6213     }
6214 
6215     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6216       Op.addImmOperands(Inst, 1);
6217       continue;
6218     }
6219 
6220     if (Op.isToken() && Op.getToken() == "done")
6221       continue;
6222 
6223     // Handle optional arguments
6224     OptionalIdx[Op.getImmTy()] = i;
6225   }
6226 
6227   assert(SrcIdx == 4);
6228 
6229   bool Compr = false;
6230   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6231     Compr = true;
6232     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6233     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6234     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6235   }
6236 
6237   for (auto i = 0; i < SrcIdx; ++i) {
6238     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6239       EnMask |= Compr ? (0x3 << (i * 2)) : (0x1 << i);
6240     }
6241   }
6242 
6243   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6244   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6245 
6246   Inst.addOperand(MCOperand::createImm(EnMask));
6247 }
6248 
6249 //===----------------------------------------------------------------------===//
6250 // s_waitcnt
6251 //===----------------------------------------------------------------------===//
6252 
6253 static bool
6254 encodeCnt(
6255   const AMDGPU::IsaVersion ISA,
6256   int64_t &IntVal,
6257   int64_t CntVal,
6258   bool Saturate,
6259   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6260   unsigned (*decode)(const IsaVersion &Version, unsigned))
6261 {
6262   bool Failed = false;
6263 
6264   IntVal = encode(ISA, IntVal, CntVal);
6265   if (CntVal != decode(ISA, IntVal)) {
6266     if (Saturate) {
6267       IntVal = encode(ISA, IntVal, -1);
6268     } else {
6269       Failed = true;
6270     }
6271   }
6272   return Failed;
6273 }
6274 
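// Parse a single counter specifier of an s_waitcnt operand, e.g. the
// "vmcnt(0)" part of "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)", and merge the
// encoded value into IntVal. A "_sat" suffix clamps an out-of-range value to
// the counter's maximum instead of reporting an error.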
6275 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6276 
6277   SMLoc CntLoc = getLoc();
6278   StringRef CntName = getTokenStr();
6279 
6280   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6281       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6282     return false;
6283 
6284   int64_t CntVal;
6285   SMLoc ValLoc = getLoc();
6286   if (!parseExpr(CntVal))
6287     return false;
6288 
6289   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6290 
6291   bool Failed = true;
6292   bool Sat = CntName.endswith("_sat");
6293 
6294   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6295     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6296   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6297     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6298   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6299     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6300   } else {
6301     Error(CntLoc, "invalid counter name " + CntName);
6302     return false;
6303   }
6304 
6305   if (Failed) {
6306     Error(ValLoc, "too large value for " + CntName);
6307     return false;
6308   }
6309 
6310   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6311     return false;
6312 
6313   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6314     if (isToken(AsmToken::EndOfStatement)) {
6315       Error(getLoc(), "expected a counter name");
6316       return false;
6317     }
6318   }
6319 
6320   return true;
6321 }
6322 
6323 OperandMatchResultTy
6324 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6325   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6326   int64_t Waitcnt = getWaitcntBitMask(ISA);
6327   SMLoc S = getLoc();
6328 
6329   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6330     while (!isToken(AsmToken::EndOfStatement)) {
6331       if (!parseCnt(Waitcnt))
6332         return MatchOperand_ParseFail;
6333     }
6334   } else {
6335     if (!parseExpr(Waitcnt))
6336       return MatchOperand_ParseFail;
6337   }
6338 
6339   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6340   return MatchOperand_Success;
6341 }
6342 
6343 bool
6344 AMDGPUOperand::isSWaitCnt() const {
6345   return isImm();
6346 }
6347 
6348 //===----------------------------------------------------------------------===//
6349 // DepCtr
6350 //===----------------------------------------------------------------------===//
6351 
6352 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6353                                   StringRef DepCtrName) {
6354   switch (ErrorId) {
6355   case OPR_ID_UNKNOWN:
6356     Error(Loc, Twine("invalid counter name ", DepCtrName));
6357     return;
6358   case OPR_ID_UNSUPPORTED:
6359     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6360     return;
6361   case OPR_ID_DUPLICATE:
6362     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6363     return;
6364   case OPR_VAL_INVALID:
6365     Error(Loc, Twine("invalid value for ", DepCtrName));
6366     return;
6367   default:
6368     assert(false);
6369   }
6370 }
6371 
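// Parse a single dependency counter specifier of an s_waitcnt_depctr operand,
// e.g. "depctr_va_vdst(0)". Counter names and their encodings are
// target-dependent (see the DepCtr tables).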
6372 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6373 
6374   using namespace llvm::AMDGPU::DepCtr;
6375 
6376   SMLoc DepCtrLoc = getLoc();
6377   StringRef DepCtrName = getTokenStr();
6378 
6379   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6380       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6381     return false;
6382 
6383   int64_t ExprVal;
6384   if (!parseExpr(ExprVal))
6385     return false;
6386 
6387   unsigned PrevOprMask = UsedOprMask;
6388   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6389 
6390   if (CntVal < 0) {
6391     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6392     return false;
6393   }
6394 
6395   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6396     return false;
6397 
6398   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6399     if (isToken(AsmToken::EndOfStatement)) {
6400       Error(getLoc(), "expected a counter name");
6401       return false;
6402     }
6403   }
6404 
6405   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6406   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6407   return true;
6408 }
6409 
6410 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6411   using namespace llvm::AMDGPU::DepCtr;
6412 
6413   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6414   SMLoc Loc = getLoc();
6415 
6416   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6417     unsigned UsedOprMask = 0;
6418     while (!isToken(AsmToken::EndOfStatement)) {
6419       if (!parseDepCtr(DepCtr, UsedOprMask))
6420         return MatchOperand_ParseFail;
6421     }
6422   } else {
6423     if (!parseExpr(DepCtr))
6424       return MatchOperand_ParseFail;
6425   }
6426 
6427   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6428   return MatchOperand_Success;
6429 }
6430 
6431 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6432 
6433 //===----------------------------------------------------------------------===//
6434 // hwreg
6435 //===----------------------------------------------------------------------===//
6436 
6437 bool
6438 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6439                                 OperandInfoTy &Offset,
6440                                 OperandInfoTy &Width) {
6441   using namespace llvm::AMDGPU::Hwreg;
6442 
6443   // The register may be specified by name or using a numeric code
6444   HwReg.Loc = getLoc();
6445   if (isToken(AsmToken::Identifier) &&
6446       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6447     HwReg.IsSymbolic = true;
6448     lex(); // skip register name
6449   } else if (!parseExpr(HwReg.Id, "a register name")) {
6450     return false;
6451   }
6452 
6453   if (trySkipToken(AsmToken::RParen))
6454     return true;
6455 
6456   // parse optional params
6457   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6458     return false;
6459 
6460   Offset.Loc = getLoc();
6461   if (!parseExpr(Offset.Id))
6462     return false;
6463 
6464   if (!skipToken(AsmToken::Comma, "expected a comma"))
6465     return false;
6466 
6467   Width.Loc = getLoc();
6468   return parseExpr(Width.Id) &&
6469          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6470 }
6471 
6472 bool
6473 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6474                                const OperandInfoTy &Offset,
6475                                const OperandInfoTy &Width) {
6476 
6477   using namespace llvm::AMDGPU::Hwreg;
6478 
6479   if (HwReg.IsSymbolic) {
6480     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6481       Error(HwReg.Loc,
6482             "specified hardware register is not supported on this GPU");
6483       return false;
6484     }
6485   } else {
6486     if (!isValidHwreg(HwReg.Id)) {
6487       Error(HwReg.Loc,
6488             "invalid code of hardware register: only 6-bit values are legal");
6489       return false;
6490     }
6491   }
6492   if (!isValidHwregOffset(Offset.Id)) {
6493     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6494     return false;
6495   }
6496   if (!isValidHwregWidth(Width.Id)) {
6497     Error(Width.Loc,
6498           "invalid bitfield width: only values from 1 to 32 are legal");
6499     return false;
6500   }
6501   return true;
6502 }
6503 
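// Parse a hwreg operand, either in symbolic form, e.g.
// "hwreg(HW_REG_TRAPSTS, 0, 32)" (register names are target-dependent), or as
// a raw 16-bit immediate.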
6504 OperandMatchResultTy
6505 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6506   using namespace llvm::AMDGPU::Hwreg;
6507 
6508   int64_t ImmVal = 0;
6509   SMLoc Loc = getLoc();
6510 
6511   if (trySkipId("hwreg", AsmToken::LParen)) {
6512     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6513     OperandInfoTy Offset(OFFSET_DEFAULT_);
6514     OperandInfoTy Width(WIDTH_DEFAULT_);
6515     if (parseHwregBody(HwReg, Offset, Width) &&
6516         validateHwreg(HwReg, Offset, Width)) {
6517       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6518     } else {
6519       return MatchOperand_ParseFail;
6520     }
6521   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6522     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6523       Error(Loc, "invalid immediate: only 16-bit values are legal");
6524       return MatchOperand_ParseFail;
6525     }
6526   } else {
6527     return MatchOperand_ParseFail;
6528   }
6529 
6530   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6531   return MatchOperand_Success;
6532 }
6533 
6534 bool AMDGPUOperand::isHwreg() const {
6535   return isImmTy(ImmTyHwreg);
6536 }
6537 
6538 //===----------------------------------------------------------------------===//
6539 // sendmsg
6540 //===----------------------------------------------------------------------===//
6541 
6542 bool
6543 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6544                                   OperandInfoTy &Op,
6545                                   OperandInfoTy &Stream) {
6546   using namespace llvm::AMDGPU::SendMsg;
6547 
6548   Msg.Loc = getLoc();
6549   if (isToken(AsmToken::Identifier) &&
6550       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6551     Msg.IsSymbolic = true;
6552     lex(); // skip message name
6553   } else if (!parseExpr(Msg.Id, "a message name")) {
6554     return false;
6555   }
6556 
6557   if (trySkipToken(AsmToken::Comma)) {
6558     Op.IsDefined = true;
6559     Op.Loc = getLoc();
6560     if (isToken(AsmToken::Identifier) &&
6561         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6562       lex(); // skip operation name
6563     } else if (!parseExpr(Op.Id, "an operation name")) {
6564       return false;
6565     }
6566 
6567     if (trySkipToken(AsmToken::Comma)) {
6568       Stream.IsDefined = true;
6569       Stream.Loc = getLoc();
6570       if (!parseExpr(Stream.Id))
6571         return false;
6572     }
6573   }
6574 
6575   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6576 }
6577 
6578 bool
6579 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6580                                  const OperandInfoTy &Op,
6581                                  const OperandInfoTy &Stream) {
6582   using namespace llvm::AMDGPU::SendMsg;
6583 
6584   // Validation strictness depends on whether the message is specified
6585   // in symbolic or in numeric form. In the latter case we only check
6586   // that the value can be encoded.
6587   bool Strict = Msg.IsSymbolic;
6588 
6589   if (Strict) {
6590     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6591       Error(Msg.Loc, "specified message id is not supported on this GPU");
6592       return false;
6593     }
6594   } else {
6595     if (!isValidMsgId(Msg.Id)) {
6596       Error(Msg.Loc, "invalid message id");
6597       return false;
6598     }
6599   }
6600   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6601     if (Op.IsDefined) {
6602       Error(Op.Loc, "message does not support operations");
6603     } else {
6604       Error(Msg.Loc, "missing message operation");
6605     }
6606     return false;
6607   }
6608   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6609     Error(Op.Loc, "invalid operation id");
6610     return false;
6611   }
6612   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6613     Error(Stream.Loc, "message operation does not support streams");
6614     return false;
6615   }
6616   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6617     Error(Stream.Loc, "invalid message stream id");
6618     return false;
6619   }
6620   return true;
6621 }
6622 
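// Parse a sendmsg operand, either in symbolic form, e.g.
// "sendmsg(MSG_GS, GS_OP_EMIT, 0)" or "sendmsg(MSG_INTERRUPT)" (message and
// operation names are target-dependent), or as a raw 16-bit immediate.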
6623 OperandMatchResultTy
6624 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6625   using namespace llvm::AMDGPU::SendMsg;
6626 
6627   int64_t ImmVal = 0;
6628   SMLoc Loc = getLoc();
6629 
6630   if (trySkipId("sendmsg", AsmToken::LParen)) {
6631     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6632     OperandInfoTy Op(OP_NONE_);
6633     OperandInfoTy Stream(STREAM_ID_NONE_);
6634     if (parseSendMsgBody(Msg, Op, Stream) &&
6635         validateSendMsg(Msg, Op, Stream)) {
6636       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6637     } else {
6638       return MatchOperand_ParseFail;
6639     }
6640   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6641     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6642       Error(Loc, "invalid immediate: only 16-bit values are legal");
6643       return MatchOperand_ParseFail;
6644     }
6645   } else {
6646     return MatchOperand_ParseFail;
6647   }
6648 
6649   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6650   return MatchOperand_Success;
6651 }
6652 
6653 bool AMDGPUOperand::isSendMsg() const {
6654   return isImmTy(ImmTySendMsg);
6655 }
6656 
6657 //===----------------------------------------------------------------------===//
6658 // v_interp
6659 //===----------------------------------------------------------------------===//
6660 
6661 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6662   StringRef Str;
6663   SMLoc S = getLoc();
6664 
6665   if (!parseId(Str))
6666     return MatchOperand_NoMatch;
6667 
6668   int Slot = StringSwitch<int>(Str)
6669     .Case("p10", 0)
6670     .Case("p20", 1)
6671     .Case("p0", 2)
6672     .Default(-1);
6673 
6674   if (Slot == -1) {
6675     Error(S, "invalid interpolation slot");
6676     return MatchOperand_ParseFail;
6677   }
6678 
6679   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6680                                               AMDGPUOperand::ImmTyInterpSlot));
6681   return MatchOperand_Success;
6682 }
6683 
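// Parse an interpolation attribute of the form "attr<N>.<chan>", e.g. the
// "attr0.x" in "v_interp_p1_f32 v0, v1, attr0.x", producing separate
// attribute and channel operands.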
6684 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6685   StringRef Str;
6686   SMLoc S = getLoc();
6687 
6688   if (!parseId(Str))
6689     return MatchOperand_NoMatch;
6690 
6691   if (!Str.startswith("attr")) {
6692     Error(S, "invalid interpolation attribute");
6693     return MatchOperand_ParseFail;
6694   }
6695 
6696   StringRef Chan = Str.take_back(2);
6697   int AttrChan = StringSwitch<int>(Chan)
6698     .Case(".x", 0)
6699     .Case(".y", 1)
6700     .Case(".z", 2)
6701     .Case(".w", 3)
6702     .Default(-1);
6703   if (AttrChan == -1) {
6704     Error(S, "invalid or missing interpolation attribute channel");
6705     return MatchOperand_ParseFail;
6706   }
6707 
6708   Str = Str.drop_back(2).drop_front(4);
6709 
6710   uint8_t Attr;
6711   if (Str.getAsInteger(10, Attr)) {
6712     Error(S, "invalid or missing interpolation attribute number");
6713     return MatchOperand_ParseFail;
6714   }
6715 
6716   if (Attr > 63) {
6717     Error(S, "out of bounds interpolation attribute number");
6718     return MatchOperand_ParseFail;
6719   }
6720 
6721   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6722 
6723   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6724                                               AMDGPUOperand::ImmTyInterpAttr));
6725   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6726                                               AMDGPUOperand::ImmTyAttrChan));
6727   return MatchOperand_Success;
6728 }
6729 
6730 //===----------------------------------------------------------------------===//
6731 // exp
6732 //===----------------------------------------------------------------------===//
6733 
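// Parse an export target, e.g. "mrt0", "pos0", "param0" or "null"; the set of
// valid targets depends on the GPU.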
6734 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6735   using namespace llvm::AMDGPU::Exp;
6736 
6737   StringRef Str;
6738   SMLoc S = getLoc();
6739 
6740   if (!parseId(Str))
6741     return MatchOperand_NoMatch;
6742 
6743   unsigned Id = getTgtId(Str);
6744   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6745     Error(S, (Id == ET_INVALID) ?
6746                 "invalid exp target" :
6747                 "exp target is not supported on this GPU");
6748     return MatchOperand_ParseFail;
6749   }
6750 
6751   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6752                                               AMDGPUOperand::ImmTyExpTgt));
6753   return MatchOperand_Success;
6754 }
6755 
6756 //===----------------------------------------------------------------------===//
6757 // parser helpers
6758 //===----------------------------------------------------------------------===//
6759 
6760 bool
6761 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6762   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6763 }
6764 
6765 bool
6766 AMDGPUAsmParser::isId(const StringRef Id) const {
6767   return isId(getToken(), Id);
6768 }
6769 
6770 bool
6771 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6772   return getTokenKind() == Kind;
6773 }
6774 
6775 bool
6776 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6777   if (isId(Id)) {
6778     lex();
6779     return true;
6780   }
6781   return false;
6782 }
6783 
6784 bool
6785 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6786   if (isToken(AsmToken::Identifier)) {
6787     StringRef Tok = getTokenStr();
6788     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6789       lex();
6790       return true;
6791     }
6792   }
6793   return false;
6794 }
6795 
6796 bool
6797 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6798   if (isId(Id) && peekToken().is(Kind)) {
6799     lex();
6800     lex();
6801     return true;
6802   }
6803   return false;
6804 }
6805 
6806 bool
6807 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6808   if (isToken(Kind)) {
6809     lex();
6810     return true;
6811   }
6812   return false;
6813 }
6814 
6815 bool
6816 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6817                            const StringRef ErrMsg) {
6818   if (!trySkipToken(Kind)) {
6819     Error(getLoc(), ErrMsg);
6820     return false;
6821   }
6822   return true;
6823 }
6824 
6825 bool
6826 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6827   SMLoc S = getLoc();
6828 
6829   const MCExpr *Expr;
6830   if (Parser.parseExpression(Expr))
6831     return false;
6832 
6833   if (Expr->evaluateAsAbsolute(Imm))
6834     return true;
6835 
6836   if (Expected.empty()) {
6837     Error(S, "expected absolute expression");
6838   } else {
6839     Error(S, Twine("expected ", Expected) +
6840              Twine(" or an absolute expression"));
6841   }
6842   return false;
6843 }
6844 
6845 bool
6846 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6847   SMLoc S = getLoc();
6848 
6849   const MCExpr *Expr;
6850   if (Parser.parseExpression(Expr))
6851     return false;
6852 
6853   int64_t IntVal;
6854   if (Expr->evaluateAsAbsolute(IntVal)) {
6855     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6856   } else {
6857     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6858   }
6859   return true;
6860 }
6861 
6862 bool
6863 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6864   if (isToken(AsmToken::String)) {
6865     Val = getToken().getStringContents();
6866     lex();
6867     return true;
6868   } else {
6869     Error(getLoc(), ErrMsg);
6870     return false;
6871   }
6872 }
6873 
6874 bool
6875 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6876   if (isToken(AsmToken::Identifier)) {
6877     Val = getTokenStr();
6878     lex();
6879     return true;
6880   } else {
6881     if (!ErrMsg.empty())
6882       Error(getLoc(), ErrMsg);
6883     return false;
6884   }
6885 }
6886 
6887 AsmToken
6888 AMDGPUAsmParser::getToken() const {
6889   return Parser.getTok();
6890 }
6891 
6892 AsmToken
6893 AMDGPUAsmParser::peekToken() {
6894   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6895 }
6896 
6897 void
6898 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6899   auto TokCount = getLexer().peekTokens(Tokens);
6900 
6901   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6902     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6903 }
6904 
6905 AsmToken::TokenKind
6906 AMDGPUAsmParser::getTokenKind() const {
6907   return getLexer().getKind();
6908 }
6909 
6910 SMLoc
6911 AMDGPUAsmParser::getLoc() const {
6912   return getToken().getLoc();
6913 }
6914 
6915 StringRef
6916 AMDGPUAsmParser::getTokenStr() const {
6917   return getToken().getString();
6918 }
6919 
6920 void
6921 AMDGPUAsmParser::lex() {
6922   Parser.Lex();
6923 }
6924 
6925 SMLoc
6926 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6927                                const OperandVector &Operands) const {
6928   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6929     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6930     if (Test(Op))
6931       return Op.getStartLoc();
6932   }
6933   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6934 }
6935 
6936 SMLoc
6937 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6938                            const OperandVector &Operands) const {
6939   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6940   return getOperandLoc(Test, Operands);
6941 }
6942 
6943 SMLoc
6944 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6945                            const OperandVector &Operands) const {
6946   auto Test = [=](const AMDGPUOperand& Op) {
6947     return Op.isRegKind() && Op.getReg() == Reg;
6948   };
6949   return getOperandLoc(Test, Operands);
6950 }
6951 
6952 SMLoc
6953 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6954   auto Test = [](const AMDGPUOperand& Op) {
6955     return Op.IsImmKindLiteral() || Op.isExpr();
6956   };
6957   return getOperandLoc(Test, Operands);
6958 }
6959 
6960 SMLoc
6961 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6962   auto Test = [](const AMDGPUOperand& Op) {
6963     return Op.isImmKindConst();
6964   };
6965   return getOperandLoc(Test, Operands);
6966 }
6967 
6968 //===----------------------------------------------------------------------===//
6969 // swizzle
6970 //===----------------------------------------------------------------------===//
6971 
6972 LLVM_READNONE
6973 static unsigned
6974 encodeBitmaskPerm(const unsigned AndMask,
6975                   const unsigned OrMask,
6976                   const unsigned XorMask) {
6977   using namespace llvm::AMDGPU::Swizzle;
6978 
6979   return BITMASK_PERM_ENC |
6980          (AndMask << BITMASK_AND_SHIFT) |
6981          (OrMask  << BITMASK_OR_SHIFT)  |
6982          (XorMask << BITMASK_XOR_SHIFT);
6983 }
6984 
6985 bool
6986 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6987                                      const unsigned MinVal,
6988                                      const unsigned MaxVal,
6989                                      const StringRef ErrMsg,
6990                                      SMLoc &Loc) {
6991   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6992     return false;
6993   }
6994   Loc = getLoc();
6995   if (!parseExpr(Op)) {
6996     return false;
6997   }
6998   if (Op < MinVal || Op > MaxVal) {
6999     Error(Loc, ErrMsg);
7000     return false;
7001   }
7002 
7003   return true;
7004 }
7005 
7006 bool
7007 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7008                                       const unsigned MinVal,
7009                                       const unsigned MaxVal,
7010                                       const StringRef ErrMsg) {
7011   SMLoc Loc;
7012   for (unsigned i = 0; i < OpNum; ++i) {
7013     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7014       return false;
7015   }
7016 
7017   return true;
7018 }
7019 
7020 bool
7021 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7022   using namespace llvm::AMDGPU::Swizzle;
7023 
7024   int64_t Lane[LANE_NUM];
7025   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7026                            "expected a 2-bit lane id")) {
7027     Imm = QUAD_PERM_ENC;
7028     for (unsigned I = 0; I < LANE_NUM; ++I) {
7029       Imm |= Lane[I] << (LANE_SHIFT * I);
7030     }
7031     return true;
7032   }
7033   return false;
7034 }
7035 
7036 bool
7037 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7038   using namespace llvm::AMDGPU::Swizzle;
7039 
7040   SMLoc Loc;
7041   int64_t GroupSize;
7042   int64_t LaneIdx;
7043 
7044   if (!parseSwizzleOperand(GroupSize,
7045                            2, 32,
7046                            "group size must be in the interval [2,32]",
7047                            Loc)) {
7048     return false;
7049   }
7050   if (!isPowerOf2_64(GroupSize)) {
7051     Error(Loc, "group size must be a power of two");
7052     return false;
7053   }
7054   if (parseSwizzleOperand(LaneIdx,
7055                           0, GroupSize - 1,
7056                           "lane id must be in the interval [0,group size - 1]",
7057                           Loc)) {
7058     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7059     return true;
7060   }
7061   return false;
7062 }
7063 
7064 bool
7065 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7066   using namespace llvm::AMDGPU::Swizzle;
7067 
7068   SMLoc Loc;
7069   int64_t GroupSize;
7070 
7071   if (!parseSwizzleOperand(GroupSize,
7072                            2, 32,
7073                            "group size must be in the interval [2,32]",
7074                            Loc)) {
7075     return false;
7076   }
7077   if (!isPowerOf2_64(GroupSize)) {
7078     Error(Loc, "group size must be a power of two");
7079     return false;
7080   }
7081 
7082   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7083   return true;
7084 }
7085 
7086 bool
7087 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7088   using namespace llvm::AMDGPU::Swizzle;
7089 
7090   SMLoc Loc;
7091   int64_t GroupSize;
7092 
7093   if (!parseSwizzleOperand(GroupSize,
7094                            1, 16,
7095                            "group size must be in the interval [1,16]",
7096                            Loc)) {
7097     return false;
7098   }
7099   if (!isPowerOf2_64(GroupSize)) {
7100     Error(Loc, "group size must be a power of two");
7101     return false;
7102   }
7103 
7104   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7105   return true;
7106 }
7107 
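// Parse the BITMASK_PERM control string, e.g. the "01pi0" in
// offset:swizzle(BITMASK_PERM,"01pi0"). Each of the 5 characters controls one
// bit of the lane id, most significant bit first: '0' forces the bit to 0,
// '1' forces it to 1, 'p' preserves it and 'i' inverts it.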
7108 bool
7109 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7110   using namespace llvm::AMDGPU::Swizzle;
7111 
7112   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7113     return false;
7114   }
7115 
7116   StringRef Ctl;
7117   SMLoc StrLoc = getLoc();
7118   if (!parseString(Ctl)) {
7119     return false;
7120   }
7121   if (Ctl.size() != BITMASK_WIDTH) {
7122     Error(StrLoc, "expected a 5-character mask");
7123     return false;
7124   }
7125 
7126   unsigned AndMask = 0;
7127   unsigned OrMask = 0;
7128   unsigned XorMask = 0;
7129 
7130   for (size_t i = 0; i < Ctl.size(); ++i) {
7131     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7132     switch(Ctl[i]) {
7133     default:
7134       Error(StrLoc, "invalid mask");
7135       return false;
7136     case '0':
7137       break;
7138     case '1':
7139       OrMask |= Mask;
7140       break;
7141     case 'p':
7142       AndMask |= Mask;
7143       break;
7144     case 'i':
7145       AndMask |= Mask;
7146       XorMask |= Mask;
7147       break;
7148     }
7149   }
7150 
7151   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7152   return true;
7153 }
7154 
7155 bool
7156 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7157 
7158   SMLoc OffsetLoc = getLoc();
7159 
7160   if (!parseExpr(Imm, "a swizzle macro")) {
7161     return false;
7162   }
7163   if (!isUInt<16>(Imm)) {
7164     Error(OffsetLoc, "expected a 16-bit offset");
7165     return false;
7166   }
7167   return true;
7168 }
7169 
7170 bool
7171 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7172   using namespace llvm::AMDGPU::Swizzle;
7173 
7174   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7175 
7176     SMLoc ModeLoc = getLoc();
7177     bool Ok = false;
7178 
7179     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7180       Ok = parseSwizzleQuadPerm(Imm);
7181     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7182       Ok = parseSwizzleBitmaskPerm(Imm);
7183     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7184       Ok = parseSwizzleBroadcast(Imm);
7185     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7186       Ok = parseSwizzleSwap(Imm);
7187     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7188       Ok = parseSwizzleReverse(Imm);
7189     } else {
7190       Error(ModeLoc, "expected a swizzle mode");
7191     }
7192 
7193     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7194   }
7195 
7196   return false;
7197 }
7198 
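// Parse a DS swizzle offset, either as a raw 16-bit value, e.g.
// "offset:0x8000", or via a symbolic macro, e.g.
// "offset:swizzle(QUAD_PERM, 0, 1, 2, 3)" or "offset:swizzle(SWAP, 2)".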
7199 OperandMatchResultTy
7200 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7201   SMLoc S = getLoc();
7202   int64_t Imm = 0;
7203 
7204   if (trySkipId("offset")) {
7205 
7206     bool Ok = false;
7207     if (skipToken(AsmToken::Colon, "expected a colon")) {
7208       if (trySkipId("swizzle")) {
7209         Ok = parseSwizzleMacro(Imm);
7210       } else {
7211         Ok = parseSwizzleOffset(Imm);
7212       }
7213     }
7214 
7215     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7216 
7217     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7218   } else {
7219     // Swizzle "offset" operand is optional.
7220     // If it is omitted, try parsing other optional operands.
7221     return parseOptionalOpr(Operands);
7222   }
7223 }
7224 
7225 bool
7226 AMDGPUOperand::isSwizzle() const {
7227   return isImmTy(ImmTySwizzle);
7228 }
7229 
7230 //===----------------------------------------------------------------------===//
7231 // VGPR Index Mode
7232 //===----------------------------------------------------------------------===//
7233 
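// Parse the body of a gpr_idx macro, e.g. the "(SRC0, DST)" part of
// "s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)"; the leading "gpr_idx(" has
// already been consumed by the caller.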
7234 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7235 
7236   using namespace llvm::AMDGPU::VGPRIndexMode;
7237 
7238   if (trySkipToken(AsmToken::RParen)) {
7239     return OFF;
7240   }
7241 
7242   int64_t Imm = 0;
7243 
7244   while (true) {
7245     unsigned Mode = 0;
7246     SMLoc S = getLoc();
7247 
7248     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7249       if (trySkipId(IdSymbolic[ModeId])) {
7250         Mode = 1 << ModeId;
7251         break;
7252       }
7253     }
7254 
7255     if (Mode == 0) {
7256       Error(S, (Imm == 0)?
7257                "expected a VGPR index mode or a closing parenthesis" :
7258                "expected a VGPR index mode");
7259       return UNDEF;
7260     }
7261 
7262     if (Imm & Mode) {
7263       Error(S, "duplicate VGPR index mode");
7264       return UNDEF;
7265     }
7266     Imm |= Mode;
7267 
7268     if (trySkipToken(AsmToken::RParen))
7269       break;
7270     if (!skipToken(AsmToken::Comma,
7271                    "expected a comma or a closing parenthesis"))
7272       return UNDEF;
7273   }
7274 
7275   return Imm;
7276 }
7277 
7278 OperandMatchResultTy
7279 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7280 
7281   using namespace llvm::AMDGPU::VGPRIndexMode;
7282 
7283   int64_t Imm = 0;
7284   SMLoc S = getLoc();
7285 
7286   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7287     Imm = parseGPRIdxMacro();
7288     if (Imm == UNDEF)
7289       return MatchOperand_ParseFail;
7290   } else {
7291     if (getParser().parseAbsoluteExpression(Imm))
7292       return MatchOperand_ParseFail;
7293     if (Imm < 0 || !isUInt<4>(Imm)) {
7294       Error(S, "invalid immediate: only 4-bit values are legal");
7295       return MatchOperand_ParseFail;
7296     }
7297   }
7298 
7299   Operands.push_back(
7300       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7301   return MatchOperand_Success;
7302 }
7303 
7304 bool AMDGPUOperand::isGPRIdxMode() const {
7305   return isImmTy(ImmTyGprIdxMode);
7306 }
7307 
7308 //===----------------------------------------------------------------------===//
7309 // sopp branch targets
7310 //===----------------------------------------------------------------------===//
7311 
7312 OperandMatchResultTy
7313 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7314 
7315   // Make sure we are not parsing something
7316   // that looks like a label or an expression but is not.
7317   // This will improve error messages.
7318   if (isRegister() || isModifier())
7319     return MatchOperand_NoMatch;
7320 
7321   if (!parseExpr(Operands))
7322     return MatchOperand_ParseFail;
7323 
7324   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7325   assert(Opr.isImm() || Opr.isExpr());
7326   SMLoc Loc = Opr.getStartLoc();
7327 
7328   // Currently we do not support arbitrary expressions as branch targets.
7329   // Only labels and absolute expressions are accepted.
7330   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7331     Error(Loc, "expected an absolute expression or a label");
7332   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7333     Error(Loc, "expected a 16-bit signed jump offset");
7334   }
7335 
7336   return MatchOperand_Success;
7337 }
7338 
7339 //===----------------------------------------------------------------------===//
7340 // Boolean holding registers
7341 //===----------------------------------------------------------------------===//
7342 
7343 OperandMatchResultTy
7344 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7345   return parseReg(Operands);
7346 }
7347 
7348 //===----------------------------------------------------------------------===//
7349 // mubuf
7350 //===----------------------------------------------------------------------===//
7351 
7352 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7353   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7354 }
7355 
7356 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7357                                    const OperandVector &Operands,
7358                                    bool IsAtomic,
7359                                    bool IsLds) {
7360   bool IsLdsOpcode = IsLds;
7361   bool HasLdsModifier = false;
7362   OptionalImmIndexMap OptionalIdx;
7363   unsigned FirstOperandIdx = 1;
7364   bool IsAtomicReturn = false;
7365 
7366   if (IsAtomic) {
7367     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7368       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7369       if (!Op.isCPol())
7370         continue;
7371       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7372       break;
7373     }
7374 
7375     if (!IsAtomicReturn) {
7376       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7377       if (NewOpc != -1)
7378         Inst.setOpcode(NewOpc);
7379     }
7380 
7381     IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7382                      SIInstrFlags::IsAtomicRet;
7383   }
7384 
7385   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7386     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7387 
7388     // Add the register arguments
7389     if (Op.isReg()) {
7390       Op.addRegOperands(Inst, 1);
7391       // Insert a tied src for atomic return dst.
7392       // This cannot be postponed as subsequent calls to
7393       // addImmOperands rely on the correct number of MC operands.
7394       if (IsAtomicReturn && i == FirstOperandIdx)
7395         Op.addRegOperands(Inst, 1);
7396       continue;
7397     }
7398 
7399     // Handle the case where soffset is an immediate
7400     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7401       Op.addImmOperands(Inst, 1);
7402       continue;
7403     }
7404 
7405     HasLdsModifier |= Op.isLDS();
7406 
7407     // Handle tokens like 'offen' which are sometimes hard-coded into the
7408     // asm string.  There are no MCInst operands for these.
7409     if (Op.isToken()) {
7410       continue;
7411     }
7412     assert(Op.isImm());
7413 
7414     // Handle optional arguments
7415     OptionalIdx[Op.getImmTy()] = i;
7416   }
7417 
7418   // This is a workaround for an llvm quirk which may result in an
7419   // incorrect instruction selection. Lds and non-lds versions of
7420   // MUBUF instructions are identical except that lds versions
7421   // have a mandatory 'lds' modifier. However, this modifier follows
7422   // optional modifiers, and the llvm asm matcher regards this 'lds'
7423   // modifier as an optional one. As a result, an lds version
7424   // of an opcode may be selected even if it has no 'lds' modifier.
7425   if (IsLdsOpcode && !HasLdsModifier) {
7426     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7427     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7428       Inst.setOpcode(NoLdsOpcode);
7429       IsLdsOpcode = false;
7430     }
7431   }
7432 
7433   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7434   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7435 
7436   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7437     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7438   }
7439   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7440 }
7441 
7442 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7443   OptionalImmIndexMap OptionalIdx;
7444 
7445   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7446     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7447 
7448     // Add the register arguments
7449     if (Op.isReg()) {
7450       Op.addRegOperands(Inst, 1);
7451       continue;
7452     }
7453 
7454     // Handle the case where soffset is an immediate
7455     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7456       Op.addImmOperands(Inst, 1);
7457       continue;
7458     }
7459 
7460     // Handle tokens like 'offen' which are sometimes hard-coded into the
7461     // asm string.  There are no MCInst operands for these.
7462     if (Op.isToken()) {
7463       continue;
7464     }
7465     assert(Op.isImm());
7466 
7467     // Handle optional arguments
7468     OptionalIdx[Op.getImmTy()] = i;
7469   }
7470 
7471   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7472                         AMDGPUOperand::ImmTyOffset);
7473   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7474   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7475   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7476   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7477 }
7478 
7479 //===----------------------------------------------------------------------===//
7480 // mimg
7481 //===----------------------------------------------------------------------===//
7482 
7483 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7484                               bool IsAtomic) {
7485   unsigned I = 1;
7486   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7487   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7488     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7489   }
7490 
7491   if (IsAtomic) {
7492     // Add src, same as dst
7493     assert(Desc.getNumDefs() == 1);
7494     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7495   }
7496 
7497   OptionalImmIndexMap OptionalIdx;
7498 
7499   for (unsigned E = Operands.size(); I != E; ++I) {
7500     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7501 
7502     // Add the register arguments
7503     if (Op.isReg()) {
7504       Op.addRegOperands(Inst, 1);
7505     } else if (Op.isImmModifier()) {
7506       OptionalIdx[Op.getImmTy()] = I;
7507     } else if (!Op.isToken()) {
7508       llvm_unreachable("unexpected operand type");
7509     }
7510   }
7511 
7512   bool IsGFX10Plus = isGFX10Plus();
7513 
7514   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7515   if (IsGFX10Plus)
7516     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7517   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7518   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7519   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7520   if (IsGFX10Plus)
7521     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7522   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7523     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7524   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7525   if (!IsGFX10Plus)
7526     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7527   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7528 }
7529 
7530 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7531   cvtMIMG(Inst, Operands, true);
7532 }
7533 
7534 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7535   OptionalImmIndexMap OptionalIdx;
7536   bool IsAtomicReturn = false;
7537 
7538   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7539     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7540     if (!Op.isCPol())
7541       continue;
7542     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7543     break;
7544   }
7545 
7546   if (!IsAtomicReturn) {
7547     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7548     if (NewOpc != -1)
7549       Inst.setOpcode(NewOpc);
7550   }
7551 
7552   IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7553                    SIInstrFlags::IsAtomicRet;
7554 
7555   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7556     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7557 
7558     // Add the register arguments
7559     if (Op.isReg()) {
7560       Op.addRegOperands(Inst, 1);
7561       if (IsAtomicReturn && i == 1)
7562         Op.addRegOperands(Inst, 1);
7563       continue;
7564     }
7565 
7566     // Handle the case where soffset is an immediate
7567     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7568       Op.addImmOperands(Inst, 1);
7569       continue;
7570     }
7571 
7572     // Handle tokens like 'offen' which are sometimes hard-coded into the
7573     // asm string.  There are no MCInst operands for these.
7574     if (Op.isToken()) {
7575       continue;
7576     }
7577     assert(Op.isImm());
7578 
7579     // Handle optional arguments
7580     OptionalIdx[Op.getImmTy()] = i;
7581   }
7582 
7583   if ((int)Inst.getNumOperands() <=
7584       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7585     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7586   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7587 }
7588 
7589 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7590                                       const OperandVector &Operands) {
7591   for (unsigned I = 1; I < Operands.size(); ++I) {
7592     auto &Operand = (AMDGPUOperand &)*Operands[I];
7593     if (Operand.isReg())
7594       Operand.addRegOperands(Inst, 1);
7595   }
7596 
7597   Inst.addOperand(MCOperand::createImm(1)); // a16
7598 }
7599 
7600 //===----------------------------------------------------------------------===//
7601 // smrd
7602 //===----------------------------------------------------------------------===//
7603 
7604 bool AMDGPUOperand::isSMRDOffset8() const {
7605   return isImm() && isUInt<8>(getImm());
7606 }
7607 
7608 bool AMDGPUOperand::isSMEMOffset() const {
7609   return isImm(); // Offset range is checked later by validator.
7610 }
7611 
7612 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7613   // 32-bit literals are only supported on CI and we only want to use them
7614   // when the offset does not fit in 8 bits.
7615   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7616 }
7617 
7618 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7619   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7620 }
7621 
7622 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7623   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7624 }
7625 
7626 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7627   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7628 }
7629 
7630 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7631   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7632 }
7633 
7634 //===----------------------------------------------------------------------===//
7635 // vop3
7636 //===----------------------------------------------------------------------===//
7637 
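// Convert the value of an "omod" output modifier into its encoding: the omod
// field is 0 for no modifier, 1 for "mul:2", 2 for "mul:4" and 3 for "div:2".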
7638 static bool ConvertOmodMul(int64_t &Mul) {
7639   if (Mul != 1 && Mul != 2 && Mul != 4)
7640     return false;
7641 
7642   Mul >>= 1;
7643   return true;
7644 }
7645 
7646 static bool ConvertOmodDiv(int64_t &Div) {
7647   if (Div == 1) {
7648     Div = 0;
7649     return true;
7650   }
7651 
7652   if (Div == 2) {
7653     Div = 3;
7654     return true;
7655   }
7656 
7657   return false;
7658 }
7659 
7660 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7661 // This is intentional and ensures compatibility with sp3.
7662 // See bug 35397 for details.
7663 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7664   if (BoundCtrl == 0 || BoundCtrl == 1) {
7665     BoundCtrl = 1;
7666     return true;
7667   }
7668   return false;
7669 }
7670 
7671 // Note: the order in this table matches the order of operands in AsmString.
7672 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7673   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7674   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7675   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7676   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7677   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7678   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7679   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7680   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7681   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7682   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7683   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7684   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7685   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7686   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7687   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7688   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7689   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7690   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7691   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7692   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7693   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7694   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7695   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7696   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7697   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7698   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7699   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7700   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7701   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7702   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7703   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7704   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7705   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7706   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7707   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7708   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7709   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7710   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7711   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7712   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7713   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7714 };
7715 
7716 void AMDGPUAsmParser::onBeginOfFile() {
7717   if (!getParser().getStreamer().getTargetStreamer() ||
7718       getSTI().getTargetTriple().getArch() == Triple::r600)
7719     return;
7720 
7721   if (!getTargetStreamer().getTargetID())
7722     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7723 
7724   if (isHsaAbiVersion3AndAbove(&getSTI()))
7725     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7726 }
7727 
7728 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7729 
7730   OperandMatchResultTy res = parseOptionalOpr(Operands);
7731 
7732   // This is a hack to enable hardcoded mandatory operands which follow
7733   // optional operands.
7734   //
7735   // The current design assumes that all operands after the first optional
7736   // operand are also optional. However, the implementation of some instructions
7737   // violates this rule (see e.g. flat/global atomics which have hardcoded 'glc' operands).
7738   //
7739   // To alleviate this problem, we have to (implicitly) parse extra operands
7740   // to make sure the autogenerated parser of custom operands never hits
7741   // hardcoded mandatory operands.
7742 
7743   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7744     if (res != MatchOperand_Success ||
7745         isToken(AsmToken::EndOfStatement))
7746       break;
7747 
7748     trySkipToken(AsmToken::Comma);
7749     res = parseOptionalOpr(Operands);
7750   }
7751 
7752   return res;
7753 }
7754 
7755 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7756   OperandMatchResultTy res;
7757   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7758     // try to parse any optional operand here
7759     if (Op.IsBit) {
7760       res = parseNamedBit(Op.Name, Operands, Op.Type);
7761     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7762       res = parseOModOperand(Operands);
7763     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7764                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7765                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7766       res = parseSDWASel(Operands, Op.Name, Op.Type);
7767     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7768       res = parseSDWADstUnused(Operands);
7769     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7770                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7771                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7772                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7773       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7774                                         Op.ConvertResult);
7775     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7776       res = parseDim(Operands);
7777     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7778       res = parseCPol(Operands);
7779     } else {
7780       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7781       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7782         res = parseOperandArrayWithPrefix("neg", Operands,
7783                                           AMDGPUOperand::ImmTyBLGP,
7784                                           nullptr);
7785       }
7786     }
7787     if (res != MatchOperand_NoMatch) {
7788       return res;
7789     }
7790   }
7791   return MatchOperand_NoMatch;
7792 }
7793 
7794 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7795   StringRef Name = getTokenStr();
7796   if (Name == "mul") {
7797     return parseIntWithPrefix("mul", Operands,
7798                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7799   }
7800 
7801   if (Name == "div") {
7802     return parseIntWithPrefix("div", Operands,
7803                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7804   }
7805 
7806   return MatchOperand_NoMatch;
7807 }
7808 
7809 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7810   cvtVOP3P(Inst, Operands);
7811 
7812   int Opc = Inst.getOpcode();
7813 
7814   int SrcNum;
7815   const int Ops[] = { AMDGPU::OpName::src0,
7816                       AMDGPU::OpName::src1,
7817                       AMDGPU::OpName::src2 };
7818   for (SrcNum = 0;
7819        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7820        ++SrcNum);
7821   assert(SrcNum > 0);
7822 
7823   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7824   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7825 
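  // Bit SrcNum of op_sel selects the destination half for VOP3 op_sel
  // instructions; there is no separate dst_modifiers operand, so the bit is
  // folded into src0_modifiers as DST_OP_SEL below.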
7826   if ((OpSel & (1 << SrcNum)) != 0) {
7827     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7828     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7829     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7830   }
7831 }
7832 
7833 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7834       // 1. This operand is an input modifiers operand
7835   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7836       // 2. This is not the last operand
7837       && Desc.NumOperands > (OpNum + 1)
7838       // 3. The next operand has a register class
7839       && Desc.OpInfo[OpNum + 1].RegClass != -1
7840       // 4. The next operand is not tied to any other operand
7841       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7842 }
7843 
7844 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7845 {
7846   OptionalImmIndexMap OptionalIdx;
7847   unsigned Opc = Inst.getOpcode();
7848 
7849   unsigned I = 1;
7850   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7851   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7852     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7853   }
7854 
7855   for (unsigned E = Operands.size(); I != E; ++I) {
7856     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7857     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7858       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7859     } else if (Op.isInterpSlot() ||
7860                Op.isInterpAttr() ||
7861                Op.isAttrChan()) {
7862       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7863     } else if (Op.isImmModifier()) {
7864       OptionalIdx[Op.getImmTy()] = I;
7865     } else {
7866       llvm_unreachable("unhandled operand type");
7867     }
7868   }
7869 
7870   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7871     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7872   }
7873 
7874   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7875     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7876   }
7877 
7878   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7879     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7880   }
7881 }
7882 
7883 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7884                               OptionalImmIndexMap &OptionalIdx) {
7885   unsigned Opc = Inst.getOpcode();
7886 
7887   unsigned I = 1;
7888   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7889   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7890     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7891   }
7892 
7893   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7894     // This instruction has src modifiers
7895     for (unsigned E = Operands.size(); I != E; ++I) {
7896       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7897       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7898         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7899       } else if (Op.isImmModifier()) {
7900         OptionalIdx[Op.getImmTy()] = I;
7901       } else if (Op.isRegOrImm()) {
7902         Op.addRegOrImmOperands(Inst, 1);
7903       } else {
7904         llvm_unreachable("unhandled operand type");
7905       }
7906     }
7907   } else {
7908     // No src modifiers
7909     for (unsigned E = Operands.size(); I != E; ++I) {
7910       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7911       if (Op.isMod()) {
7912         OptionalIdx[Op.getImmTy()] = I;
7913       } else {
7914         Op.addRegOrImmOperands(Inst, 1);
7915       }
7916     }
7917   }
7918 
7919   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7920     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7921   }
7922 
7923   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7924     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7925   }
7926 
7927   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7928   // they have a src2 register operand that is tied to the dst operand.
7929   // We don't allow modifiers for this operand in the assembler, so
7930   // src2_modifiers should be 0.
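  //
  // Illustrative effect (assumed syntax): for "v_mac_f32_e64 v0, v1, v2" the
  // code below appends src2_modifiers = 0 and then src2 = v0 (a copy of the
  // tied dst operand) so that the MCInst carries the full operand list.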
7931   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7932       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7933       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7934       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7935       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7936       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7937       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7938       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7939       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7940       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7941       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7942     auto it = Inst.begin();
7943     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7944     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7945     ++it;
7946     // Copy the operand to ensure it's not invalidated when Inst grows.
7947     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7948   }
7949 }
7950 
7951 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7952   OptionalImmIndexMap OptionalIdx;
7953   cvtVOP3(Inst, Operands, OptionalIdx);
7954 }
7955 
7956 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7957                                OptionalImmIndexMap &OptIdx) {
7958   const int Opc = Inst.getOpcode();
7959   const MCInstrDesc &Desc = MII.get(Opc);
7960 
7961   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7962 
7963   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7964     assert(!IsPacked);
7965     Inst.addOperand(Inst.getOperand(0));
7966   }
7967 
7968   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
7969   // instruction, and then figure out where to actually put the modifiers
7970 
7971   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7972   if (OpSelIdx != -1) {
7973     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7974   }
7975 
7976   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7977   if (OpSelHiIdx != -1) {
7978     int DefaultVal = IsPacked ? -1 : 0;
7979     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7980                           DefaultVal);
7981   }
7982 
7983   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7984   if (NegLoIdx != -1) {
7985     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7986     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7987   }
7988 
7989   const int Ops[] = { AMDGPU::OpName::src0,
7990                       AMDGPU::OpName::src1,
7991                       AMDGPU::OpName::src2 };
7992   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7993                          AMDGPU::OpName::src1_modifiers,
7994                          AMDGPU::OpName::src2_modifiers };
7995 
7996   unsigned OpSel = 0;
7997   unsigned OpSelHi = 0;
7998   unsigned NegLo = 0;
7999   unsigned NegHi = 0;
8000 
8001   if (OpSelIdx != -1)
8002     OpSel = Inst.getOperand(OpSelIdx).getImm();
8003 
8004   if (OpSelHiIdx != -1)
8005     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8006 
8007   if (NegLoIdx != -1) {
8008     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8009     NegLo = Inst.getOperand(NegLoIdx).getImm();
8010     NegHi = Inst.getOperand(NegHiIdx).getImm();
8011   }
8012 
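  // Fold the parsed op_sel / op_sel_hi / neg_lo / neg_hi bit vectors into the
  // per-source srcN_modifiers operands: bit J of each vector maps to the
  // corresponding flag on source operand J.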
8013   for (int J = 0; J < 3; ++J) {
8014     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8015     if (OpIdx == -1)
8016       break;
8017 
8018     uint32_t ModVal = 0;
8019 
8020     if ((OpSel & (1 << J)) != 0)
8021       ModVal |= SISrcMods::OP_SEL_0;
8022 
8023     if ((OpSelHi & (1 << J)) != 0)
8024       ModVal |= SISrcMods::OP_SEL_1;
8025 
8026     if ((NegLo & (1 << J)) != 0)
8027       ModVal |= SISrcMods::NEG;
8028 
8029     if ((NegHi & (1 << J)) != 0)
8030       ModVal |= SISrcMods::NEG_HI;
8031 
8032     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8033 
8034     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8035   }
8036 }
8037 
8038 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8039   OptionalImmIndexMap OptIdx;
8040   cvtVOP3(Inst, Operands, OptIdx);
8041   cvtVOP3P(Inst, Operands, OptIdx);
8042 }
8043 
8044 //===----------------------------------------------------------------------===//
8045 // dpp
8046 //===----------------------------------------------------------------------===//
8047 
8048 bool AMDGPUOperand::isDPP8() const {
8049   return isImmTy(ImmTyDPP8);
8050 }
8051 
8052 bool AMDGPUOperand::isDPPCtrl() const {
8053   using namespace AMDGPU::DPP;
8054 
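  // A DPP control value is a 9-bit immediate; accept it only if it falls in
  // one of the defined ranges below (quad_perm, row shifts/rotates, wave
  // shifts/rotates, mirrors, broadcasts, row_share and row_xmask).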
8055   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8056   if (result) {
8057     int64_t Imm = getImm();
8058     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8059            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8060            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8061            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8062            (Imm == DppCtrl::WAVE_SHL1) ||
8063            (Imm == DppCtrl::WAVE_ROL1) ||
8064            (Imm == DppCtrl::WAVE_SHR1) ||
8065            (Imm == DppCtrl::WAVE_ROR1) ||
8066            (Imm == DppCtrl::ROW_MIRROR) ||
8067            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8068            (Imm == DppCtrl::BCAST15) ||
8069            (Imm == DppCtrl::BCAST31) ||
8070            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8071            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8072   }
8073   return false;
8074 }
8075 
8076 //===----------------------------------------------------------------------===//
8077 // mAI
8078 //===----------------------------------------------------------------------===//
8079 
8080 bool AMDGPUOperand::isBLGP() const {
8081   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8082 }
8083 
8084 bool AMDGPUOperand::isCBSZ() const {
8085   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8086 }
8087 
8088 bool AMDGPUOperand::isABID() const {
8089   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8090 }
8091 
8092 bool AMDGPUOperand::isS16Imm() const {
8093   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8094 }
8095 
8096 bool AMDGPUOperand::isU16Imm() const {
8097   return isImm() && isUInt<16>(getImm());
8098 }
8099 
8100 //===----------------------------------------------------------------------===//
8101 // dim
8102 //===----------------------------------------------------------------------===//
8103 
8104 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8105   // We want to allow "dim:1D" etc.,
8106   // but the initial 1 is tokenized as an integer.
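  // For example, "dim:1D" is lexed as the integer "1" followed by the
  // identifier "D"; the two pieces are glued back together below before the
  // dim name is looked up.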
8107   std::string Token;
8108   if (isToken(AsmToken::Integer)) {
8109     SMLoc Loc = getToken().getEndLoc();
8110     Token = std::string(getTokenStr());
8111     lex();
8112     if (getLoc() != Loc)
8113       return false;
8114   }
8115 
8116   StringRef Suffix;
8117   if (!parseId(Suffix))
8118     return false;
8119   Token += Suffix;
8120 
8121   StringRef DimId = Token;
8122   if (DimId.startswith("SQ_RSRC_IMG_"))
8123     DimId = DimId.drop_front(12);
8124 
8125   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8126   if (!DimInfo)
8127     return false;
8128 
8129   Encoding = DimInfo->Encoding;
8130   return true;
8131 }
8132 
8133 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8134   if (!isGFX10Plus())
8135     return MatchOperand_NoMatch;
8136 
8137   SMLoc S = getLoc();
8138 
8139   if (!trySkipId("dim", AsmToken::Colon))
8140     return MatchOperand_NoMatch;
8141 
8142   unsigned Encoding;
8143   SMLoc Loc = getLoc();
8144   if (!parseDimId(Encoding)) {
8145     Error(Loc, "invalid dim value");
8146     return MatchOperand_ParseFail;
8147   }
8148 
8149   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8150                                               AMDGPUOperand::ImmTyDim));
8151   return MatchOperand_Success;
8152 }
8153 
8154 //===----------------------------------------------------------------------===//
8155 // dpp
8156 //===----------------------------------------------------------------------===//
8157 
8158 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8159   SMLoc S = getLoc();
8160 
8161   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8162     return MatchOperand_NoMatch;
8163 
8164   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
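  // e.g. "dpp8:[7,6,5,4,3,2,1,0]" reverses the lanes within each group of
  // eight; each entry is a 3-bit lane selector (illustrative example).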
8165 
8166   int64_t Sels[8];
8167 
8168   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8169     return MatchOperand_ParseFail;
8170 
8171   for (size_t i = 0; i < 8; ++i) {
8172     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8173       return MatchOperand_ParseFail;
8174 
8175     SMLoc Loc = getLoc();
8176     if (getParser().parseAbsoluteExpression(Sels[i]))
8177       return MatchOperand_ParseFail;
8178     if (Sels[i] < 0 || Sels[i] > 7) {
8179       Error(Loc, "expected a 3-bit value");
8180       return MatchOperand_ParseFail;
8181     }
8182   }
8183 
8184   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8185     return MatchOperand_ParseFail;
8186 
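  // Pack the eight 3-bit selectors into a single 24-bit immediate, with
  // lane 0's selector in the lowest bits.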
8187   unsigned DPP8 = 0;
8188   for (size_t i = 0; i < 8; ++i)
8189     DPP8 |= (Sels[i] << (i * 3));
8190 
8191   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8192   return MatchOperand_Success;
8193 }
8194 
8195 bool
8196 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8197                                     const OperandVector &Operands) {
8198   if (Ctrl == "row_newbcast")
8199     return isGFX90A();
8200 
8201   if (Ctrl == "row_share" ||
8202       Ctrl == "row_xmask")
8203     return isGFX10Plus();
8204 
8205   if (Ctrl == "wave_shl" ||
8206       Ctrl == "wave_shr" ||
8207       Ctrl == "wave_rol" ||
8208       Ctrl == "wave_ror" ||
8209       Ctrl == "row_bcast")
8210     return isVI() || isGFX9();
8211 
8212   return Ctrl == "row_mirror" ||
8213          Ctrl == "row_half_mirror" ||
8214          Ctrl == "quad_perm" ||
8215          Ctrl == "row_shl" ||
8216          Ctrl == "row_shr" ||
8217          Ctrl == "row_ror";
8218 }
8219 
8220 int64_t
8221 AMDGPUAsmParser::parseDPPCtrlPerm() {
8222   // quad_perm:[%d,%d,%d,%d]
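  // e.g. "quad_perm:[0,1,2,3]" is the identity permutation; each entry is a
  // 2-bit lane index within the quad (illustrative example).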
8223 
8224   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8225     return -1;
8226 
8227   int64_t Val = 0;
8228   for (int i = 0; i < 4; ++i) {
8229     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8230       return -1;
8231 
8232     int64_t Temp;
8233     SMLoc Loc = getLoc();
8234     if (getParser().parseAbsoluteExpression(Temp))
8235       return -1;
8236     if (Temp < 0 || Temp > 3) {
8237       Error(Loc, "expected a 2-bit value");
8238       return -1;
8239     }
8240 
8241     Val += (Temp << i * 2);
8242   }
8243 
8244   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8245     return -1;
8246 
8247   return Val;
8248 }
8249 
8250 int64_t
8251 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8252   using namespace AMDGPU::DPP;
8253 
8254   // sel:%d
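  // e.g. "row_shl:1" or "row_share:15"; the accepted range depends on the
  // control name and is checked against the table below (illustrative).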
8255 
8256   int64_t Val;
8257   SMLoc Loc = getLoc();
8258 
8259   if (getParser().parseAbsoluteExpression(Val))
8260     return -1;
8261 
8262   struct DppCtrlCheck {
8263     int64_t Ctrl;
8264     int Lo;
8265     int Hi;
8266   };
8267 
8268   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8269     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8270     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8271     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8272     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8273     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8274     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8275     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8276     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8277     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8278     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8279     .Default({-1, 0, 0});
8280 
8281   bool Valid;
8282   if (Check.Ctrl == -1) {
8283     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8284     Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8285   } else {
8286     Valid = Check.Lo <= Val && Val <= Check.Hi;
8287     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8288   }
8289 
8290   if (!Valid) {
8291     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8292     return -1;
8293   }
8294 
8295   return Val;
8296 }
8297 
8298 OperandMatchResultTy
8299 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8300   using namespace AMDGPU::DPP;
8301 
8302   if (!isToken(AsmToken::Identifier) ||
8303       !isSupportedDPPCtrl(getTokenStr(), Operands))
8304     return MatchOperand_NoMatch;
8305 
8306   SMLoc S = getLoc();
8307   int64_t Val = -1;
8308   StringRef Ctrl;
8309 
8310   parseId(Ctrl);
8311 
8312   if (Ctrl == "row_mirror") {
8313     Val = DppCtrl::ROW_MIRROR;
8314   } else if (Ctrl == "row_half_mirror") {
8315     Val = DppCtrl::ROW_HALF_MIRROR;
8316   } else {
8317     if (skipToken(AsmToken::Colon, "expected a colon")) {
8318       if (Ctrl == "quad_perm") {
8319         Val = parseDPPCtrlPerm();
8320       } else {
8321         Val = parseDPPCtrlSel(Ctrl);
8322       }
8323     }
8324   }
8325 
8326   if (Val == -1)
8327     return MatchOperand_ParseFail;
8328 
8329   Operands.push_back(
8330     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8331   return MatchOperand_Success;
8332 }
8333 
8334 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8335   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8336 }
8337 
8338 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8339   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8340 }
8341 
8342 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8343   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8344 }
8345 
8346 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8347   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8348 }
8349 
8350 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8351   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8352 }
8353 
8354 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8355   OptionalImmIndexMap OptionalIdx;
8356 
8357   unsigned Opc = Inst.getOpcode();
8358   bool HasModifiers =
8359       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8360   unsigned I = 1;
8361   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8362   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8363     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8364   }
8365 
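  // For DPP8, the optional fi (fetch-inactive) modifier is collected here and
  // appended as an explicit operand after the loop.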
8366   int Fi = 0;
8367   for (unsigned E = Operands.size(); I != E; ++I) {
8368     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8369                                             MCOI::TIED_TO);
8370     if (TiedTo != -1) {
8371       assert((unsigned)TiedTo < Inst.getNumOperands());
8372       // Handle the tied 'old' or src2 operand for MAC instructions.
8373       Inst.addOperand(Inst.getOperand(TiedTo));
8374     }
8375     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8376     // Add the register arguments
8377     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8378       // VOP2b (v_add_u32, v_sub_u32, ...) DPP instructions use the "vcc" token.
8379       // Skip it.
8380       continue;
8381     }
8382 
8383     if (IsDPP8) {
8384       if (Op.isDPP8()) {
8385         Op.addImmOperands(Inst, 1);
8386       } else if (HasModifiers &&
8387                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8388         Op.addRegWithFPInputModsOperands(Inst, 2);
8389       } else if (Op.isFI()) {
8390         Fi = Op.getImm();
8391       } else if (Op.isReg()) {
8392         Op.addRegOperands(Inst, 1);
8393       } else {
8394         llvm_unreachable("Invalid operand type");
8395       }
8396     } else {
8397       if (HasModifiers &&
8398           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8399         Op.addRegWithFPInputModsOperands(Inst, 2);
8400       } else if (Op.isReg()) {
8401         Op.addRegOperands(Inst, 1);
8402       } else if (Op.isDPPCtrl()) {
8403         Op.addImmOperands(Inst, 1);
8404       } else if (Op.isImm()) {
8405         // Handle optional arguments
8406         OptionalIdx[Op.getImmTy()] = I;
8407       } else {
8408         llvm_unreachable("Invalid operand type");
8409       }
8410     }
8411   }
8412 
8413   if (IsDPP8) {
8414     using namespace llvm::AMDGPU::DPP;
8415     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8416   } else {
8417     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8418     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8419     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8420     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8421       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8422     }
8423   }
8424 }
8425 
8426 //===----------------------------------------------------------------------===//
8427 // sdwa
8428 //===----------------------------------------------------------------------===//
8429 
8430 OperandMatchResultTy
8431 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8432                               AMDGPUOperand::ImmTy Type) {
8433   using namespace llvm::AMDGPU::SDWA;
8434 
8435   SMLoc S = getLoc();
8436   StringRef Value;
8437   OperandMatchResultTy res;
8438 
8439   SMLoc StringLoc;
8440   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8441   if (res != MatchOperand_Success) {
8442     return res;
8443   }
8444 
8445   int64_t Int;
8446   Int = StringSwitch<int64_t>(Value)
8447         .Case("BYTE_0", SdwaSel::BYTE_0)
8448         .Case("BYTE_1", SdwaSel::BYTE_1)
8449         .Case("BYTE_2", SdwaSel::BYTE_2)
8450         .Case("BYTE_3", SdwaSel::BYTE_3)
8451         .Case("WORD_0", SdwaSel::WORD_0)
8452         .Case("WORD_1", SdwaSel::WORD_1)
8453         .Case("DWORD", SdwaSel::DWORD)
8454         .Default(0xffffffff);
8455 
8456   if (Int == 0xffffffff) {
8457     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8458     return MatchOperand_ParseFail;
8459   }
8460 
8461   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8462   return MatchOperand_Success;
8463 }
8464 
8465 OperandMatchResultTy
8466 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8467   using namespace llvm::AMDGPU::SDWA;
8468 
8469   SMLoc S = getLoc();
8470   StringRef Value;
8471   OperandMatchResultTy res;
8472 
8473   SMLoc StringLoc;
8474   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8475   if (res != MatchOperand_Success) {
8476     return res;
8477   }
8478 
8479   int64_t Int;
8480   Int = StringSwitch<int64_t>(Value)
8481         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8482         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8483         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8484         .Default(0xffffffff);
8485 
8486   if (Int == 0xffffffff) {
8487     Error(StringLoc, "invalid dst_unused value");
8488     return MatchOperand_ParseFail;
8489   }
8490 
8491   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8492   return MatchOperand_Success;
8493 }
8494 
8495 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8496   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8497 }
8498 
8499 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8500   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8501 }
8502 
8503 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8504   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8505 }
8506 
8507 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8508   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8509 }
8510 
8511 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8512   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8513 }
8514 
8515 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8516                               uint64_t BasicInstType,
8517                               bool SkipDstVcc,
8518                               bool SkipSrcVcc) {
8519   using namespace llvm::AMDGPU::SDWA;
8520 
8521   OptionalImmIndexMap OptionalIdx;
8522   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8523   bool SkippedVcc = false;
8524 
8525   unsigned I = 1;
8526   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8527   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8528     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8529   }
8530 
8531   for (unsigned E = Operands.size(); I != E; ++I) {
8532     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8533     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8534         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8535       // VOP2b (v_add_u32, v_sub_u32, ...) SDWA instructions use the "vcc" token as dst.
8536       // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8537       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8538       // Skip VCC only if we didn't skip it on the previous iteration.
8539       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8540       if (BasicInstType == SIInstrFlags::VOP2 &&
8541           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8542            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8543         SkippedVcc = true;
8544         continue;
8545       } else if (BasicInstType == SIInstrFlags::VOPC &&
8546                  Inst.getNumOperands() == 0) {
8547         SkippedVcc = true;
8548         continue;
8549       }
8550     }
8551     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8552       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8553     } else if (Op.isImm()) {
8554       // Handle optional arguments
8555       OptionalIdx[Op.getImmTy()] = I;
8556     } else {
8557       llvm_unreachable("Invalid operand type");
8558     }
8559     SkippedVcc = false;
8560   }
8561 
8562   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8563       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8564       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8565     // V_NOP_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
8566     switch (BasicInstType) {
8567     case SIInstrFlags::VOP1:
8568       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8569       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8570         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8571       }
8572       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8573       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8574       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8575       break;
8576 
8577     case SIInstrFlags::VOP2:
8578       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8579       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8580         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8581       }
8582       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8583       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8584       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8585       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8586       break;
8587 
8588     case SIInstrFlags::VOPC:
8589       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8590         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8591       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8592       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8593       break;
8594 
8595     default:
8596       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8597     }
8598   }
8599 
8600   // Special case v_mac_{f16, f32}:
8601   // it has a src2 register operand that is tied to the dst operand.
8602   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8603       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8604     auto it = Inst.begin();
8605     std::advance(
8606       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8607     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8608   }
8609 }
8610 
8611 //===----------------------------------------------------------------------===//
8612 // mAI
8613 //===----------------------------------------------------------------------===//
8614 
8615 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8616   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8617 }
8618 
8619 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8620   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8621 }
8622 
8623 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8624   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8625 }
8626 
8627 /// Force static initialization.
8628 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8629   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8630   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8631 }
8632 
8633 #define GET_REGISTER_MATCHER
8634 #define GET_MATCHER_IMPLEMENTATION
8635 #define GET_MNEMONIC_SPELL_CHECKER
8636 #define GET_MNEMONIC_CHECKER
8637 #include "AMDGPUGenAsmMatcher.inc"
8638 
8639 // This function should be defined after the auto-generated include so that the
8640 // MatchClassKind enum is defined.
8641 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8642                                                      unsigned Kind) {
8643   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8644   // But MatchInstructionImpl() expects to meet a token and fails to validate
8645   // the operand. This method checks if we are given an immediate operand but
8646   // expect to get the corresponding token.
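  // For example (illustrative), an "offen" or "gds" modifier is created as an
  // immediate operand by the parser, while the matcher table expects a literal
  // token of the same name; the cases below bridge that mismatch.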
8647   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8648   switch (Kind) {
8649   case MCK_addr64:
8650     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8651   case MCK_gds:
8652     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8653   case MCK_lds:
8654     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8655   case MCK_idxen:
8656     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8657   case MCK_offen:
8658     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8659   case MCK_SSrcB32:
8660     // When operands have expression values, they will return true for isToken,
8661     // because it is not possible to distinguish between a token and an
8662     // expression at parse time. MatchInstructionImpl() will always try to
8663     // match an operand as a token when isToken returns true; if the name of
8664     // the expression is not a valid token, the match will fail,
8665     // so we need to handle it here.
8666     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8667   case MCK_SSrcF32:
8668     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8669   case MCK_SoppBrTarget:
8670     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8671   case MCK_VReg32OrOff:
8672     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8673   case MCK_InterpSlot:
8674     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8675   case MCK_Attr:
8676     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8677   case MCK_AttrChan:
8678     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8679   case MCK_ImmSMEMOffset:
8680     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8681   case MCK_SReg_64:
8682   case MCK_SReg_64_XEXEC:
8683     // Null is defined as a 32-bit register but
8684     // it should also be enabled with 64-bit operands.
8685     // The following code enables it for SReg_64 operands
8686     // used as source and destination. Remaining source
8687     // operands are handled in isInlinableImm.
8688     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8689   default:
8690     return Match_InvalidOperand;
8691   }
8692 }
8693 
8694 //===----------------------------------------------------------------------===//
8695 // endpgm
8696 //===----------------------------------------------------------------------===//
8697 
8698 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8699   SMLoc S = getLoc();
8700   int64_t Imm = 0;
8701 
8702   if (!parseExpr(Imm)) {
8703     // The operand is optional; if not present, default to 0.
8704     Imm = 0;
8705   }
8706 
8707   if (!isUInt<16>(Imm)) {
8708     Error(S, "expected a 16-bit value");
8709     return MatchOperand_ParseFail;
8710   }
8711 
8712   Operands.push_back(
8713       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8714   return MatchOperand_Success;
8715 }
8716 
8717 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8718