1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/TargetParser.h"
40 
41 using namespace llvm;
42 using namespace llvm::AMDGPU;
43 using namespace llvm::amdhsa;
44 
45 namespace {
46 
47 class AMDGPUAsmParser;
48 
49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
50 
51 //===----------------------------------------------------------------------===//
52 // Operand
53 //===----------------------------------------------------------------------===//
54 
55 class AMDGPUOperand : public MCParsedAsmOperand {
56   enum KindTy {
57     Token,
58     Immediate,
59     Register,
60     Expression
61   } Kind;
62 
63   SMLoc StartLoc, EndLoc;
64   const AMDGPUAsmParser *AsmParser;
65 
66 public:
67   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
68       : Kind(Kind_), AsmParser(AsmParser_) {}
69 
70   using Ptr = std::unique_ptr<AMDGPUOperand>;
71 
72   struct Modifiers {
73     bool Abs = false;
74     bool Neg = false;
75     bool Sext = false;
76 
77     bool hasFPModifiers() const { return Abs || Neg; }
78     bool hasIntModifiers() const { return Sext; }
79     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
80 
81     int64_t getFPModifiersOperand() const {
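      // For example, a source operand written as "-|v0|" sets both Abs and
      // Neg, so this returns SISrcMods::ABS | SISrcMods::NEG.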
82       int64_t Operand = 0;
83       Operand |= Abs ? SISrcMods::ABS : 0u;
84       Operand |= Neg ? SISrcMods::NEG : 0u;
85       return Operand;
86     }
87 
88     int64_t getIntModifiersOperand() const {
89       int64_t Operand = 0;
90       Operand |= Sext ? SISrcMods::SEXT : 0u;
91       return Operand;
92     }
93 
94     int64_t getModifiersOperand() const {
95       assert(!(hasFPModifiers() && hasIntModifiers())
96            && "fp and int modifiers should not be used simultaneously");
97       if (hasFPModifiers()) {
98         return getFPModifiersOperand();
99       } else if (hasIntModifiers()) {
100         return getIntModifiersOperand();
101       } else {
102         return 0;
103       }
104     }
105 
106     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
107   };
108 
109   enum ImmTy {
110     ImmTyNone,
111     ImmTyGDS,
112     ImmTyLDS,
113     ImmTyOffen,
114     ImmTyIdxen,
115     ImmTyAddr64,
116     ImmTyOffset,
117     ImmTyInstOffset,
118     ImmTyOffset0,
119     ImmTyOffset1,
120     ImmTyCPol,
121     ImmTySWZ,
122     ImmTyTFE,
123     ImmTyD16,
124     ImmTyClampSI,
125     ImmTyOModSI,
126     ImmTyDPP8,
127     ImmTyDppCtrl,
128     ImmTyDppRowMask,
129     ImmTyDppBankMask,
130     ImmTyDppBoundCtrl,
131     ImmTyDppFi,
132     ImmTySdwaDstSel,
133     ImmTySdwaSrc0Sel,
134     ImmTySdwaSrc1Sel,
135     ImmTySdwaDstUnused,
136     ImmTyDMask,
137     ImmTyDim,
138     ImmTyUNorm,
139     ImmTyDA,
140     ImmTyR128A16,
141     ImmTyA16,
142     ImmTyLWE,
143     ImmTyExpTgt,
144     ImmTyExpCompr,
145     ImmTyExpVM,
146     ImmTyFORMAT,
147     ImmTyHwreg,
148     ImmTyOff,
149     ImmTySendMsg,
150     ImmTyInterpSlot,
151     ImmTyInterpAttr,
152     ImmTyAttrChan,
153     ImmTyOpSel,
154     ImmTyOpSelHi,
155     ImmTyNegLo,
156     ImmTyNegHi,
157     ImmTySwizzle,
158     ImmTyGprIdxMode,
159     ImmTyHigh,
160     ImmTyBLGP,
161     ImmTyCBSZ,
162     ImmTyABID,
163     ImmTyEndpgm,
164   };
165 
166   enum ImmKindTy {
167     ImmKindTyNone,
168     ImmKindTyLiteral,
169     ImmKindTyConst,
170   };
171 
172 private:
173   struct TokOp {
174     const char *Data;
175     unsigned Length;
176   };
177 
178   struct ImmOp {
179     int64_t Val;
180     ImmTy Type;
181     bool IsFPImm;
182     mutable ImmKindTy Kind;
183     Modifiers Mods;
184   };
185 
186   struct RegOp {
187     unsigned RegNo;
188     Modifiers Mods;
189   };
190 
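  // The active member of this union is selected by Kind.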
191   union {
192     TokOp Tok;
193     ImmOp Imm;
194     RegOp Reg;
195     const MCExpr *Expr;
196   };
197 
198 public:
199   bool isToken() const override {
200     if (Kind == Token)
201       return true;
202 
203     // When parsing operands, we can't always tell if something was meant to be
204     // a token, like 'gds', or an expression that references a global variable.
205     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
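    // For example, in "ds_add_u32 v0, v1 gds" the trailing "gds" could equally
    // be a reference to a symbol named "gds" defined elsewhere in the source.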
207     return isSymbolRefExpr();
208   }
209 
210   bool isSymbolRefExpr() const {
211     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
212   }
213 
214   bool isImm() const override {
215     return Kind == Immediate;
216   }
217 
218   void setImmKindNone() const {
219     assert(isImm());
220     Imm.Kind = ImmKindTyNone;
221   }
222 
223   void setImmKindLiteral() const {
224     assert(isImm());
225     Imm.Kind = ImmKindTyLiteral;
226   }
227 
228   void setImmKindConst() const {
229     assert(isImm());
230     Imm.Kind = ImmKindTyConst;
231   }
232 
233   bool IsImmKindLiteral() const {
234     return isImm() && Imm.Kind == ImmKindTyLiteral;
235   }
236 
237   bool isImmKindConst() const {
238     return isImm() && Imm.Kind == ImmKindTyConst;
239   }
240 
241   bool isInlinableImm(MVT type) const;
242   bool isLiteralImm(MVT type) const;
243 
244   bool isRegKind() const {
245     return Kind == Register;
246   }
247 
248   bool isReg() const override {
249     return isRegKind() && !hasModifiers();
250   }
251 
252   bool isRegOrInline(unsigned RCID, MVT type) const {
253     return isRegClass(RCID) || isInlinableImm(type);
254   }
255 
256   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
257     return isRegOrInline(RCID, type) || isLiteralImm(type);
258   }
259 
260   bool isRegOrImmWithInt16InputMods() const {
261     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
262   }
263 
264   bool isRegOrImmWithInt32InputMods() const {
265     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
266   }
267 
268   bool isRegOrImmWithInt64InputMods() const {
269     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
270   }
271 
272   bool isRegOrImmWithFP16InputMods() const {
273     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
274   }
275 
276   bool isRegOrImmWithFP32InputMods() const {
277     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
278   }
279 
280   bool isRegOrImmWithFP64InputMods() const {
281     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
282   }
283 
284   bool isVReg() const {
285     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
286            isRegClass(AMDGPU::VReg_64RegClassID) ||
287            isRegClass(AMDGPU::VReg_96RegClassID) ||
288            isRegClass(AMDGPU::VReg_128RegClassID) ||
289            isRegClass(AMDGPU::VReg_160RegClassID) ||
290            isRegClass(AMDGPU::VReg_192RegClassID) ||
291            isRegClass(AMDGPU::VReg_256RegClassID) ||
292            isRegClass(AMDGPU::VReg_512RegClassID) ||
293            isRegClass(AMDGPU::VReg_1024RegClassID);
294   }
295 
296   bool isVReg32() const {
297     return isRegClass(AMDGPU::VGPR_32RegClassID);
298   }
299 
300   bool isVReg32OrOff() const {
301     return isOff() || isVReg32();
302   }
303 
304   bool isNull() const {
305     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
306   }
307 
308   bool isVRegWithInputMods() const;
309 
310   bool isSDWAOperand(MVT type) const;
311   bool isSDWAFP16Operand() const;
312   bool isSDWAFP32Operand() const;
313   bool isSDWAInt16Operand() const;
314   bool isSDWAInt32Operand() const;
315 
316   bool isImmTy(ImmTy ImmT) const {
317     return isImm() && Imm.Type == ImmT;
318   }
319 
320   bool isImmModifier() const {
321     return isImm() && Imm.Type != ImmTyNone;
322   }
323 
324   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
325   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
326   bool isDMask() const { return isImmTy(ImmTyDMask); }
327   bool isDim() const { return isImmTy(ImmTyDim); }
328   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
329   bool isDA() const { return isImmTy(ImmTyDA); }
330   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
331   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
332   bool isLWE() const { return isImmTy(ImmTyLWE); }
333   bool isOff() const { return isImmTy(ImmTyOff); }
334   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
335   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
336   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
337   bool isOffen() const { return isImmTy(ImmTyOffen); }
338   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
339   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
340   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
341   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
342   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
343 
344   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
345   bool isGDS() const { return isImmTy(ImmTyGDS); }
346   bool isLDS() const { return isImmTy(ImmTyLDS); }
347   bool isCPol() const { return isImmTy(ImmTyCPol); }
348   bool isSWZ() const { return isImmTy(ImmTySWZ); }
349   bool isTFE() const { return isImmTy(ImmTyTFE); }
350   bool isD16() const { return isImmTy(ImmTyD16); }
351   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
352   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
353   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
354   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
355   bool isFI() const { return isImmTy(ImmTyDppFi); }
356   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
357   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
358   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
359   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
360   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
361   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
362   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
363   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
364   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
365   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
366   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
367   bool isHigh() const { return isImmTy(ImmTyHigh); }
368 
369   bool isMod() const {
370     return isClampSI() || isOModSI();
371   }
372 
373   bool isRegOrImm() const {
374     return isReg() || isImm();
375   }
376 
377   bool isRegClass(unsigned RCID) const;
378 
379   bool isInlineValue() const;
380 
381   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
382     return isRegOrInline(RCID, type) && !hasModifiers();
383   }
384 
385   bool isSCSrcB16() const {
386     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
387   }
388 
389   bool isSCSrcV2B16() const {
390     return isSCSrcB16();
391   }
392 
393   bool isSCSrcB32() const {
394     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
395   }
396 
397   bool isSCSrcB64() const {
398     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
399   }
400 
401   bool isBoolReg() const;
402 
403   bool isSCSrcF16() const {
404     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
405   }
406 
407   bool isSCSrcV2F16() const {
408     return isSCSrcF16();
409   }
410 
411   bool isSCSrcF32() const {
412     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
413   }
414 
415   bool isSCSrcF64() const {
416     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
417   }
418 
419   bool isSSrcB32() const {
420     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
421   }
422 
423   bool isSSrcB16() const {
424     return isSCSrcB16() || isLiteralImm(MVT::i16);
425   }
426 
427   bool isSSrcV2B16() const {
428     llvm_unreachable("cannot happen");
429     return isSSrcB16();
430   }
431 
432   bool isSSrcB64() const {
433     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
434     // See isVSrc64().
435     return isSCSrcB64() || isLiteralImm(MVT::i64);
436   }
437 
438   bool isSSrcF32() const {
439     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
440   }
441 
442   bool isSSrcF64() const {
443     return isSCSrcB64() || isLiteralImm(MVT::f64);
444   }
445 
446   bool isSSrcF16() const {
447     return isSCSrcB16() || isLiteralImm(MVT::f16);
448   }
449 
450   bool isSSrcV2F16() const {
451     llvm_unreachable("cannot happen");
452     return isSSrcF16();
453   }
454 
455   bool isSSrcV2FP32() const {
456     llvm_unreachable("cannot happen");
457     return isSSrcF32();
458   }
459 
460   bool isSCSrcV2FP32() const {
461     llvm_unreachable("cannot happen");
462     return isSCSrcF32();
463   }
464 
465   bool isSSrcV2INT32() const {
466     llvm_unreachable("cannot happen");
467     return isSSrcB32();
468   }
469 
470   bool isSCSrcV2INT32() const {
471     llvm_unreachable("cannot happen");
472     return isSCSrcB32();
473   }
474 
475   bool isSSrcOrLdsB32() const {
476     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
477            isLiteralImm(MVT::i32) || isExpr();
478   }
479 
480   bool isVCSrcB32() const {
481     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
482   }
483 
484   bool isVCSrcB64() const {
485     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
486   }
487 
488   bool isVCSrcB16() const {
489     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
490   }
491 
492   bool isVCSrcV2B16() const {
493     return isVCSrcB16();
494   }
495 
496   bool isVCSrcF32() const {
497     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
498   }
499 
500   bool isVCSrcF64() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
502   }
503 
504   bool isVCSrcF16() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
506   }
507 
508   bool isVCSrcV2F16() const {
509     return isVCSrcF16();
510   }
511 
512   bool isVSrcB32() const {
513     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
514   }
515 
516   bool isVSrcB64() const {
517     return isVCSrcF64() || isLiteralImm(MVT::i64);
518   }
519 
520   bool isVSrcB16() const {
521     return isVCSrcB16() || isLiteralImm(MVT::i16);
522   }
523 
524   bool isVSrcV2B16() const {
525     return isVSrcB16() || isLiteralImm(MVT::v2i16);
526   }
527 
528   bool isVCSrcV2FP32() const {
529     return isVCSrcF64();
530   }
531 
532   bool isVSrcV2FP32() const {
533     return isVSrcF64() || isLiteralImm(MVT::v2f32);
534   }
535 
536   bool isVCSrcV2INT32() const {
537     return isVCSrcB64();
538   }
539 
540   bool isVSrcV2INT32() const {
541     return isVSrcB64() || isLiteralImm(MVT::v2i32);
542   }
543 
544   bool isVSrcF32() const {
545     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
546   }
547 
548   bool isVSrcF64() const {
549     return isVCSrcF64() || isLiteralImm(MVT::f64);
550   }
551 
552   bool isVSrcF16() const {
553     return isVCSrcF16() || isLiteralImm(MVT::f16);
554   }
555 
556   bool isVSrcV2F16() const {
557     return isVSrcF16() || isLiteralImm(MVT::v2f16);
558   }
559 
560   bool isVISrcB32() const {
561     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
562   }
563 
564   bool isVISrcB16() const {
565     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
566   }
567 
568   bool isVISrcV2B16() const {
569     return isVISrcB16();
570   }
571 
572   bool isVISrcF32() const {
573     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
574   }
575 
576   bool isVISrcF16() const {
577     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
578   }
579 
580   bool isVISrcV2F16() const {
581     return isVISrcF16() || isVISrcB32();
582   }
583 
584   bool isVISrc_64B64() const {
585     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
586   }
587 
588   bool isVISrc_64F64() const {
589     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
590   }
591 
592   bool isVISrc_64V2FP32() const {
593     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
594   }
595 
596   bool isVISrc_64V2INT32() const {
597     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
598   }
599 
600   bool isVISrc_256B64() const {
601     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
602   }
603 
604   bool isVISrc_256F64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
606   }
607 
608   bool isVISrc_128B16() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
610   }
611 
612   bool isVISrc_128V2B16() const {
613     return isVISrc_128B16();
614   }
615 
616   bool isVISrc_128B32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_128F32() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
622   }
623 
624   bool isVISrc_256V2FP32() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
626   }
627 
628   bool isVISrc_256V2INT32() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
630   }
631 
632   bool isVISrc_512B32() const {
633     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
634   }
635 
636   bool isVISrc_512B16() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
638   }
639 
640   bool isVISrc_512V2B16() const {
641     return isVISrc_512B16();
642   }
643 
644   bool isVISrc_512F32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_512F16() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
650   }
651 
652   bool isVISrc_512V2F16() const {
653     return isVISrc_512F16() || isVISrc_512B32();
654   }
655 
656   bool isVISrc_1024B32() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
658   }
659 
660   bool isVISrc_1024B16() const {
661     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
662   }
663 
664   bool isVISrc_1024V2B16() const {
665     return isVISrc_1024B16();
666   }
667 
668   bool isVISrc_1024F32() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
670   }
671 
672   bool isVISrc_1024F16() const {
673     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
674   }
675 
676   bool isVISrc_1024V2F16() const {
677     return isVISrc_1024F16() || isVISrc_1024B32();
678   }
679 
680   bool isAISrcB32() const {
681     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
682   }
683 
684   bool isAISrcB16() const {
685     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
686   }
687 
688   bool isAISrcV2B16() const {
689     return isAISrcB16();
690   }
691 
692   bool isAISrcF32() const {
693     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
694   }
695 
696   bool isAISrcF16() const {
697     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
698   }
699 
700   bool isAISrcV2F16() const {
701     return isAISrcF16() || isAISrcB32();
702   }
703 
704   bool isAISrc_64B64() const {
705     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
706   }
707 
708   bool isAISrc_64F64() const {
709     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
710   }
711 
712   bool isAISrc_128B32() const {
713     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
714   }
715 
716   bool isAISrc_128B16() const {
717     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
718   }
719 
720   bool isAISrc_128V2B16() const {
721     return isAISrc_128B16();
722   }
723 
724   bool isAISrc_128F32() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
726   }
727 
728   bool isAISrc_128F16() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
730   }
731 
732   bool isAISrc_128V2F16() const {
733     return isAISrc_128F16() || isAISrc_128B32();
734   }
735 
736   bool isVISrc_128F16() const {
737     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
738   }
739 
740   bool isVISrc_128V2F16() const {
741     return isVISrc_128F16() || isVISrc_128B32();
742   }
743 
744   bool isAISrc_256B64() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
746   }
747 
748   bool isAISrc_256F64() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
750   }
751 
752   bool isAISrc_512B32() const {
753     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
754   }
755 
756   bool isAISrc_512B16() const {
757     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
758   }
759 
760   bool isAISrc_512V2B16() const {
761     return isAISrc_512B16();
762   }
763 
764   bool isAISrc_512F32() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
766   }
767 
768   bool isAISrc_512F16() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
770   }
771 
772   bool isAISrc_512V2F16() const {
773     return isAISrc_512F16() || isAISrc_512B32();
774   }
775 
776   bool isAISrc_1024B32() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
778   }
779 
780   bool isAISrc_1024B16() const {
781     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
782   }
783 
784   bool isAISrc_1024V2B16() const {
785     return isAISrc_1024B16();
786   }
787 
788   bool isAISrc_1024F32() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
790   }
791 
792   bool isAISrc_1024F16() const {
793     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
794   }
795 
796   bool isAISrc_1024V2F16() const {
797     return isAISrc_1024F16() || isAISrc_1024B32();
798   }
799 
800   bool isKImmFP32() const {
801     return isLiteralImm(MVT::f32);
802   }
803 
804   bool isKImmFP16() const {
805     return isLiteralImm(MVT::f16);
806   }
807 
808   bool isMem() const override {
809     return false;
810   }
811 
812   bool isExpr() const {
813     return Kind == Expression;
814   }
815 
816   bool isSoppBrTarget() const {
817     return isExpr() || isImm();
818   }
819 
820   bool isSWaitCnt() const;
821   bool isDepCtr() const;
822   bool isHwreg() const;
823   bool isSendMsg() const;
824   bool isSwizzle() const;
825   bool isSMRDOffset8() const;
826   bool isSMEMOffset() const;
827   bool isSMRDLiteralOffset() const;
828   bool isDPP8() const;
829   bool isDPPCtrl() const;
830   bool isBLGP() const;
831   bool isCBSZ() const;
832   bool isABID() const;
833   bool isGPRIdxMode() const;
834   bool isS16Imm() const;
835   bool isU16Imm() const;
836   bool isEndpgm() const;
837 
838   StringRef getExpressionAsToken() const {
839     assert(isExpr());
840     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
841     return S->getSymbol().getName();
842   }
843 
844   StringRef getToken() const {
845     assert(isToken());
846 
847     if (Kind == Expression)
848       return getExpressionAsToken();
849 
850     return StringRef(Tok.Data, Tok.Length);
851   }
852 
853   int64_t getImm() const {
854     assert(isImm());
855     return Imm.Val;
856   }
857 
858   void setImm(int64_t Val) {
859     assert(isImm());
860     Imm.Val = Val;
861   }
862 
863   ImmTy getImmTy() const {
864     assert(isImm());
865     return Imm.Type;
866   }
867 
868   unsigned getReg() const override {
869     assert(isRegKind());
870     return Reg.RegNo;
871   }
872 
873   SMLoc getStartLoc() const override {
874     return StartLoc;
875   }
876 
877   SMLoc getEndLoc() const override {
878     return EndLoc;
879   }
880 
881   SMRange getLocRange() const {
882     return SMRange(StartLoc, EndLoc);
883   }
884 
885   Modifiers getModifiers() const {
886     assert(isRegKind() || isImmTy(ImmTyNone));
887     return isRegKind() ? Reg.Mods : Imm.Mods;
888   }
889 
890   void setModifiers(Modifiers Mods) {
891     assert(isRegKind() || isImmTy(ImmTyNone));
892     if (isRegKind())
893       Reg.Mods = Mods;
894     else
895       Imm.Mods = Mods;
896   }
897 
898   bool hasModifiers() const {
899     return getModifiers().hasModifiers();
900   }
901 
902   bool hasFPModifiers() const {
903     return getModifiers().hasFPModifiers();
904   }
905 
906   bool hasIntModifiers() const {
907     return getModifiers().hasIntModifiers();
908   }
909 
910   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
911 
912   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
913 
914   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
915 
916   template <unsigned Bitwidth>
917   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
918 
919   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
920     addKImmFPOperands<16>(Inst, N);
921   }
922 
923   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
924     addKImmFPOperands<32>(Inst, N);
925   }
926 
927   void addRegOperands(MCInst &Inst, unsigned N) const;
928 
929   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
930     addRegOperands(Inst, N);
931   }
932 
933   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
934     if (isRegKind())
935       addRegOperands(Inst, N);
936     else if (isExpr())
937       Inst.addOperand(MCOperand::createExpr(Expr));
938     else
939       addImmOperands(Inst, N);
940   }
941 
942   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
943     Modifiers Mods = getModifiers();
944     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
945     if (isRegKind()) {
946       addRegOperands(Inst, N);
947     } else {
948       addImmOperands(Inst, N, false);
949     }
950   }
951 
952   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
953     assert(!hasIntModifiers());
954     addRegOrImmWithInputModsOperands(Inst, N);
955   }
956 
957   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
958     assert(!hasFPModifiers());
959     addRegOrImmWithInputModsOperands(Inst, N);
960   }
961 
962   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
963     Modifiers Mods = getModifiers();
964     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
965     assert(isRegKind());
966     addRegOperands(Inst, N);
967   }
968 
969   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
970     assert(!hasIntModifiers());
971     addRegWithInputModsOperands(Inst, N);
972   }
973 
974   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
975     assert(!hasFPModifiers());
976     addRegWithInputModsOperands(Inst, N);
977   }
978 
979   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
980     if (isImm())
981       addImmOperands(Inst, N);
982     else {
983       assert(isExpr());
984       Inst.addOperand(MCOperand::createExpr(Expr));
985     }
986   }
987 
988   static void printImmTy(raw_ostream& OS, ImmTy Type) {
989     switch (Type) {
990     case ImmTyNone: OS << "None"; break;
991     case ImmTyGDS: OS << "GDS"; break;
992     case ImmTyLDS: OS << "LDS"; break;
993     case ImmTyOffen: OS << "Offen"; break;
994     case ImmTyIdxen: OS << "Idxen"; break;
995     case ImmTyAddr64: OS << "Addr64"; break;
996     case ImmTyOffset: OS << "Offset"; break;
997     case ImmTyInstOffset: OS << "InstOffset"; break;
998     case ImmTyOffset0: OS << "Offset0"; break;
999     case ImmTyOffset1: OS << "Offset1"; break;
1000     case ImmTyCPol: OS << "CPol"; break;
1001     case ImmTySWZ: OS << "SWZ"; break;
1002     case ImmTyTFE: OS << "TFE"; break;
1003     case ImmTyD16: OS << "D16"; break;
1004     case ImmTyFORMAT: OS << "FORMAT"; break;
1005     case ImmTyClampSI: OS << "ClampSI"; break;
1006     case ImmTyOModSI: OS << "OModSI"; break;
1007     case ImmTyDPP8: OS << "DPP8"; break;
1008     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1009     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1010     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1011     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1012     case ImmTyDppFi: OS << "FI"; break;
1013     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1014     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1015     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1016     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1017     case ImmTyDMask: OS << "DMask"; break;
1018     case ImmTyDim: OS << "Dim"; break;
1019     case ImmTyUNorm: OS << "UNorm"; break;
1020     case ImmTyDA: OS << "DA"; break;
1021     case ImmTyR128A16: OS << "R128A16"; break;
1022     case ImmTyA16: OS << "A16"; break;
1023     case ImmTyLWE: OS << "LWE"; break;
1024     case ImmTyOff: OS << "Off"; break;
1025     case ImmTyExpTgt: OS << "ExpTgt"; break;
1026     case ImmTyExpCompr: OS << "ExpCompr"; break;
1027     case ImmTyExpVM: OS << "ExpVM"; break;
1028     case ImmTyHwreg: OS << "Hwreg"; break;
1029     case ImmTySendMsg: OS << "SendMsg"; break;
1030     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1031     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1032     case ImmTyAttrChan: OS << "AttrChan"; break;
1033     case ImmTyOpSel: OS << "OpSel"; break;
1034     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1035     case ImmTyNegLo: OS << "NegLo"; break;
1036     case ImmTyNegHi: OS << "NegHi"; break;
1037     case ImmTySwizzle: OS << "Swizzle"; break;
1038     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1039     case ImmTyHigh: OS << "High"; break;
1040     case ImmTyBLGP: OS << "BLGP"; break;
1041     case ImmTyCBSZ: OS << "CBSZ"; break;
1042     case ImmTyABID: OS << "ABID"; break;
1043     case ImmTyEndpgm: OS << "Endpgm"; break;
1044     }
1045   }
1046 
1047   void print(raw_ostream &OS) const override {
1048     switch (Kind) {
1049     case Register:
1050       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1051       break;
1052     case Immediate:
1053       OS << '<' << getImm();
1054       if (getImmTy() != ImmTyNone) {
1055         OS << " type: "; printImmTy(OS, getImmTy());
1056       }
1057       OS << " mods: " << Imm.Mods << '>';
1058       break;
1059     case Token:
1060       OS << '\'' << getToken() << '\'';
1061       break;
1062     case Expression:
1063       OS << "<expr " << *Expr << '>';
1064       break;
1065     }
1066   }
1067 
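  // The factory methods below create operands that the parser then pushes onto
  // its OperandVector, e.g.
  //   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, Loc, ImmTyOffset));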
1068   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1069                                       int64_t Val, SMLoc Loc,
1070                                       ImmTy Type = ImmTyNone,
1071                                       bool IsFPImm = false) {
1072     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1073     Op->Imm.Val = Val;
1074     Op->Imm.IsFPImm = IsFPImm;
1075     Op->Imm.Kind = ImmKindTyNone;
1076     Op->Imm.Type = Type;
1077     Op->Imm.Mods = Modifiers();
1078     Op->StartLoc = Loc;
1079     Op->EndLoc = Loc;
1080     return Op;
1081   }
1082 
1083   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1084                                         StringRef Str, SMLoc Loc,
1085                                         bool HasExplicitEncodingSize = true) {
1086     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1087     Res->Tok.Data = Str.data();
1088     Res->Tok.Length = Str.size();
1089     Res->StartLoc = Loc;
1090     Res->EndLoc = Loc;
1091     return Res;
1092   }
1093 
1094   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1095                                       unsigned RegNo, SMLoc S,
1096                                       SMLoc E) {
1097     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1098     Op->Reg.RegNo = RegNo;
1099     Op->Reg.Mods = Modifiers();
1100     Op->StartLoc = S;
1101     Op->EndLoc = E;
1102     return Op;
1103   }
1104 
1105   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1106                                        const class MCExpr *Expr, SMLoc S) {
1107     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1108     Op->Expr = Expr;
1109     Op->StartLoc = S;
1110     Op->EndLoc = S;
1111     return Op;
1112   }
1113 };
1114 
1115 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1116   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1117   return OS;
1118 }
1119 
1120 //===----------------------------------------------------------------------===//
1121 // AsmParser
1122 //===----------------------------------------------------------------------===//
1123 
// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at a .amdgpu_hsa_kernel directive and ends at the next
// .amdgpu_hsa_kernel directive or at EOF.
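// For example, after parsing "v_add_f32 v5, s2, v1" inside a kernel scope,
// VgprIndexUnusedMin becomes 6 and SgprIndexUnusedMin becomes 3, and the
// .kernel.vgpr_count and .kernel.sgpr_count symbols are updated accordingly.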
1127 class KernelScopeInfo {
1128   int SgprIndexUnusedMin = -1;
1129   int VgprIndexUnusedMin = -1;
1130   int AgprIndexUnusedMin = -1;
1131   MCContext *Ctx = nullptr;
1132   MCSubtargetInfo const *MSTI = nullptr;
1133 
1134   void usesSgprAt(int i) {
1135     if (i >= SgprIndexUnusedMin) {
1136       SgprIndexUnusedMin = ++i;
1137       if (Ctx) {
1138         MCSymbol* const Sym =
1139           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1140         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1141       }
1142     }
1143   }
1144 
1145   void usesVgprAt(int i) {
1146     if (i >= VgprIndexUnusedMin) {
1147       VgprIndexUnusedMin = ++i;
1148       if (Ctx) {
1149         MCSymbol* const Sym =
1150           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1151         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1152                                          VgprIndexUnusedMin);
1153         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1154       }
1155     }
1156   }
1157 
1158   void usesAgprAt(int i) {
    // If the target has no MAI instructions, any use of AGPRs will be rejected
    // in AMDGPUAsmParser::MatchAndEmitInstruction, so there is nothing to
    // track here.
1160     if (!hasMAIInsts(*MSTI))
1161       return;
1162 
1163     if (i >= AgprIndexUnusedMin) {
1164       AgprIndexUnusedMin = ++i;
1165       if (Ctx) {
1166         MCSymbol* const Sym =
1167           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1168         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1169 
1170         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1171         MCSymbol* const vSym =
1172           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1173         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1174                                          VgprIndexUnusedMin);
1175         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1176       }
1177     }
1178   }
1179 
1180 public:
1181   KernelScopeInfo() = default;
1182 
1183   void initialize(MCContext &Context) {
1184     Ctx = &Context;
1185     MSTI = Ctx->getSubtargetInfo();
1186 
1187     usesSgprAt(SgprIndexUnusedMin = -1);
1188     usesVgprAt(VgprIndexUnusedMin = -1);
1189     if (hasMAIInsts(*MSTI)) {
1190       usesAgprAt(AgprIndexUnusedMin = -1);
1191     }
1192   }
1193 
1194   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1195                     unsigned RegWidth) {
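    // DwordRegIndex + divideCeil(RegWidth, 32) - 1 is the highest 32-bit
    // register index touched, e.g. s[4:7] has DwordRegIndex = 4 and
    // RegWidth = 128 bits, giving 4 + 4 - 1 = 7.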
1196     switch (RegKind) {
1197     case IS_SGPR:
1198       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1199       break;
1200     case IS_AGPR:
1201       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1202       break;
1203     case IS_VGPR:
1204       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1205       break;
1206     default:
1207       break;
1208     }
1209   }
1210 };
1211 
1212 class AMDGPUAsmParser : public MCTargetAsmParser {
1213   MCAsmParser &Parser;
1214 
1215   // Number of extra operands parsed after the first optional operand.
1216   // This may be necessary to skip hardcoded mandatory operands.
1217   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1218 
1219   unsigned ForcedEncodingSize = 0;
1220   bool ForcedDPP = false;
1221   bool ForcedSDWA = false;
1222   KernelScopeInfo KernelScope;
1223   unsigned CPolSeen;
1224 
1225   /// @name Auto-generated Match Functions
1226   /// {
1227 
1228 #define GET_ASSEMBLER_HEADER
1229 #include "AMDGPUGenAsmMatcher.inc"
1230 
1231   /// }
1232 
1233 private:
1234   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1235   bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for the given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
1238   ///
1239   /// \param Features [in] Target features, used for bug corrections.
1240   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1241   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1242   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1243   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1244   /// descriptor field, if valid.
1245   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1246   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1247   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1248   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1249   /// \param VGPRBlocks [out] Result VGPR block count.
1250   /// \param SGPRBlocks [out] Result SGPR block count.
1251   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1252                           bool FlatScrUsed, bool XNACKUsed,
1253                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1254                           SMRange VGPRRange, unsigned NextFreeSGPR,
1255                           SMRange SGPRRange, unsigned &VGPRBlocks,
1256                           unsigned &SGPRBlocks);
1257   bool ParseDirectiveAMDGCNTarget();
1258   bool ParseDirectiveAMDHSAKernel();
1259   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1260   bool ParseDirectiveHSACodeObjectVersion();
1261   bool ParseDirectiveHSACodeObjectISA();
1262   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1263   bool ParseDirectiveAMDKernelCodeT();
1264   // TODO: Possibly make subtargetHasRegister const.
1265   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1266   bool ParseDirectiveAMDGPUHsaKernel();
1267 
1268   bool ParseDirectiveISAVersion();
1269   bool ParseDirectiveHSAMetadata();
1270   bool ParseDirectivePALMetadataBegin();
1271   bool ParseDirectivePALMetadata();
1272   bool ParseDirectiveAMDGPULDS();
1273 
1274   /// Common code to parse out a block of text (typically YAML) between start and
1275   /// end directives.
1276   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1277                            const char *AssemblerDirectiveEnd,
1278                            std::string &CollectString);
1279 
1280   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1281                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1282   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1283                            unsigned &RegNum, unsigned &RegWidth,
1284                            bool RestoreOnFailure = false);
1285   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1286                            unsigned &RegNum, unsigned &RegWidth,
1287                            SmallVectorImpl<AsmToken> &Tokens);
1288   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1289                            unsigned &RegWidth,
1290                            SmallVectorImpl<AsmToken> &Tokens);
1291   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1292                            unsigned &RegWidth,
1293                            SmallVectorImpl<AsmToken> &Tokens);
1294   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1295                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1296   bool ParseRegRange(unsigned& Num, unsigned& Width);
1297   unsigned getRegularReg(RegisterKind RegKind,
1298                          unsigned RegNum,
1299                          unsigned RegWidth,
1300                          SMLoc Loc);
1301 
1302   bool isRegister();
1303   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1304   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1305   void initializeGprCountSymbol(RegisterKind RegKind);
1306   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1307                              unsigned RegWidth);
1308   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1309                     bool IsAtomic, bool IsLds = false);
1310   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1311                  bool IsGdsHardcoded);
1312 
1313 public:
1314   enum AMDGPUMatchResultTy {
1315     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1316   };
1317   enum OperandMode {
1318     OperandMode_Default,
1319     OperandMode_NSA,
1320   };
1321 
1322   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1323 
1324   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1325                const MCInstrInfo &MII,
1326                const MCTargetOptions &Options)
1327       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1328     MCAsmParserExtension::Initialize(Parser);
1329 
1330     if (getFeatureBits().none()) {
1331       // Set default features.
1332       copySTI().ToggleFeature("southern-islands");
1333     }
1334 
1335     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1336 
1337     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
1342       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1343       MCContext &Ctx = getContext();
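      // For example, a gfx900 target under the code object v3+ ABI defines
      // .amdgcn.gfx_generation_number = 9, .amdgcn.gfx_generation_minor = 0 and
      // .amdgcn.gfx_generation_stepping = 0; older ABIs use the
      // .option.machine_version_* symbols instead.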
1344       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1345         MCSymbol *Sym =
1346             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1347         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1348         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1349         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1350         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1351         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1352       } else {
1353         MCSymbol *Sym =
1354             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1355         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1356         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1357         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1358         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1359         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1360       }
1361       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1362         initializeGprCountSymbol(IS_VGPR);
1363         initializeGprCountSymbol(IS_SGPR);
1364       } else
1365         KernelScope.initialize(getContext());
1366     }
1367   }
1368 
1369   bool hasMIMG_R128() const {
1370     return AMDGPU::hasMIMG_R128(getSTI());
1371   }
1372 
1373   bool hasPackedD16() const {
1374     return AMDGPU::hasPackedD16(getSTI());
1375   }
1376 
1377   bool hasGFX10A16() const {
1378     return AMDGPU::hasGFX10A16(getSTI());
1379   }
1380 
1381   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1382 
1383   bool isSI() const {
1384     return AMDGPU::isSI(getSTI());
1385   }
1386 
1387   bool isCI() const {
1388     return AMDGPU::isCI(getSTI());
1389   }
1390 
1391   bool isVI() const {
1392     return AMDGPU::isVI(getSTI());
1393   }
1394 
1395   bool isGFX9() const {
1396     return AMDGPU::isGFX9(getSTI());
1397   }
1398 
  // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
1400   bool isGFX90A() const {
1401     return AMDGPU::isGFX90A(getSTI());
1402   }
1403 
1404   bool isGFX940() const {
1405     return AMDGPU::isGFX940(getSTI());
1406   }
1407 
1408   bool isGFX9Plus() const {
1409     return AMDGPU::isGFX9Plus(getSTI());
1410   }
1411 
1412   bool isGFX10() const {
1413     return AMDGPU::isGFX10(getSTI());
1414   }
1415 
1416   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1417 
1418   bool isGFX10_BEncoding() const {
1419     return AMDGPU::isGFX10_BEncoding(getSTI());
1420   }
1421 
1422   bool hasInv2PiInlineImm() const {
1423     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1424   }
1425 
1426   bool hasFlatOffsets() const {
1427     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1428   }
1429 
1430   bool hasArchitectedFlatScratch() const {
1431     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1432   }
1433 
1434   bool hasSGPR102_SGPR103() const {
1435     return !isVI() && !isGFX9();
1436   }
1437 
1438   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1439 
1440   bool hasIntClamp() const {
1441     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1442   }
1443 
1444   AMDGPUTargetStreamer &getTargetStreamer() {
1445     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1446     return static_cast<AMDGPUTargetStreamer &>(TS);
1447   }
1448 
1449   const MCRegisterInfo *getMRI() const {
1450     // We need this const_cast because for some reason getContext() is not const
1451     // in MCAsmParser.
1452     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1453   }
1454 
1455   const MCInstrInfo *getMII() const {
1456     return &MII;
1457   }
1458 
1459   const FeatureBitset &getFeatureBits() const {
1460     return getSTI().getFeatureBits();
1461   }
1462 
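  // Mnemonic suffixes such as "_e32"/"_e64" (and "_dpp"/"_sdwa") force a
  // particular encoding; see parseMnemonicSuffix().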
1463   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1464   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1465   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1466 
1467   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1468   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1469   bool isForcedDPP() const { return ForcedDPP; }
1470   bool isForcedSDWA() const { return ForcedSDWA; }
1471   ArrayRef<unsigned> getMatchedVariants() const;
1472   StringRef getMatchedVariantName() const;
1473 
1474   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1475   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1476                      bool RestoreOnFailure);
1477   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1478   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1479                                         SMLoc &EndLoc) override;
1480   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1481   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1482                                       unsigned Kind) override;
1483   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1484                                OperandVector &Operands, MCStreamer &Out,
1485                                uint64_t &ErrorInfo,
1486                                bool MatchingInlineAsm) override;
1487   bool ParseDirective(AsmToken DirectiveID) override;
1488   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1489                                     OperandMode Mode = OperandMode_Default);
1490   StringRef parseMnemonicSuffix(StringRef Name);
1491   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1492                         SMLoc NameLoc, OperandVector &Operands) override;
1493   //bool ProcessInstruction(MCInst &Inst);
1494 
1495   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1496 
1497   OperandMatchResultTy
1498   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1499                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1500                      bool (*ConvertResult)(int64_t &) = nullptr);
1501 
1502   OperandMatchResultTy
1503   parseOperandArrayWithPrefix(const char *Prefix,
1504                               OperandVector &Operands,
1505                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1506                               bool (*ConvertResult)(int64_t&) = nullptr);
1507 
1508   OperandMatchResultTy
1509   parseNamedBit(StringRef Name, OperandVector &Operands,
1510                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1511   OperandMatchResultTy parseCPol(OperandVector &Operands);
1512   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1513                                              StringRef &Value,
1514                                              SMLoc &StringLoc);
1515 
1516   bool isModifier();
1517   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1518   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1519   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1520   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1521   bool parseSP3NegModifier();
1522   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1523   OperandMatchResultTy parseReg(OperandVector &Operands);
1524   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1525   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1526   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1527   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1528   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1529   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1530   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1531   OperandMatchResultTy parseUfmt(int64_t &Format);
1532   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1533   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1534   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1535   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1536   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1537   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1538   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1539 
1540   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1541   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1542   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1543   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1544 
1545   bool parseCnt(int64_t &IntVal);
1546   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1547 
1548   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1549   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1550   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1551 
1552   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1553 
1554 private:
1555   struct OperandInfoTy {
1556     SMLoc Loc;
1557     int64_t Id;
1558     bool IsSymbolic = false;
1559     bool IsDefined = false;
1560 
1561     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1562   };
1563 
1564   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1565   bool validateSendMsg(const OperandInfoTy &Msg,
1566                        const OperandInfoTy &Op,
1567                        const OperandInfoTy &Stream);
1568 
1569   bool parseHwregBody(OperandInfoTy &HwReg,
1570                       OperandInfoTy &Offset,
1571                       OperandInfoTy &Width);
1572   bool validateHwreg(const OperandInfoTy &HwReg,
1573                      const OperandInfoTy &Offset,
1574                      const OperandInfoTy &Width);
1575 
1576   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1577   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1578   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1579 
1580   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1581                       const OperandVector &Operands) const;
1582   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1583   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1584   SMLoc getLitLoc(const OperandVector &Operands) const;
1585   SMLoc getConstLoc(const OperandVector &Operands) const;
1586 
1587   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1588   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1589   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1590   bool validateSOPLiteral(const MCInst &Inst) const;
1591   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1592   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1593   bool validateIntClampSupported(const MCInst &Inst);
1594   bool validateMIMGAtomicDMask(const MCInst &Inst);
1595   bool validateMIMGGatherDMask(const MCInst &Inst);
1596   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1597   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1598   bool validateMIMGAddrSize(const MCInst &Inst);
1599   bool validateMIMGD16(const MCInst &Inst);
1600   bool validateMIMGDim(const MCInst &Inst);
1601   bool validateMIMGMSAA(const MCInst &Inst);
1602   bool validateOpSel(const MCInst &Inst);
1603   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1604   bool validateVccOperand(unsigned Reg) const;
1605   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1606   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1607   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1608   bool validateAGPRLdSt(const MCInst &Inst) const;
1609   bool validateVGPRAlign(const MCInst &Inst) const;
1610   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1611   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1612   bool validateDivScale(const MCInst &Inst);
1613   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1614                              const SMLoc &IDLoc);
1615   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1616   unsigned getConstantBusLimit(unsigned Opcode) const;
1617   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1618   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1619   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1620 
1621   bool isSupportedMnemo(StringRef Mnemo,
1622                         const FeatureBitset &FBS);
1623   bool isSupportedMnemo(StringRef Mnemo,
1624                         const FeatureBitset &FBS,
1625                         ArrayRef<unsigned> Variants);
1626   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1627 
1628   bool isId(const StringRef Id) const;
1629   bool isId(const AsmToken &Token, const StringRef Id) const;
1630   bool isToken(const AsmToken::TokenKind Kind) const;
1631   bool trySkipId(const StringRef Id);
1632   bool trySkipId(const StringRef Pref, const StringRef Id);
1633   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1634   bool trySkipToken(const AsmToken::TokenKind Kind);
1635   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1636   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1637   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1638 
1639   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1640   AsmToken::TokenKind getTokenKind() const;
1641   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1642   bool parseExpr(OperandVector &Operands);
1643   StringRef getTokenStr() const;
1644   AsmToken peekToken();
1645   AsmToken getToken() const;
1646   SMLoc getLoc() const;
1647   void lex();
1648 
1649 public:
1650   void onBeginOfFile() override;
1651 
1652   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1653   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1654 
1655   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1656   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1657   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1658   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1659   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1660   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1661 
1662   bool parseSwizzleOperand(int64_t &Op,
1663                            const unsigned MinVal,
1664                            const unsigned MaxVal,
1665                            const StringRef ErrMsg,
1666                            SMLoc &Loc);
1667   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1668                             const unsigned MinVal,
1669                             const unsigned MaxVal,
1670                             const StringRef ErrMsg);
1671   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1672   bool parseSwizzleOffset(int64_t &Imm);
1673   bool parseSwizzleMacro(int64_t &Imm);
1674   bool parseSwizzleQuadPerm(int64_t &Imm);
1675   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1676   bool parseSwizzleBroadcast(int64_t &Imm);
1677   bool parseSwizzleSwap(int64_t &Imm);
1678   bool parseSwizzleReverse(int64_t &Imm);
1679 
1680   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1681   int64_t parseGPRIdxMacro();
1682 
1683   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1684   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1685   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1686   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1687 
1688   AMDGPUOperand::Ptr defaultCPol() const;
1689 
1690   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1691   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1692   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1693   AMDGPUOperand::Ptr defaultFlatOffset() const;
1694 
1695   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1696 
1697   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1698                OptionalImmIndexMap &OptionalIdx);
1699   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1700   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1701   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1702   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1703                 OptionalImmIndexMap &OptionalIdx);
1704 
1705   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1706 
1707   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1708                bool IsAtomic = false);
1709   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1710   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1711 
1712   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1713 
1714   bool parseDimId(unsigned &Encoding);
1715   OperandMatchResultTy parseDim(OperandVector &Operands);
1716   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1717   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1718   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1719   int64_t parseDPPCtrlSel(StringRef Ctrl);
1720   int64_t parseDPPCtrlPerm();
1721   AMDGPUOperand::Ptr defaultRowMask() const;
1722   AMDGPUOperand::Ptr defaultBankMask() const;
1723   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1724   AMDGPUOperand::Ptr defaultFI() const;
1725   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1726   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1727 
1728   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1729                                     AMDGPUOperand::ImmTy Type);
1730   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1731   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1732   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1733   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1734   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1735   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1736   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1737                uint64_t BasicInstType,
1738                bool SkipDstVcc = false,
1739                bool SkipSrcVcc = false);
1740 
1741   AMDGPUOperand::Ptr defaultBLGP() const;
1742   AMDGPUOperand::Ptr defaultCBSZ() const;
1743   AMDGPUOperand::Ptr defaultABID() const;
1744 
1745   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1746   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1747 };
1748 
1749 struct OptionalOperand {
1750   const char *Name;
1751   AMDGPUOperand::ImmTy Type;
1752   bool IsBit;
1753   bool (*ConvertResult)(int64_t&);
1754 };
1755 
1756 } // end anonymous namespace
1757 
// May be called with an integer type of equivalent bitwidth.
1759 static const fltSemantics *getFltSemantics(unsigned Size) {
1760   switch (Size) {
1761   case 4:
1762     return &APFloat::IEEEsingle();
1763   case 8:
1764     return &APFloat::IEEEdouble();
1765   case 2:
1766     return &APFloat::IEEEhalf();
1767   default:
1768     llvm_unreachable("unsupported fp type");
1769   }
1770 }
1771 
1772 static const fltSemantics *getFltSemantics(MVT VT) {
1773   return getFltSemantics(VT.getSizeInBits() / 8);
1774 }
1775 
1776 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1777   switch (OperandType) {
1778   case AMDGPU::OPERAND_REG_IMM_INT32:
1779   case AMDGPU::OPERAND_REG_IMM_FP32:
1780   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1781   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1782   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1783   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1784   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1785   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1786   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1787   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1788   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1789   case AMDGPU::OPERAND_KIMM32:
1790     return &APFloat::IEEEsingle();
1791   case AMDGPU::OPERAND_REG_IMM_INT64:
1792   case AMDGPU::OPERAND_REG_IMM_FP64:
1793   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1794   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1795   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1796     return &APFloat::IEEEdouble();
1797   case AMDGPU::OPERAND_REG_IMM_INT16:
1798   case AMDGPU::OPERAND_REG_IMM_FP16:
1799   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1800   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1801   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1802   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1803   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1804   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1805   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1806   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1807   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1808   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1809   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1810   case AMDGPU::OPERAND_KIMM16:
1811     return &APFloat::IEEEhalf();
1812   default:
1813     llvm_unreachable("unsupported fp type");
1814   }
1815 }
1816 
1817 //===----------------------------------------------------------------------===//
1818 // Operand
1819 //===----------------------------------------------------------------------===//
1820 
1821 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1822   bool Lost;
1823 
  // Convert the literal to the floating-point type expected by the operand.
1825   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1826                                                APFloat::rmNearestTiesToEven,
1827                                                &Lost);
  // We allow precision loss, but not overflow or underflow.
1829   if (Status != APFloat::opOK &&
1830       Lost &&
1831       ((Status & APFloat::opOverflow)  != 0 ||
1832        (Status & APFloat::opUnderflow) != 0)) {
1833     return false;
1834   }
1835 
1836   return true;
1837 }
1838 
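// Check whether a value can be truncated to the given bit width without
// losing information, i.e. it fits as either a signed or an unsigned integer.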
1839 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1840   return isUIntN(Size, Val) || isIntN(Size, Val);
1841 }
1842 
1843 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1844   if (VT.getScalarType() == MVT::i16) {
1845     // FP immediate values are broken.
1846     return isInlinableIntLiteral(Val);
1847   }
1848 
1849   // f16/v2f16 operands work correctly for all values.
1850   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1851 }
1852 
1853 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1854 
1855   // This is a hack to enable named inline values like
1856   // shared_base with both 32-bit and 64-bit operands.
1857   // Note that these values are defined as
1858   // 32-bit operands only.
1859   if (isInlineValue()) {
1860     return true;
1861   }
1862 
1863   if (!isImmTy(ImmTyNone)) {
1864     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1865     return false;
1866   }
  // TODO: We should avoid using host floats here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.
1870 
1871   APInt Literal(64, Imm.Val);
1872 
1873   if (Imm.IsFPImm) { // We got fp literal token
1874     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1875       return AMDGPU::isInlinableLiteral64(Imm.Val,
1876                                           AsmParser->hasInv2PiInlineImm());
1877     }
1878 
1879     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1880     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1881       return false;
1882 
1883     if (type.getScalarSizeInBits() == 16) {
1884       return isInlineableLiteralOp16(
1885         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1886         type, AsmParser->hasInv2PiInlineImm());
1887     }
1888 
1889     // Check if single precision literal is inlinable
1890     return AMDGPU::isInlinableLiteral32(
1891       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1892       AsmParser->hasInv2PiInlineImm());
1893   }
1894 
1895   // We got int literal token.
1896   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1897     return AMDGPU::isInlinableLiteral64(Imm.Val,
1898                                         AsmParser->hasInv2PiInlineImm());
1899   }
1900 
1901   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1902     return false;
1903   }
1904 
1905   if (type.getScalarSizeInBits() == 16) {
1906     return isInlineableLiteralOp16(
1907       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1908       type, AsmParser->hasInv2PiInlineImm());
1909   }
1910 
1911   return AMDGPU::isInlinableLiteral32(
1912     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1913     AsmParser->hasInv2PiInlineImm());
1914 }
1915 
1916 bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal
1918   if (!isImmTy(ImmTyNone)) {
1919     return false;
1920   }
1921 
1922   if (!Imm.IsFPImm) {
1923     // We got int literal token.
1924 
1925     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3, because of integer truncation.
      // To avoid ambiguity, reject these cases.
1929       return false;
1930     }
1931 
1932     unsigned Size = type.getSizeInBits();
1933     if (Size == 64)
1934       Size = 32;
1935 
1936     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1937     // types.
1938     return isSafeTruncation(Imm.Val, Size);
1939   }
1940 
1941   // We got fp literal token
1942   if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal would be set to zero, but such
    // literals are still accepted.
1944     return true;
1945   }
1946 
1947   if (type == MVT::i64) { // Expected 64-bit int operand
1948     // We don't allow fp literals in 64-bit integer instructions. It is
1949     // unclear how we should encode them.
1950     return false;
1951   }
1952 
  // We allow fp literals with packed operands (v2f16/v2i16/v2f32) assuming
  // that the specified literal goes into the lower half and the upper half
  // is zero. We also require that the literal can be losslessly converted
  // to the expected element type.
1956   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1957                      (type == MVT::v2i16)? MVT::i16 :
1958                      (type == MVT::v2f32)? MVT::f32 : type;
1959 
1960   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1961   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1962 }
1963 
1964 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1965   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1966 }
1967 
1968 bool AMDGPUOperand::isVRegWithInputMods() const {
1969   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1970          // GFX90A allows DPP on 64-bit operands.
1971          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1972           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1973 }
1974 
1975 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1976   if (AsmParser->isVI())
1977     return isVReg32();
1978   else if (AsmParser->isGFX9Plus())
1979     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1980   else
1981     return false;
1982 }
1983 
1984 bool AMDGPUOperand::isSDWAFP16Operand() const {
1985   return isSDWAOperand(MVT::f16);
1986 }
1987 
1988 bool AMDGPUOperand::isSDWAFP32Operand() const {
1989   return isSDWAOperand(MVT::f32);
1990 }
1991 
1992 bool AMDGPUOperand::isSDWAInt16Operand() const {
1993   return isSDWAOperand(MVT::i16);
1994 }
1995 
1996 bool AMDGPUOperand::isSDWAInt32Operand() const {
1997   return isSDWAOperand(MVT::i32);
1998 }
1999 
2000 bool AMDGPUOperand::isBoolReg() const {
2001   auto FB = AsmParser->getFeatureBits();
2002   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2003                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2004 }
2005 
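// Apply the parsed 'abs'/'neg' modifiers to the raw bits of an fp literal of
// the given size in bytes: 'abs' clears the sign bit, 'neg' flips it.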
2006 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2007 {
2008   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2009   assert(Size == 2 || Size == 4 || Size == 8);
2010 
2011   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2012 
2013   if (Imm.Mods.Abs) {
2014     Val &= ~FpSignMask;
2015   }
2016   if (Imm.Mods.Neg) {
2017     Val ^= FpSignMask;
2018   }
2019 
2020   return Val;
2021 }
2022 
2023 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2024   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2025                              Inst.getNumOperands())) {
2026     addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
2028                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2029   } else {
2030     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2031     Inst.addOperand(MCOperand::createImm(Imm.Val));
2032     setImmKindNone();
2033   }
2034 }
2035 
2036 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2037   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2038   auto OpNum = Inst.getNumOperands();
2039   // Check that this operand accepts literals
2040   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2041 
2042   if (ApplyModifiers) {
2043     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2044     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2045     Val = applyInputFPModifiers(Val, Size);
2046   }
2047 
2048   APInt Literal(64, Val);
2049   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2050 
2051   if (Imm.IsFPImm) { // We got fp literal token
2052     switch (OpTy) {
2053     case AMDGPU::OPERAND_REG_IMM_INT64:
2054     case AMDGPU::OPERAND_REG_IMM_FP64:
2055     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2056     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2057     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2058       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2059                                        AsmParser->hasInv2PiInlineImm())) {
2060         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2061         setImmKindConst();
2062         return;
2063       }
2064 
2065       // Non-inlineable
2066       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands, check whether the low 32 bits are zero.
2068         if (Literal.getLoBits(32) != 0) {
2069           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2070           "Can't encode literal as exact 64-bit floating-point operand. "
2071           "Low 32-bits will be set to zero");
2072         }
2073 
2074         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2075         setImmKindLiteral();
2076         return;
2077       }
2078 
2079       // We don't allow fp literals in 64-bit integer instructions. It is
2080       // unclear how we should encode them. This case should be checked earlier
2081       // in predicate methods (isLiteralImm())
2082       llvm_unreachable("fp literal in 64-bit integer instruction.");
2083 
2084     case AMDGPU::OPERAND_REG_IMM_INT32:
2085     case AMDGPU::OPERAND_REG_IMM_FP32:
2086     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2087     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2088     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2089     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2090     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2091     case AMDGPU::OPERAND_REG_IMM_INT16:
2092     case AMDGPU::OPERAND_REG_IMM_FP16:
2093     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2094     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2095     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2096     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2097     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2098     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2099     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2100     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2101     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2102     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2103     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2104     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2105     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2106     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2107     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2108     case AMDGPU::OPERAND_KIMM32:
2109     case AMDGPU::OPERAND_KIMM16: {
2110       bool lost;
2111       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's floating-point format.
2113       FPLiteral.convert(*getOpFltSemantics(OpTy),
2114                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss, but not overflow or underflow. This should be
      // checked earlier in isLiteralImm().
2117 
2118       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2119       Inst.addOperand(MCOperand::createImm(ImmVal));
2120       setImmKindLiteral();
2121       return;
2122     }
2123     default:
2124       llvm_unreachable("invalid operand size");
2125     }
2126 
2127     return;
2128   }
2129 
2130   // We got int literal token.
2131   // Only sign extend inline immediates.
2132   switch (OpTy) {
2133   case AMDGPU::OPERAND_REG_IMM_INT32:
2134   case AMDGPU::OPERAND_REG_IMM_FP32:
2135   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2136   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2137   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2138   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2139   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2140   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2141   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2142   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2143   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2144   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2145   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2146     if (isSafeTruncation(Val, 32) &&
2147         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2148                                      AsmParser->hasInv2PiInlineImm())) {
2149       Inst.addOperand(MCOperand::createImm(Val));
2150       setImmKindConst();
2151       return;
2152     }
2153 
2154     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2155     setImmKindLiteral();
2156     return;
2157 
2158   case AMDGPU::OPERAND_REG_IMM_INT64:
2159   case AMDGPU::OPERAND_REG_IMM_FP64:
2160   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2161   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2162   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2163     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2164       Inst.addOperand(MCOperand::createImm(Val));
2165       setImmKindConst();
2166       return;
2167     }
2168 
2169     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2170     setImmKindLiteral();
2171     return;
2172 
2173   case AMDGPU::OPERAND_REG_IMM_INT16:
2174   case AMDGPU::OPERAND_REG_IMM_FP16:
2175   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2176   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2177   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2178   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2179   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2180     if (isSafeTruncation(Val, 16) &&
2181         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2182                                      AsmParser->hasInv2PiInlineImm())) {
2183       Inst.addOperand(MCOperand::createImm(Val));
2184       setImmKindConst();
2185       return;
2186     }
2187 
2188     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2189     setImmKindLiteral();
2190     return;
2191 
2192   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2193   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2194   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2195   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2196     assert(isSafeTruncation(Val, 16));
2197     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2198                                         AsmParser->hasInv2PiInlineImm()));
2199 
2200     Inst.addOperand(MCOperand::createImm(Val));
2201     return;
2202   }
2203   case AMDGPU::OPERAND_KIMM32:
2204     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2205     setImmKindNone();
2206     return;
2207   case AMDGPU::OPERAND_KIMM16:
2208     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2209     setImmKindNone();
2210     return;
2211   default:
2212     llvm_unreachable("invalid operand size");
2213   }
2214 }
2215 
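// Add a KIMM operand. Integer literals are truncated to Bitwidth bits;
// fp literals are first converted from double to the Bitwidth-bit format.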
2216 template <unsigned Bitwidth>
2217 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2218   APInt Literal(64, Imm.Val);
2219   setImmKindNone();
2220 
2221   if (!Imm.IsFPImm) {
2222     // We got int literal token.
2223     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2224     return;
2225   }
2226 
2227   bool Lost;
2228   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2229   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2230                     APFloat::rmNearestTiesToEven, &Lost);
2231   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2232 }
2233 
2234 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2235   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2236 }
2237 
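// Check if a register is one of the named inline constants
// (aperture registers, pops_exiting_wave_id, vccz/execz/scc, null).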
2238 static bool isInlineValue(unsigned Reg) {
2239   switch (Reg) {
2240   case AMDGPU::SRC_SHARED_BASE:
2241   case AMDGPU::SRC_SHARED_LIMIT:
2242   case AMDGPU::SRC_PRIVATE_BASE:
2243   case AMDGPU::SRC_PRIVATE_LIMIT:
2244   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2245     return true;
2246   case AMDGPU::SRC_VCCZ:
2247   case AMDGPU::SRC_EXECZ:
2248   case AMDGPU::SRC_SCC:
2249     return true;
2250   case AMDGPU::SGPR_NULL:
2251     return true;
2252   default:
2253     return false;
2254   }
2255 }
2256 
2257 bool AMDGPUOperand::isInlineValue() const {
2258   return isRegKind() && ::isInlineValue(getReg());
2259 }
2260 
2261 //===----------------------------------------------------------------------===//
2262 // AsmParser
2263 //===----------------------------------------------------------------------===//
2264 
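// Map a register kind and a width in bits to the corresponding register
// class ID, or return -1 if no class of that width exists.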
2265 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2266   if (Is == IS_VGPR) {
2267     switch (RegWidth) {
2268       default: return -1;
2269       case 32:
2270         return AMDGPU::VGPR_32RegClassID;
2271       case 64:
2272         return AMDGPU::VReg_64RegClassID;
2273       case 96:
2274         return AMDGPU::VReg_96RegClassID;
2275       case 128:
2276         return AMDGPU::VReg_128RegClassID;
2277       case 160:
2278         return AMDGPU::VReg_160RegClassID;
2279       case 192:
2280         return AMDGPU::VReg_192RegClassID;
2281       case 224:
2282         return AMDGPU::VReg_224RegClassID;
2283       case 256:
2284         return AMDGPU::VReg_256RegClassID;
2285       case 512:
2286         return AMDGPU::VReg_512RegClassID;
2287       case 1024:
2288         return AMDGPU::VReg_1024RegClassID;
2289     }
2290   } else if (Is == IS_TTMP) {
2291     switch (RegWidth) {
2292       default: return -1;
2293       case 32:
2294         return AMDGPU::TTMP_32RegClassID;
2295       case 64:
2296         return AMDGPU::TTMP_64RegClassID;
2297       case 128:
2298         return AMDGPU::TTMP_128RegClassID;
2299       case 256:
2300         return AMDGPU::TTMP_256RegClassID;
2301       case 512:
2302         return AMDGPU::TTMP_512RegClassID;
2303     }
2304   } else if (Is == IS_SGPR) {
2305     switch (RegWidth) {
2306       default: return -1;
2307       case 32:
2308         return AMDGPU::SGPR_32RegClassID;
2309       case 64:
2310         return AMDGPU::SGPR_64RegClassID;
2311       case 96:
2312         return AMDGPU::SGPR_96RegClassID;
2313       case 128:
2314         return AMDGPU::SGPR_128RegClassID;
2315       case 160:
2316         return AMDGPU::SGPR_160RegClassID;
2317       case 192:
2318         return AMDGPU::SGPR_192RegClassID;
2319       case 224:
2320         return AMDGPU::SGPR_224RegClassID;
2321       case 256:
2322         return AMDGPU::SGPR_256RegClassID;
2323       case 512:
2324         return AMDGPU::SGPR_512RegClassID;
2325     }
2326   } else if (Is == IS_AGPR) {
2327     switch (RegWidth) {
2328       default: return -1;
2329       case 32:
2330         return AMDGPU::AGPR_32RegClassID;
2331       case 64:
2332         return AMDGPU::AReg_64RegClassID;
2333       case 96:
2334         return AMDGPU::AReg_96RegClassID;
2335       case 128:
2336         return AMDGPU::AReg_128RegClassID;
2337       case 160:
2338         return AMDGPU::AReg_160RegClassID;
2339       case 192:
2340         return AMDGPU::AReg_192RegClassID;
2341       case 224:
2342         return AMDGPU::AReg_224RegClassID;
2343       case 256:
2344         return AMDGPU::AReg_256RegClassID;
2345       case 512:
2346         return AMDGPU::AReg_512RegClassID;
2347       case 1024:
2348         return AMDGPU::AReg_1024RegClassID;
2349     }
2350   }
2351   return -1;
2352 }
2353 
2354 static unsigned getSpecialRegForName(StringRef RegName) {
2355   return StringSwitch<unsigned>(RegName)
2356     .Case("exec", AMDGPU::EXEC)
2357     .Case("vcc", AMDGPU::VCC)
2358     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2359     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2360     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2361     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2362     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2363     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2364     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2365     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2366     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2367     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2368     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2369     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2370     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2371     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2372     .Case("m0", AMDGPU::M0)
2373     .Case("vccz", AMDGPU::SRC_VCCZ)
2374     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2375     .Case("execz", AMDGPU::SRC_EXECZ)
2376     .Case("src_execz", AMDGPU::SRC_EXECZ)
2377     .Case("scc", AMDGPU::SRC_SCC)
2378     .Case("src_scc", AMDGPU::SRC_SCC)
2379     .Case("tba", AMDGPU::TBA)
2380     .Case("tma", AMDGPU::TMA)
2381     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2382     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2383     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2384     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2385     .Case("vcc_lo", AMDGPU::VCC_LO)
2386     .Case("vcc_hi", AMDGPU::VCC_HI)
2387     .Case("exec_lo", AMDGPU::EXEC_LO)
2388     .Case("exec_hi", AMDGPU::EXEC_HI)
2389     .Case("tma_lo", AMDGPU::TMA_LO)
2390     .Case("tma_hi", AMDGPU::TMA_HI)
2391     .Case("tba_lo", AMDGPU::TBA_LO)
2392     .Case("tba_hi", AMDGPU::TBA_HI)
2393     .Case("pc", AMDGPU::PC_REG)
2394     .Case("null", AMDGPU::SGPR_NULL)
2395     .Default(AMDGPU::NoRegister);
2396 }
2397 
2398 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2399                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2400   auto R = parseRegister();
2401   if (!R) return true;
2402   assert(R->isReg());
2403   RegNo = R->getReg();
2404   StartLoc = R->getStartLoc();
2405   EndLoc = R->getEndLoc();
2406   return false;
2407 }
2408 
2409 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2410                                     SMLoc &EndLoc) {
2411   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2412 }
2413 
2414 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2415                                                        SMLoc &StartLoc,
2416                                                        SMLoc &EndLoc) {
2417   bool Result =
2418       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2419   bool PendingErrors = getParser().hasPendingError();
2420   getParser().clearPendingErrors();
2421   if (PendingErrors)
2422     return MatchOperand_ParseFail;
2423   if (Result)
2424     return MatchOperand_NoMatch;
2425   return MatchOperand_Success;
2426 }
2427 
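// Merge the next register of a list into the register (range) parsed so far.
// Known special register halves (e.g. exec_lo followed by exec_hi) combine
// into their 64-bit counterpart; regular registers must have consecutive
// indices and extend the range by 32 bits.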
2428 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2429                                             RegisterKind RegKind, unsigned Reg1,
2430                                             SMLoc Loc) {
2431   switch (RegKind) {
2432   case IS_SPECIAL:
2433     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2434       Reg = AMDGPU::EXEC;
2435       RegWidth = 64;
2436       return true;
2437     }
2438     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2439       Reg = AMDGPU::FLAT_SCR;
2440       RegWidth = 64;
2441       return true;
2442     }
2443     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2444       Reg = AMDGPU::XNACK_MASK;
2445       RegWidth = 64;
2446       return true;
2447     }
2448     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2449       Reg = AMDGPU::VCC;
2450       RegWidth = 64;
2451       return true;
2452     }
2453     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2454       Reg = AMDGPU::TBA;
2455       RegWidth = 64;
2456       return true;
2457     }
2458     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2459       Reg = AMDGPU::TMA;
2460       RegWidth = 64;
2461       return true;
2462     }
2463     Error(Loc, "register does not fit in the list");
2464     return false;
2465   case IS_VGPR:
2466   case IS_SGPR:
2467   case IS_AGPR:
2468   case IS_TTMP:
2469     if (Reg1 != Reg + RegWidth / 32) {
2470       Error(Loc, "registers in a list must have consecutive indices");
2471       return false;
2472     }
2473     RegWidth += 32;
2474     return true;
2475   default:
2476     llvm_unreachable("unexpected register kind");
2477   }
2478 }
2479 
2480 struct RegInfo {
2481   StringLiteral Name;
2482   RegisterKind Kind;
2483 };
2484 
2485 static constexpr RegInfo RegularRegisters[] = {
2486   {{"v"},    IS_VGPR},
2487   {{"s"},    IS_SGPR},
2488   {{"ttmp"}, IS_TTMP},
2489   {{"acc"},  IS_AGPR},
2490   {{"a"},    IS_AGPR},
2491 };
2492 
2493 static bool isRegularReg(RegisterKind Kind) {
2494   return Kind == IS_VGPR ||
2495          Kind == IS_SGPR ||
2496          Kind == IS_TTMP ||
2497          Kind == IS_AGPR;
2498 }
2499 
2500 static const RegInfo* getRegularRegInfo(StringRef Str) {
2501   for (const RegInfo &Reg : RegularRegisters)
2502     if (Str.startswith(Reg.Name))
2503       return &Reg;
2504   return nullptr;
2505 }
2506 
2507 static bool getRegNum(StringRef Str, unsigned& Num) {
2508   return !Str.getAsInteger(10, Num);
2509 }
2510 
2511 bool
2512 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2513                             const AsmToken &NextToken) const {
2514 
2515   // A list of consecutive registers: [s0,s1,s2,s3]
2516   if (Token.is(AsmToken::LBrac))
2517     return true;
2518 
2519   if (!Token.is(AsmToken::Identifier))
2520     return false;
2521 
2522   // A single register like s0 or a range of registers like s[0:1]
2523 
2524   StringRef Str = Token.getString();
2525   const RegInfo *Reg = getRegularRegInfo(Str);
2526   if (Reg) {
2527     StringRef RegName = Reg->Name;
2528     StringRef RegSuffix = Str.substr(RegName.size());
2529     if (!RegSuffix.empty()) {
2530       unsigned Num;
2531       // A single register with an index: rXX
2532       if (getRegNum(RegSuffix, Num))
2533         return true;
2534     } else {
2535       // A range of registers: r[XX:YY].
2536       if (NextToken.is(AsmToken::LBrac))
2537         return true;
2538     }
2539   }
2540 
2541   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2542 }
2543 
2544 bool
2545 AMDGPUAsmParser::isRegister()
2546 {
2547   return isRegister(getToken(), peekToken());
2548 }
2549 
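// Translate a register kind, starting index and width into an MC register,
// enforcing the alignment requirements of SGPR and TTMP ranges.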
2550 unsigned
2551 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2552                                unsigned RegNum,
2553                                unsigned RegWidth,
2554                                SMLoc Loc) {
2555 
2556   assert(isRegularReg(RegKind));
2557 
2558   unsigned AlignSize = 1;
2559   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2560     // SGPR and TTMP registers must be aligned.
2561     // Max required alignment is 4 dwords.
2562     AlignSize = std::min(RegWidth / 32, 4u);
2563   }
2564 
2565   if (RegNum % AlignSize != 0) {
2566     Error(Loc, "invalid register alignment");
2567     return AMDGPU::NoRegister;
2568   }
2569 
2570   unsigned RegIdx = RegNum / AlignSize;
2571   int RCID = getRegClass(RegKind, RegWidth);
2572   if (RCID == -1) {
2573     Error(Loc, "invalid or unsupported register size");
2574     return AMDGPU::NoRegister;
2575   }
2576 
2577   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2578   const MCRegisterClass RC = TRI->getRegClass(RCID);
2579   if (RegIdx >= RC.getNumRegs()) {
2580     Error(Loc, "register index is out of range");
2581     return AMDGPU::NoRegister;
2582   }
2583 
2584   return RC.getRegister(RegIdx);
2585 }
2586 
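// Parse a register index range '[lo]' or '[lo:hi]'. On success, Num holds the
// first index and RegWidth the total width of the range in bits.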
2587 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2588   int64_t RegLo, RegHi;
2589   if (!skipToken(AsmToken::LBrac, "missing register index"))
2590     return false;
2591 
2592   SMLoc FirstIdxLoc = getLoc();
2593   SMLoc SecondIdxLoc;
2594 
2595   if (!parseExpr(RegLo))
2596     return false;
2597 
2598   if (trySkipToken(AsmToken::Colon)) {
2599     SecondIdxLoc = getLoc();
2600     if (!parseExpr(RegHi))
2601       return false;
2602   } else {
2603     RegHi = RegLo;
2604   }
2605 
2606   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2607     return false;
2608 
2609   if (!isUInt<32>(RegLo)) {
2610     Error(FirstIdxLoc, "invalid register index");
2611     return false;
2612   }
2613 
2614   if (!isUInt<32>(RegHi)) {
2615     Error(SecondIdxLoc, "invalid register index");
2616     return false;
2617   }
2618 
2619   if (RegLo > RegHi) {
2620     Error(FirstIdxLoc, "first register index should not exceed second index");
2621     return false;
2622   }
2623 
2624   Num = static_cast<unsigned>(RegLo);
2625   RegWidth = 32 * ((RegHi - RegLo) + 1);
2626   return true;
2627 }
2628 
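// Try to parse a named special register such as 'vcc', 'exec' or 'm0'.
// Returns AMDGPU::NoRegister if the identifier is not a special register.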
2629 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2630                                           unsigned &RegNum, unsigned &RegWidth,
2631                                           SmallVectorImpl<AsmToken> &Tokens) {
2632   assert(isToken(AsmToken::Identifier));
2633   unsigned Reg = getSpecialRegForName(getTokenStr());
2634   if (Reg) {
2635     RegNum = 0;
2636     RegWidth = 32;
2637     RegKind = IS_SPECIAL;
2638     Tokens.push_back(getToken());
2639     lex(); // skip register name
2640   }
2641   return Reg;
2642 }
2643 
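// Parse a regular register: either a single register like 'v0' or a register
// range like 'v[0:3]'.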
2644 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2645                                           unsigned &RegNum, unsigned &RegWidth,
2646                                           SmallVectorImpl<AsmToken> &Tokens) {
2647   assert(isToken(AsmToken::Identifier));
2648   StringRef RegName = getTokenStr();
2649   auto Loc = getLoc();
2650 
2651   const RegInfo *RI = getRegularRegInfo(RegName);
2652   if (!RI) {
2653     Error(Loc, "invalid register name");
2654     return AMDGPU::NoRegister;
2655   }
2656 
2657   Tokens.push_back(getToken());
2658   lex(); // skip register name
2659 
2660   RegKind = RI->Kind;
2661   StringRef RegSuffix = RegName.substr(RI->Name.size());
2662   if (!RegSuffix.empty()) {
2663     // Single 32-bit register: vXX.
2664     if (!getRegNum(RegSuffix, RegNum)) {
2665       Error(Loc, "invalid register index");
2666       return AMDGPU::NoRegister;
2667     }
2668     RegWidth = 32;
2669   } else {
2670     // Range of registers: v[XX:YY]. ":YY" is optional.
2671     if (!ParseRegRange(RegNum, RegWidth))
2672       return AMDGPU::NoRegister;
2673   }
2674 
2675   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2676 }
2677 
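// Parse a bracketed register list such as '[s0,s1,s2,s3]'. All elements must
// be single 32-bit registers of the same kind with consecutive indices.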
2678 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2679                                        unsigned &RegWidth,
2680                                        SmallVectorImpl<AsmToken> &Tokens) {
2681   unsigned Reg = AMDGPU::NoRegister;
2682   auto ListLoc = getLoc();
2683 
2684   if (!skipToken(AsmToken::LBrac,
2685                  "expected a register or a list of registers")) {
2686     return AMDGPU::NoRegister;
2687   }
2688 
2689   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2690 
2691   auto Loc = getLoc();
2692   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2693     return AMDGPU::NoRegister;
2694   if (RegWidth != 32) {
2695     Error(Loc, "expected a single 32-bit register");
2696     return AMDGPU::NoRegister;
2697   }
2698 
2699   for (; trySkipToken(AsmToken::Comma); ) {
2700     RegisterKind NextRegKind;
2701     unsigned NextReg, NextRegNum, NextRegWidth;
2702     Loc = getLoc();
2703 
2704     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2705                              NextRegNum, NextRegWidth,
2706                              Tokens)) {
2707       return AMDGPU::NoRegister;
2708     }
2709     if (NextRegWidth != 32) {
2710       Error(Loc, "expected a single 32-bit register");
2711       return AMDGPU::NoRegister;
2712     }
2713     if (NextRegKind != RegKind) {
2714       Error(Loc, "registers in a list must be of the same kind");
2715       return AMDGPU::NoRegister;
2716     }
2717     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2718       return AMDGPU::NoRegister;
2719   }
2720 
2721   if (!skipToken(AsmToken::RBrac,
2722                  "expected a comma or a closing square bracket")) {
2723     return AMDGPU::NoRegister;
2724   }
2725 
2726   if (isRegularReg(RegKind))
2727     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2728 
2729   return Reg;
2730 }
2731 
2732 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2733                                           unsigned &RegNum, unsigned &RegWidth,
2734                                           SmallVectorImpl<AsmToken> &Tokens) {
2735   auto Loc = getLoc();
2736   Reg = AMDGPU::NoRegister;
2737 
2738   if (isToken(AsmToken::Identifier)) {
2739     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2740     if (Reg == AMDGPU::NoRegister)
2741       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2742   } else {
2743     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2744   }
2745 
2746   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2747   if (Reg == AMDGPU::NoRegister) {
2748     assert(Parser.hasPendingError());
2749     return false;
2750   }
2751 
2752   if (!subtargetHasRegister(*TRI, Reg)) {
2753     if (Reg == AMDGPU::SGPR_NULL) {
2754       Error(Loc, "'null' operand is not supported on this GPU");
2755     } else {
2756       Error(Loc, "register not available on this GPU");
2757     }
2758     return false;
2759   }
2760 
2761   return true;
2762 }
2763 
2764 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2765                                           unsigned &RegNum, unsigned &RegWidth,
2766                                           bool RestoreOnFailure /*=false*/) {
2767   Reg = AMDGPU::NoRegister;
2768 
2769   SmallVector<AsmToken, 1> Tokens;
2770   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2771     if (RestoreOnFailure) {
2772       while (!Tokens.empty()) {
2773         getLexer().UnLex(Tokens.pop_back_val());
2774       }
2775     }
2776     return true;
2777   }
2778   return false;
2779 }
2780 
2781 Optional<StringRef>
2782 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2783   switch (RegKind) {
2784   case IS_VGPR:
2785     return StringRef(".amdgcn.next_free_vgpr");
2786   case IS_SGPR:
2787     return StringRef(".amdgcn.next_free_sgpr");
2788   default:
2789     return None;
2790   }
2791 }
2792 
2793 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2794   auto SymbolName = getGprCountSymbolName(RegKind);
2795   assert(SymbolName && "initializing invalid register kind");
2796   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2797   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2798 }
2799 
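// Bump the .amdgcn.next_free_{v,s}gpr symbol if the given register range
// extends beyond the highest register index recorded so far.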
2800 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2801                                             unsigned DwordRegIndex,
2802                                             unsigned RegWidth) {
2803   // Symbols are only defined for GCN targets
2804   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2805     return true;
2806 
2807   auto SymbolName = getGprCountSymbolName(RegKind);
2808   if (!SymbolName)
2809     return true;
2810   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2811 
2812   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2813   int64_t OldCount;
2814 
2815   if (!Sym->isVariable())
2816     return !Error(getLoc(),
2817                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2818   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2819     return !Error(
2820         getLoc(),
2821         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2822 
2823   if (OldCount <= NewMax)
2824     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2825 
2826   return true;
2827 }
2828 
2829 std::unique_ptr<AMDGPUOperand>
2830 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2831   const auto &Tok = getToken();
2832   SMLoc StartLoc = Tok.getLoc();
2833   SMLoc EndLoc = Tok.getEndLoc();
2834   RegisterKind RegKind;
2835   unsigned Reg, RegNum, RegWidth;
2836 
2837   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2838     return nullptr;
2839   }
2840   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2841     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2842       return nullptr;
2843   } else
2844     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2845   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2846 }
2847 
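// Parse an immediate operand: either a floating-point literal with an
// optional leading '-', or an integer/symbolic MC expression.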
2848 OperandMatchResultTy
2849 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2850   // TODO: add syntactic sugar for 1/(2*PI)
2851 
2852   assert(!isRegister());
2853   assert(!isModifier());
2854 
2855   const auto& Tok = getToken();
2856   const auto& NextTok = peekToken();
2857   bool IsReal = Tok.is(AsmToken::Real);
2858   SMLoc S = getLoc();
2859   bool Negate = false;
2860 
2861   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2862     lex();
2863     IsReal = true;
2864     Negate = true;
2865   }
2866 
2867   if (IsReal) {
    // Floating-point expressions are not supported.
    // Only floating-point literals with an optional
    // sign are allowed.
2871 
2872     StringRef Num = getTokenStr();
2873     lex();
2874 
2875     APFloat RealVal(APFloat::IEEEdouble());
2876     auto roundMode = APFloat::rmNearestTiesToEven;
2877     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2878       return MatchOperand_ParseFail;
2879     }
2880     if (Negate)
2881       RealVal.changeSign();
2882 
2883     Operands.push_back(
2884       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2885                                AMDGPUOperand::ImmTyNone, true));
2886 
2887     return MatchOperand_Success;
2888 
2889   } else {
2890     int64_t IntVal;
2891     const MCExpr *Expr;
2892     SMLoc S = getLoc();
2893 
2894     if (HasSP3AbsModifier) {
2895       // This is a workaround for handling expressions
      // as arguments of the SP3 'abs' modifier, for example:
2897       //     |1.0|
2898       //     |-1|
2899       //     |1+x|
      // This syntax is not compatible with the syntax of standard
      // MC expressions (due to the trailing '|').
2902       SMLoc EndLoc;
2903       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2904         return MatchOperand_ParseFail;
2905     } else {
2906       if (Parser.parseExpression(Expr))
2907         return MatchOperand_ParseFail;
2908     }
2909 
2910     if (Expr->evaluateAsAbsolute(IntVal)) {
2911       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2912     } else {
2913       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2914     }
2915 
2916     return MatchOperand_Success;
2917   }
2918 
2919   return MatchOperand_NoMatch;
2920 }
2921 
2922 OperandMatchResultTy
2923 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2924   if (!isRegister())
2925     return MatchOperand_NoMatch;
2926 
2927   if (auto R = parseRegister()) {
2928     assert(R->isReg());
2929     Operands.push_back(std::move(R));
2930     return MatchOperand_Success;
2931   }
2932   return MatchOperand_ParseFail;
2933 }
2934 
2935 OperandMatchResultTy
2936 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2937   auto res = parseReg(Operands);
2938   if (res != MatchOperand_NoMatch) {
2939     return res;
2940   } else if (isModifier()) {
2941     return MatchOperand_NoMatch;
2942   } else {
2943     return parseImm(Operands, HasSP3AbsMod);
2944   }
2945 }
2946 
2947 bool
2948 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2949   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2950     const auto &str = Token.getString();
2951     return str == "abs" || str == "neg" || str == "sext";
2952   }
2953   return false;
2954 }
2955 
2956 bool
2957 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2958   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2959 }
2960 
2961 bool
2962 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2963   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2964 }
2965 
2966 bool
2967 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2968   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2969 }
2970 
2971 // Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
2973 // avoid parsing these modifiers as expressions. Currently
2974 // recognized sequences are:
2975 //   |...|
2976 //   abs(...)
2977 //   neg(...)
2978 //   sext(...)
2979 //   -reg
2980 //   -|...|
2981 //   -abs(...)
2982 //   name:...
2983 // Note that simple opcode modifiers like 'gds' may be parsed as
2984 // expressions; this is a special case. See getExpressionAsToken.
2985 //
2986 bool
2987 AMDGPUAsmParser::isModifier() {
2988 
2989   AsmToken Tok = getToken();
2990   AsmToken NextToken[2];
2991   peekTokens(NextToken);
2992 
2993   return isOperandModifier(Tok, NextToken[0]) ||
2994          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2995          isOpcodeModifierWithVal(Tok, NextToken[0]);
2996 }
2997 
2998 // Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following contexts:
3000 //
3001 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3002 // 2. Before an 'abs' modifier: -abs(...)
3003 // 3. Before an SP3 'abs' modifier: -|...|
3004 //
// In all other cases "-" is handled as part
// of an expression that follows the sign.
3007 //
3008 // Note: When "-" is followed by an integer literal,
3009 // this is interpreted as integer negation rather
3010 // than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of the floating-point
// NEG modifier would result in different meanings
// of integer literals used with VOP1/2/C and VOP3,
3014 // for example:
3015 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3016 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise, for uniformity.
3019 //
3020 bool
3021 AMDGPUAsmParser::parseSP3NegModifier() {
3022 
3023   AsmToken NextToken[2];
3024   peekTokens(NextToken);
3025 
3026   if (isToken(AsmToken::Minus) &&
3027       (isRegister(NextToken[0], NextToken[1]) ||
3028        NextToken[0].is(AsmToken::Pipe) ||
3029        isId(NextToken[0], "abs"))) {
3030     lex();
3031     return true;
3032   }
3033 
3034   return false;
3035 }
3036 
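// Parse an operand together with its optional floating-point input modifiers:
// 'neg(...)', 'abs(...)' and their SP3 counterparts '-' and '|...|'.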
3037 OperandMatchResultTy
3038 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3039                                               bool AllowImm) {
3040   bool Neg, SP3Neg;
3041   bool Abs, SP3Abs;
3042   SMLoc Loc;
3043 
3044   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3045   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3046     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3047     return MatchOperand_ParseFail;
3048   }
3049 
3050   SP3Neg = parseSP3NegModifier();
3051 
3052   Loc = getLoc();
3053   Neg = trySkipId("neg");
3054   if (Neg && SP3Neg) {
3055     Error(Loc, "expected register or immediate");
3056     return MatchOperand_ParseFail;
3057   }
3058   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3059     return MatchOperand_ParseFail;
3060 
3061   Abs = trySkipId("abs");
3062   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3063     return MatchOperand_ParseFail;
3064 
3065   Loc = getLoc();
3066   SP3Abs = trySkipToken(AsmToken::Pipe);
3067   if (Abs && SP3Abs) {
3068     Error(Loc, "expected register or immediate");
3069     return MatchOperand_ParseFail;
3070   }
3071 
3072   OperandMatchResultTy Res;
3073   if (AllowImm) {
3074     Res = parseRegOrImm(Operands, SP3Abs);
3075   } else {
3076     Res = parseReg(Operands);
3077   }
3078   if (Res != MatchOperand_Success) {
3079     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3080   }
3081 
3082   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3083     return MatchOperand_ParseFail;
3084   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3085     return MatchOperand_ParseFail;
3086   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3087     return MatchOperand_ParseFail;
3088 
3089   AMDGPUOperand::Modifiers Mods;
3090   Mods.Abs = Abs || SP3Abs;
3091   Mods.Neg = Neg || SP3Neg;
3092 
3093   if (Mods.hasFPModifiers()) {
3094     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3095     if (Op.isExpr()) {
3096       Error(Op.getStartLoc(), "expected an absolute expression");
3097       return MatchOperand_ParseFail;
3098     }
3099     Op.setModifiers(Mods);
3100   }
3101   return MatchOperand_Success;
3102 }
3103 
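// Parse an operand together with an optional integer 'sext(...)' modifier.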
3104 OperandMatchResultTy
3105 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3106                                                bool AllowImm) {
3107   bool Sext = trySkipId("sext");
3108   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3109     return MatchOperand_ParseFail;
3110 
3111   OperandMatchResultTy Res;
3112   if (AllowImm) {
3113     Res = parseRegOrImm(Operands);
3114   } else {
3115     Res = parseReg(Operands);
3116   }
3117   if (Res != MatchOperand_Success) {
3118     return Sext? MatchOperand_ParseFail : Res;
3119   }
3120 
3121   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3122     return MatchOperand_ParseFail;
3123 
3124   AMDGPUOperand::Modifiers Mods;
3125   Mods.Sext = Sext;
3126 
3127   if (Mods.hasIntModifiers()) {
3128     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3129     if (Op.isExpr()) {
3130       Error(Op.getStartLoc(), "expected an absolute expression");
3131       return MatchOperand_ParseFail;
3132     }
3133     Op.setModifiers(Mods);
3134   }
3135 
3136   return MatchOperand_Success;
3137 }
3138 
3139 OperandMatchResultTy
3140 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3141   return parseRegOrImmWithFPInputMods(Operands, false);
3142 }
3143 
3144 OperandMatchResultTy
3145 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3146   return parseRegOrImmWithIntInputMods(Operands, false);
3147 }
3148 
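// Parse either the literal 'off' token (represented as an ImmTyOff immediate)
// or a register operand.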
3149 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3150   auto Loc = getLoc();
3151   if (trySkipId("off")) {
3152     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3153                                                 AMDGPUOperand::ImmTyOff, false));
3154     return MatchOperand_Success;
3155   }
3156 
3157   if (!isRegister())
3158     return MatchOperand_NoMatch;
3159 
3160   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3161   if (Reg) {
3162     Operands.push_back(std::move(Reg));
3163     return MatchOperand_Success;
3164   }
3165 
3166   return MatchOperand_ParseFail;
3168 }
3169 
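// Reject matches that contradict a forced encoding (e32/e64, DPP or SDWA),
// report Match_PreferE32 for VOP3 opcodes marked VOPAsmPrefer32Bit, and
// require dst_sel == DWORD for the SDWA forms of v_mac_f32/f16.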
3170 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3171   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3172 
3173   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3174       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3175       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3176       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3177     return Match_InvalidOperand;
3178 
3179   if ((TSFlags & SIInstrFlags::VOP3) &&
3180       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3181       getForcedEncodingSize() != 64)
3182     return Match_PreferE32;
3183 
3184   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3185       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3186     // v_mac_f32/16 allow only dst_sel == DWORD;
3187     auto OpNum =
3188         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3189     const auto &Op = Inst.getOperand(OpNum);
3190     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3191       return Match_InvalidOperand;
3192     }
3193   }
3194 
3195   return Match_Success;
3196 }
3197 
3198 static ArrayRef<unsigned> getAllVariants() {
3199   static const unsigned Variants[] = {
3200     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3201     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3202   };
3203 
3204   return makeArrayRef(Variants);
3205 }
3206 
3207 // What asm variants we should check
3208 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3209   if (getForcedEncodingSize() == 32) {
3210     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3211     return makeArrayRef(Variants);
3212   }
3213 
3214   if (isForcedVOP3()) {
3215     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3216     return makeArrayRef(Variants);
3217   }
3218 
3219   if (isForcedSDWA()) {
3220     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3221                                         AMDGPUAsmVariants::SDWA9};
3222     return makeArrayRef(Variants);
3223   }
3224 
3225   if (isForcedDPP()) {
3226     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3227     return makeArrayRef(Variants);
3228   }
3229 
3230   return getAllVariants();
3231 }
3232 
3233 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3234   if (getForcedEncodingSize() == 32)
3235     return "e32";
3236 
3237   if (isForcedVOP3())
3238     return "e64";
3239 
3240   if (isForcedSDWA())
3241     return "sdwa";
3242 
3243   if (isForcedDPP())
3244     return "dpp";
3245 
3246   return "";
3247 }
3248 
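// Return the first implicit SGPR read by this VOP instruction (FLAT_SCR,
// VCC, VCC_LO, VCC_HI or M0), or AMDGPU::NoRegister if there is none.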
3249 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3250   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3251   const unsigned Num = Desc.getNumImplicitUses();
3252   for (unsigned i = 0; i < Num; ++i) {
3253     unsigned Reg = Desc.ImplicitUses[i];
3254     switch (Reg) {
3255     case AMDGPU::FLAT_SCR:
3256     case AMDGPU::VCC:
3257     case AMDGPU::VCC_LO:
3258     case AMDGPU::VCC_HI:
3259     case AMDGPU::M0:
3260       return Reg;
3261     default:
3262       break;
3263     }
3264   }
3265   return AMDGPU::NoRegister;
3266 }
3267 
3268 // NB: This code is correct only when used to check constant
3269 // bus limitations because GFX7 supports no f16 inline constants.
3270 // Note that there are no cases when a GFX7 opcode violates
3271 // constant bus limitations due to the use of an f16 constant.
3272 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3273                                        unsigned OpIdx) const {
3274   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3275 
3276   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3277     return false;
3278   }
3279 
3280   const MCOperand &MO = Inst.getOperand(OpIdx);
3281 
3282   int64_t Val = MO.getImm();
3283   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3284 
3285   switch (OpSize) { // expected operand size
3286   case 8:
3287     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3288   case 4:
3289     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3290   case 2: {
3291     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3292     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3293         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3294         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3295       return AMDGPU::isInlinableIntLiteral(Val);
3296 
3297     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3298         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3299         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3300       return AMDGPU::isInlinableIntLiteralV216(Val);
3301 
3302     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3303         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3304         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3305       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3306 
3307     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3308   }
3309   default:
3310     llvm_unreachable("invalid operand size");
3311   }
3312 }
3313 
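// GFX10+ generally allows two scalar operands on the constant bus; 64-bit
// shifts and all earlier targets are limited to one.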
3314 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3315   if (!isGFX10Plus())
3316     return 1;
3317 
3318   switch (Opcode) {
3319   // 64-bit shift instructions can use only one scalar value input
3320   case AMDGPU::V_LSHLREV_B64_e64:
3321   case AMDGPU::V_LSHLREV_B64_gfx10:
3322   case AMDGPU::V_LSHRREV_B64_e64:
3323   case AMDGPU::V_LSHRREV_B64_gfx10:
3324   case AMDGPU::V_ASHRREV_I64_e64:
3325   case AMDGPU::V_ASHRREV_I64_gfx10:
3326   case AMDGPU::V_LSHL_B64_e64:
3327   case AMDGPU::V_LSHR_B64_e64:
3328   case AMDGPU::V_ASHR_I64_e64:
3329     return 1;
3330   default:
3331     return 2;
3332   }
3333 }
3334 
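// An operand occupies the constant bus if it is a non-inline immediate,
// an expression, or an SGPR other than null.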
3335 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3336   const MCOperand &MO = Inst.getOperand(OpIdx);
3337   if (MO.isImm()) {
3338     return !isInlineConstant(Inst, OpIdx);
3339   } else if (MO.isReg()) {
3340     auto Reg = MO.getReg();
3341     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3342     auto PReg = mc2PseudoReg(Reg);
3343     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3344   } else {
3345     return true;
3346   }
3347 }
3348 
3349 bool
3350 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3351                                                 const OperandVector &Operands) {
3352   const unsigned Opcode = Inst.getOpcode();
3353   const MCInstrDesc &Desc = MII.get(Opcode);
3354   unsigned LastSGPR = AMDGPU::NoRegister;
3355   unsigned ConstantBusUseCount = 0;
3356   unsigned NumLiterals = 0;
3357   unsigned LiteralSize;
3358 
3359   if (Desc.TSFlags &
3360       (SIInstrFlags::VOPC |
3361        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3362        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3363        SIInstrFlags::SDWA)) {
3364     // Check special imm operands (used by madmk, etc)
3365     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3366       ++NumLiterals;
3367       LiteralSize = 4;
3368     }
3369 
3370     SmallDenseSet<unsigned> SGPRsUsed;
3371     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3372     if (SGPRUsed != AMDGPU::NoRegister) {
3373       SGPRsUsed.insert(SGPRUsed);
3374       ++ConstantBusUseCount;
3375     }
3376 
3377     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3378     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3379     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3380 
3381     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3382 
3383     for (int OpIdx : OpIndices) {
3384       if (OpIdx == -1) break;
3385 
3386       const MCOperand &MO = Inst.getOperand(OpIdx);
3387       if (usesConstantBus(Inst, OpIdx)) {
3388         if (MO.isReg()) {
3389           LastSGPR = mc2PseudoReg(MO.getReg());
3390           // Pairs of registers with partial intersections like these
3391           //   s0, s[0:1]
3392           //   flat_scratch_lo, flat_scratch
3393           //   flat_scratch_lo, flat_scratch_hi
3394           // are theoretically valid but they are disabled anyway.
3395           // Note that this code mimics SIInstrInfo::verifyInstruction
3396           if (!SGPRsUsed.count(LastSGPR)) {
3397             SGPRsUsed.insert(LastSGPR);
3398             ++ConstantBusUseCount;
3399           }
3400         } else { // Expression or a literal
3401 
3402           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3403             continue; // special operand like VINTERP attr_chan
3404 
3405           // An instruction may use only one literal.
3406           // This has been validated on the previous step.
3407           // See validateVOPLiteral.
3408           // This literal may be used as more than one operand.
3409           // If all these operands are of the same size,
3410           // this literal counts as one scalar value.
3411           // Otherwise it counts as 2 scalar values.
3412           // See "GFX10 Shader Programming", section 3.6.2.3.
3413 
3414           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3415           if (Size < 4) Size = 4;
3416 
3417           if (NumLiterals == 0) {
3418             NumLiterals = 1;
3419             LiteralSize = Size;
3420           } else if (LiteralSize != Size) {
3421             NumLiterals = 2;
3422           }
3423         }
3424       }
3425     }
3426   }
3427   ConstantBusUseCount += NumLiterals;
3428 
3429   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3430     return true;
3431 
3432   SMLoc LitLoc = getLitLoc(Operands);
3433   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3434   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3435   Error(Loc, "invalid operand (violates constant bus restrictions)");
3436   return false;
3437 }
3438 
3439 bool
3440 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3441                                                  const OperandVector &Operands) {
3442   const unsigned Opcode = Inst.getOpcode();
3443   const MCInstrDesc &Desc = MII.get(Opcode);
3444 
3445   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3446   if (DstIdx == -1 ||
3447       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3448     return true;
3449   }
3450 
3451   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3452 
3453   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3454   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3455   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3456 
3457   assert(DstIdx != -1);
3458   const MCOperand &Dst = Inst.getOperand(DstIdx);
3459   assert(Dst.isReg());
3460 
3461   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3462 
3463   for (int SrcIdx : SrcIndices) {
3464     if (SrcIdx == -1) break;
3465     const MCOperand &Src = Inst.getOperand(SrcIdx);
3466     if (Src.isReg()) {
3467       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3468         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3469         Error(getRegLoc(SrcReg, Operands),
3470           "destination must be different than all sources");
3471         return false;
3472       }
3473     }
3474   }
3475 
3476   return true;
3477 }
3478 
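// If the target has no integer clamp support, the clamp bit must be unset
// for instructions that encode it.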
3479 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3480 
3481   const unsigned Opc = Inst.getOpcode();
3482   const MCInstrDesc &Desc = MII.get(Opc);
3483 
3484   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3485     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3486     assert(ClampIdx != -1);
3487     return Inst.getOperand(ClampIdx).getImm() == 0;
3488   }
3489 
3490   return true;
3491 }
3492 
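// Check that the vdata register count matches the number of enabled dmask
// bits (or 4 for gather4), adjusted for packed d16 and tfe. Returns an error
// message on mismatch and None otherwise.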
3493 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3494 
3495   const unsigned Opc = Inst.getOpcode();
3496   const MCInstrDesc &Desc = MII.get(Opc);
3497 
3498   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3499     return None;
3500 
3501   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3502   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3503   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3504 
3505   assert(VDataIdx != -1);
3506 
3507   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3508     return None;
3509 
3510   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3511   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3512   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3513   if (DMask == 0)
3514     DMask = 1;
3515 
3516   bool isPackedD16 = false;
3517   unsigned DataSize =
3518     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3519   if (hasPackedD16()) {
3520     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3521     isPackedD16 = D16Idx >= 0;
3522     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3523       DataSize = (DataSize + 1) / 2;
3524   }
3525 
3526   if ((VDataSize / 4) == DataSize + TFESize)
3527     return None;
3528 
3529   return StringRef(isPackedD16
3530                        ? "image data size does not match dmask, d16 and tfe"
3531                        : "image data size does not match dmask and tfe");
3532 }
3533 
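// On GFX10+, check that the number of address registers matches what the
// dim and a16/g16 settings require, for both NSA and legacy vector addressing.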
3534 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3535   const unsigned Opc = Inst.getOpcode();
3536   const MCInstrDesc &Desc = MII.get(Opc);
3537 
3538   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3539     return true;
3540 
3541   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3542 
3543   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3544       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3545   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3546   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3547   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3548   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3549 
3550   assert(VAddr0Idx != -1);
3551   assert(SrsrcIdx != -1);
3552   assert(SrsrcIdx > VAddr0Idx);
3553 
3554   if (DimIdx == -1)
3555     return true; // intersect_ray
3556 
3557   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3558   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3559   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3560   unsigned ActualAddrSize =
3561       IsNSA ? SrsrcIdx - VAddr0Idx
3562             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3563   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3564 
3565   unsigned ExpectedAddrSize =
3566       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3567 
3568   if (!IsNSA) {
3569     if (ExpectedAddrSize > 8)
3570       ExpectedAddrSize = 16;
3571 
3572     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3573     // This provides backward compatibility for assembly created
3574     // before 160b/192b/224b types were directly supported.
3575     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3576       return true;
3577   }
3578 
3579   return ActualAddrSize == ExpectedAddrSize;
3580 }
3581 
3582 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3583 
3584   const unsigned Opc = Inst.getOpcode();
3585   const MCInstrDesc &Desc = MII.get(Opc);
3586 
3587   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3588     return true;
3589   if (!Desc.mayLoad() || !Desc.mayStore())
3590     return true; // Not atomic
3591 
3592   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3593   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3594 
3595   // This is an incomplete check because image_atomic_cmpswap
3596   // may only use 0x3 and 0xf while other atomic operations
3597   // may use 0x1 and 0x3. However these limitations are
3598   // verified when we check that dmask matches dst size.
3599   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3600 }
3601 
3602 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3603 
3604   const unsigned Opc = Inst.getOpcode();
3605   const MCInstrDesc &Desc = MII.get(Opc);
3606 
3607   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3608     return true;
3609 
3610   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3611   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3612 
3613   // GATHER4 instructions use dmask in a different fashion compared to
3614   // other MIMG instructions. The only useful DMASK values are
3615   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3616   // (red,red,red,red) etc.) The ISA document doesn't mention
3617   // this.
3618   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3619 }
3620 
3621 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3622   const unsigned Opc = Inst.getOpcode();
3623   const MCInstrDesc &Desc = MII.get(Opc);
3624 
3625   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3626     return true;
3627 
3628   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3629   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3630       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3631 
3632   if (!BaseOpcode->MSAA)
3633     return true;
3634 
3635   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3636   assert(DimIdx != -1);
3637 
3638   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3639   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3640 
3641   return DimInfo->MSAA;
3642 }
3643 
3644 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3645 {
3646   switch (Opcode) {
3647   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3648   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3649   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3650     return true;
3651   default:
3652     return false;
3653   }
3654 }
3655 
3656 // movrels* opcodes should only allow VGPRs as src0.
3657 // This is specified in .td description for vop1/vop3,
3658 // but sdwa is handled differently. See isSDWAOperand.
3659 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3660                                       const OperandVector &Operands) {
3661 
3662   const unsigned Opc = Inst.getOpcode();
3663   const MCInstrDesc &Desc = MII.get(Opc);
3664 
3665   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3666     return true;
3667 
3668   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3669   assert(Src0Idx != -1);
3670 
3671   SMLoc ErrLoc;
3672   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3673   if (Src0.isReg()) {
3674     auto Reg = mc2PseudoReg(Src0.getReg());
3675     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3676     if (!isSGPR(Reg, TRI))
3677       return true;
3678     ErrLoc = getRegLoc(Reg, Operands);
3679   } else {
3680     ErrLoc = getConstLoc(Operands);
3681   }
3682 
3683   Error(ErrLoc, "source operand must be a VGPR");
3684   return false;
3685 }
3686 
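// Before gfx90a, v_accvgpr_write does not accept an SGPR source; it must be
// a VGPR or an inline constant.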
3687 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3688                                           const OperandVector &Operands) {
3689 
3690   const unsigned Opc = Inst.getOpcode();
3691 
3692   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3693     return true;
3694 
3695   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3696   assert(Src0Idx != -1);
3697 
3698   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3699   if (!Src0.isReg())
3700     return true;
3701 
3702   auto Reg = mc2PseudoReg(Src0.getReg());
3703   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3704   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3705     Error(getRegLoc(Reg, Operands),
3706           "source operand must be either a VGPR or an inline constant");
3707     return false;
3708   }
3709 
3710   return true;
3711 }
3712 
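// For MFMA instructions with a result wider than 128 bits, src2 must not
// partially overlap the destination registers.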
3713 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3714                                    const OperandVector &Operands) {
3715   const unsigned Opc = Inst.getOpcode();
3716   const MCInstrDesc &Desc = MII.get(Opc);
3717 
3718   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3719     return true;
3720 
3721   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3722   if (Src2Idx == -1)
3723     return true;
3724 
3725   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3726   if (!Src2.isReg())
3727     return true;
3728 
3729   MCRegister Src2Reg = Src2.getReg();
3730   MCRegister DstReg = Inst.getOperand(0).getReg();
3731   if (Src2Reg == DstReg)
3732     return true;
3733 
3734   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3735   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3736     return true;
3737 
3738   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3739     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3740           "source 2 operand must not partially overlap with dst");
3741     return false;
3742   }
3743 
3744   return true;
3745 }
3746 
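// V_DIV_SCALE source operands must not use the ABS modifier.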
3747 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3748   switch (Inst.getOpcode()) {
3749   default:
3750     return true;
3751   case V_DIV_SCALE_F32_gfx6_gfx7:
3752   case V_DIV_SCALE_F32_vi:
3753   case V_DIV_SCALE_F32_gfx10:
3754   case V_DIV_SCALE_F64_gfx6_gfx7:
3755   case V_DIV_SCALE_F64_vi:
3756   case V_DIV_SCALE_F64_gfx10:
3757     break;
3758   }
3759 
3760   // TODO: Check that src0 = src1 or src2.
3761 
3762   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3763                     AMDGPU::OpName::src1_modifiers,
3764                     AMDGPU::OpName::src2_modifiers}) {
3765     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3766             .getImm() &
3767         SISrcMods::ABS) {
3768       return false;
3769     }
3770   }
3771 
3772   return true;
3773 }
3774 
3775 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3776 
3777   const unsigned Opc = Inst.getOpcode();
3778   const MCInstrDesc &Desc = MII.get(Opc);
3779 
3780   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3781     return true;
3782 
3783   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3784   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3785     if (isCI() || isSI())
3786       return false;
3787   }
3788 
3789   return true;
3790 }
3791 
3792 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3793   const unsigned Opc = Inst.getOpcode();
3794   const MCInstrDesc &Desc = MII.get(Opc);
3795 
3796   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3797     return true;
3798 
3799   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3800   if (DimIdx < 0)
3801     return true;
3802 
3803   long Imm = Inst.getOperand(DimIdx).getImm();
3804   if (Imm < 0 || Imm >= 8)
3805     return false;
3806 
3807   return true;
3808 }
3809 
3810 static bool IsRevOpcode(const unsigned Opcode)
3811 {
3812   switch (Opcode) {
3813   case AMDGPU::V_SUBREV_F32_e32:
3814   case AMDGPU::V_SUBREV_F32_e64:
3815   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3816   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3817   case AMDGPU::V_SUBREV_F32_e32_vi:
3818   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3819   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3820   case AMDGPU::V_SUBREV_F32_e64_vi:
3821 
3822   case AMDGPU::V_SUBREV_CO_U32_e32:
3823   case AMDGPU::V_SUBREV_CO_U32_e64:
3824   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3825   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3826 
3827   case AMDGPU::V_SUBBREV_U32_e32:
3828   case AMDGPU::V_SUBBREV_U32_e64:
3829   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3830   case AMDGPU::V_SUBBREV_U32_e32_vi:
3831   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3832   case AMDGPU::V_SUBBREV_U32_e64_vi:
3833 
3834   case AMDGPU::V_SUBREV_U32_e32:
3835   case AMDGPU::V_SUBREV_U32_e64:
3836   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3837   case AMDGPU::V_SUBREV_U32_e32_vi:
3838   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3839   case AMDGPU::V_SUBREV_U32_e64_vi:
3840 
3841   case AMDGPU::V_SUBREV_F16_e32:
3842   case AMDGPU::V_SUBREV_F16_e64:
3843   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3844   case AMDGPU::V_SUBREV_F16_e32_vi:
3845   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3846   case AMDGPU::V_SUBREV_F16_e64_vi:
3847 
3848   case AMDGPU::V_SUBREV_U16_e32:
3849   case AMDGPU::V_SUBREV_U16_e64:
3850   case AMDGPU::V_SUBREV_U16_e32_vi:
3851   case AMDGPU::V_SUBREV_U16_e64_vi:
3852 
3853   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3854   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3855   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3856 
3857   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3858   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3859 
3860   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3861   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3862 
3863   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3864   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3865 
3866   case AMDGPU::V_LSHRREV_B32_e32:
3867   case AMDGPU::V_LSHRREV_B32_e64:
3868   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3869   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3870   case AMDGPU::V_LSHRREV_B32_e32_vi:
3871   case AMDGPU::V_LSHRREV_B32_e64_vi:
3872   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3873   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3874 
3875   case AMDGPU::V_ASHRREV_I32_e32:
3876   case AMDGPU::V_ASHRREV_I32_e64:
3877   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3878   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3879   case AMDGPU::V_ASHRREV_I32_e32_vi:
3880   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3881   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3882   case AMDGPU::V_ASHRREV_I32_e64_vi:
3883 
3884   case AMDGPU::V_LSHLREV_B32_e32:
3885   case AMDGPU::V_LSHLREV_B32_e64:
3886   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3887   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3888   case AMDGPU::V_LSHLREV_B32_e32_vi:
3889   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3890   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3891   case AMDGPU::V_LSHLREV_B32_e64_vi:
3892 
3893   case AMDGPU::V_LSHLREV_B16_e32:
3894   case AMDGPU::V_LSHLREV_B16_e64:
3895   case AMDGPU::V_LSHLREV_B16_e32_vi:
3896   case AMDGPU::V_LSHLREV_B16_e64_vi:
3897   case AMDGPU::V_LSHLREV_B16_gfx10:
3898 
3899   case AMDGPU::V_LSHRREV_B16_e32:
3900   case AMDGPU::V_LSHRREV_B16_e64:
3901   case AMDGPU::V_LSHRREV_B16_e32_vi:
3902   case AMDGPU::V_LSHRREV_B16_e64_vi:
3903   case AMDGPU::V_LSHRREV_B16_gfx10:
3904 
3905   case AMDGPU::V_ASHRREV_I16_e32:
3906   case AMDGPU::V_ASHRREV_I16_e64:
3907   case AMDGPU::V_ASHRREV_I16_e32_vi:
3908   case AMDGPU::V_ASHRREV_I16_e64_vi:
3909   case AMDGPU::V_ASHRREV_I16_gfx10:
3910 
3911   case AMDGPU::V_LSHLREV_B64_e64:
3912   case AMDGPU::V_LSHLREV_B64_gfx10:
3913   case AMDGPU::V_LSHLREV_B64_vi:
3914 
3915   case AMDGPU::V_LSHRREV_B64_e64:
3916   case AMDGPU::V_LSHRREV_B64_gfx10:
3917   case AMDGPU::V_LSHRREV_B64_vi:
3918 
3919   case AMDGPU::V_ASHRREV_I64_e64:
3920   case AMDGPU::V_ASHRREV_I64_gfx10:
3921   case AMDGPU::V_ASHRREV_I64_vi:
3922 
3923   case AMDGPU::V_PK_LSHLREV_B16:
3924   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3925   case AMDGPU::V_PK_LSHLREV_B16_vi:
3926 
3927   case AMDGPU::V_PK_LSHRREV_B16:
3928   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3929   case AMDGPU::V_PK_LSHRREV_B16_vi:
3930   case AMDGPU::V_PK_ASHRREV_I16:
3931   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3932   case AMDGPU::V_PK_ASHRREV_I16_vi:
3933     return true;
3934   default:
3935     return false;
3936   }
3937 }
3938 
3939 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3940 
3941   using namespace SIInstrFlags;
3942   const unsigned Opcode = Inst.getOpcode();
3943   const MCInstrDesc &Desc = MII.get(Opcode);
3944 
3945   // lds_direct register is defined so that it can be used
3946   // with 9-bit operands only. Ignore encodings which do not accept these.
3947   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3948   if ((Desc.TSFlags & Enc) == 0)
3949     return None;
3950 
3951   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3952     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3953     if (SrcIdx == -1)
3954       break;
3955     const auto &Src = Inst.getOperand(SrcIdx);
3956     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3957 
3958       if (isGFX90A())
3959         return StringRef("lds_direct is not supported on this GPU");
3960 
3961       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3962         return StringRef("lds_direct cannot be used with this instruction");
3963 
3964       if (SrcName != OpName::src0)
3965         return StringRef("lds_direct may be used as src0 only");
3966     }
3967   }
3968 
3969   return None;
3970 }
3971 
3972 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3973   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3974     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3975     if (Op.isFlatOffset())
3976       return Op.getStartLoc();
3977   }
3978   return getLoc();
3979 }
3980 
3981 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3982                                          const OperandVector &Operands) {
3983   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3984   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3985     return true;
3986 
3987   auto Opcode = Inst.getOpcode();
3988   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3989   assert(OpNum != -1);
3990 
3991   const auto &Op = Inst.getOperand(OpNum);
3992   if (!hasFlatOffsets() && Op.getImm() != 0) {
3993     Error(getFlatOffsetLoc(Operands),
3994           "flat offset modifier is not supported on this GPU");
3995     return false;
3996   }
3997 
3998   // For FLAT segment the offset must be positive;
3999   // MSB is ignored and forced to zero.
4000   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4001     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4002     if (!isIntN(OffsetSize, Op.getImm())) {
4003       Error(getFlatOffsetLoc(Operands),
4004             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4005       return false;
4006     }
4007   } else {
4008     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4009     if (!isUIntN(OffsetSize, Op.getImm())) {
4010       Error(getFlatOffsetLoc(Operands),
4011             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4012       return false;
4013     }
4014   }
4015 
4016   return true;
4017 }
4018 
4019 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4020   // Start with second operand because SMEM Offset cannot be dst or src0.
4021   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4022     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4023     if (Op.isSMEMOffset())
4024       return Op.getStartLoc();
4025   }
4026   return getLoc();
4027 }
4028 
4029 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4030                                          const OperandVector &Operands) {
4031   if (isCI() || isSI())
4032     return true;
4033 
4034   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4035   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4036     return true;
4037 
4038   auto Opcode = Inst.getOpcode();
4039   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4040   if (OpNum == -1)
4041     return true;
4042 
4043   const auto &Op = Inst.getOperand(OpNum);
4044   if (!Op.isImm())
4045     return true;
4046 
4047   uint64_t Offset = Op.getImm();
4048   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4049   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4050       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4051     return true;
4052 
4053   Error(getSMEMOffsetLoc(Operands),
4054         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4055                                "expected a 21-bit signed offset");
4056 
4057   return false;
4058 }
4059 
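// SOP2/SOPC instructions may use at most one unique literal or expression
// across src0 and src1.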
4060 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4061   unsigned Opcode = Inst.getOpcode();
4062   const MCInstrDesc &Desc = MII.get(Opcode);
4063   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4064     return true;
4065 
4066   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4067   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4068 
4069   const int OpIndices[] = { Src0Idx, Src1Idx };
4070 
4071   unsigned NumExprs = 0;
4072   unsigned NumLiterals = 0;
4073   uint32_t LiteralValue;
4074 
4075   for (int OpIdx : OpIndices) {
4076     if (OpIdx == -1) break;
4077 
4078     const MCOperand &MO = Inst.getOperand(OpIdx);
4079     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4080     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4081       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4082         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4083         if (NumLiterals == 0 || LiteralValue != Value) {
4084           LiteralValue = Value;
4085           ++NumLiterals;
4086         }
4087       } else if (MO.isExpr()) {
4088         ++NumExprs;
4089       }
4090     }
4091   }
4092 
4093   return NumLiterals + NumExprs <= 1;
4094 }
4095 
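// v_permlane16/x16 accept only the low two op_sel bits. GFX940 DOT
// instructions must leave op_sel clear and op_sel_hi all ones.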
4096 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4097   const unsigned Opc = Inst.getOpcode();
4098   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4099       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4100     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4101     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4102 
4103     if (OpSel & ~3)
4104       return false;
4105   }
4106 
4107   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4108     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4109     if (OpSelIdx != -1) {
4110       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4111         return false;
4112     }
4113     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4114     if (OpSelHiIdx != -1) {
4115       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4116         return false;
4117     }
4118   }
4119 
4120   return true;
4121 }
4122 
4123 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4124                                   const OperandVector &Operands) {
4125   const unsigned Opc = Inst.getOpcode();
4126   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4127   if (DppCtrlIdx < 0)
4128     return true;
4129   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4130 
4131   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4132     // DPP64 is supported for row_newbcast only.
4133     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4134     if (Src0Idx >= 0 &&
4135         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4136       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4137       Error(S, "64 bit dpp only supports row_newbcast");
4138       return false;
4139     }
4140   }
4141 
4142   return true;
4143 }
4144 
4145 // Check if VCC register matches wavefront size
4146 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4147   auto FB = getFeatureBits();
4148   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4149     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4150 }
4151 
4152 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4153 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4154                                          const OperandVector &Operands) {
4155   unsigned Opcode = Inst.getOpcode();
4156   const MCInstrDesc &Desc = MII.get(Opcode);
4157   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4158   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4159       ImmIdx == -1)
4160     return true;
4161 
4162   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4163   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4164   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4165 
4166   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4167 
4168   unsigned NumExprs = 0;
4169   unsigned NumLiterals = 0;
4170   uint32_t LiteralValue;
4171 
4172   for (int OpIdx : OpIndices) {
4173     if (OpIdx == -1)
4174       continue;
4175 
4176     const MCOperand &MO = Inst.getOperand(OpIdx);
4177     if (!MO.isImm() && !MO.isExpr())
4178       continue;
4179     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4180       continue;
4181 
4182     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4183         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4184       Error(getConstLoc(Operands),
4185             "inline constants are not allowed for this operand");
4186       return false;
4187     }
4188 
4189     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4190       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4191       if (NumLiterals == 0 || LiteralValue != Value) {
4192         LiteralValue = Value;
4193         ++NumLiterals;
4194       }
4195     } else if (MO.isExpr()) {
4196       ++NumExprs;
4197     }
4198   }
4199   NumLiterals += NumExprs;
4200 
4201   if (!NumLiterals)
4202     return true;
4203 
4204   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4205     Error(getLitLoc(Operands), "literal operands are not supported");
4206     return false;
4207   }
4208 
4209   if (NumLiterals > 1) {
4210     Error(getLitLoc(Operands), "only one literal operand is allowed");
4211     return false;
4212   }
4213 
4214   return true;
4215 }
4216 
4217 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4218 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4219                          const MCRegisterInfo *MRI) {
4220   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4221   if (OpIdx < 0)
4222     return -1;
4223 
4224   const MCOperand &Op = Inst.getOperand(OpIdx);
4225   if (!Op.isReg())
4226     return -1;
4227 
4228   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4229   auto Reg = Sub ? Sub : Op.getReg();
4230   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4231   return AGPR32.contains(Reg) ? 1 : 0;
4232 }
4233 
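// For FLAT/MUBUF/MTBUF/MIMG/DS memory instructions, the data and destination
// operands must consistently be VGPRs or AGPRs; AGPR data is only allowed on
// targets with gfx90a instructions.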
4234 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4235   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4236   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4237                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4238                   SIInstrFlags::DS)) == 0)
4239     return true;
4240 
4241   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4242                                                       : AMDGPU::OpName::vdata;
4243 
4244   const MCRegisterInfo *MRI = getMRI();
4245   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4246   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4247 
4248   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4249     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4250     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4251       return false;
4252   }
4253 
4254   auto FB = getFeatureBits();
4255   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4256     if (DataAreg < 0 || DstAreg < 0)
4257       return true;
4258     return DstAreg == DataAreg;
4259   }
4260 
4261   return DstAreg < 1 && DataAreg < 1;
4262 }
4263 
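// gfx90a requires VGPR and AGPR tuples to start at an even-numbered register.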
4264 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4265   auto FB = getFeatureBits();
4266   if (!FB[AMDGPU::FeatureGFX90AInsts])
4267     return true;
4268 
4269   const MCRegisterInfo *MRI = getMRI();
4270   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4271   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4272   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4273     const MCOperand &Op = Inst.getOperand(I);
4274     if (!Op.isReg())
4275       continue;
4276 
4277     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4278     if (!Sub)
4279       continue;
4280 
4281     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4282       return false;
4283     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4284       return false;
4285   }
4286 
4287   return true;
4288 }
4289 
4290 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4291   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4292     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4293     if (Op.isBLGP())
4294       return Op.getStartLoc();
4295   }
4296   return SMLoc();
4297 }
4298 
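// On gfx940, the double-precision MFMA opcodes spell the blgp operand as
// 'neg:'; reject whichever spelling does not match the opcode.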
4299 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4300                                    const OperandVector &Operands) {
4301   unsigned Opc = Inst.getOpcode();
4302   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4303   if (BlgpIdx == -1)
4304     return true;
4305   SMLoc BLGPLoc = getBLGPLoc(Operands);
4306   if (!BLGPLoc.isValid())
4307     return true;
4308   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4309   auto FB = getFeatureBits();
4310   bool UsesNeg = false;
4311   if (FB[AMDGPU::FeatureGFX940Insts]) {
4312     switch (Opc) {
4313     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4314     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4315     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4316     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4317       UsesNeg = true;
4318     }
4319   }
4320 
4321   if (IsNeg == UsesNeg)
4322     return true;
4323 
4324   Error(BLGPLoc,
4325         UsesNeg ? "invalid modifier: blgp is not supported"
4326                 : "invalid modifier: neg is not supported");
4327 
4328   return false;
4329 }
4330 
4331 // gfx90a has an undocumented limitation:
4332 // DS_GWS opcodes must use even aligned registers.
4333 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4334                                   const OperandVector &Operands) {
4335   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4336     return true;
4337 
4338   int Opc = Inst.getOpcode();
4339   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4340       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4341     return true;
4342 
4343   const MCRegisterInfo *MRI = getMRI();
4344   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4345   int Data0Pos =
4346       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4347   assert(Data0Pos != -1);
4348   auto Reg = Inst.getOperand(Data0Pos).getReg();
4349   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4350   if (RegIdx & 1) {
4351     SMLoc RegLoc = getRegLoc(Reg, Operands);
4352     Error(RegLoc, "vgpr must be even aligned");
4353     return false;
4354   }
4355 
4356   return true;
4357 }
4358 
4359 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4360                                             const OperandVector &Operands,
4361                                             const SMLoc &IDLoc) {
4362   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4363                                            AMDGPU::OpName::cpol);
4364   if (CPolPos == -1)
4365     return true;
4366 
4367   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4368 
4369   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4370   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4371       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4372     Error(IDLoc, "invalid cache policy for SMRD instruction");
4373     return false;
4374   }
4375 
4376   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4377     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4378     StringRef CStr(S.getPointer());
4379     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4380     Error(S, "scc is not supported on this GPU");
4381     return false;
4382   }
4383 
4384   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4385     return true;
4386 
4387   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4388     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4389       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4390                               : "instruction must use glc");
4391       return false;
4392     }
4393   } else {
4394     if (CPol & CPol::GLC) {
4395       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4396       StringRef CStr(S.getPointer());
4397       S = SMLoc::getFromPointer(
4398           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4399       Error(S, isGFX940() ? "instruction must not use sc0"
4400                           : "instruction must not use glc");
4401       return false;
4402     }
4403   }
4404 
4405   return true;
4406 }
4407 
4408 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4409                                           const SMLoc &IDLoc,
4410                                           const OperandVector &Operands) {
4411   if (auto ErrMsg = validateLdsDirect(Inst)) {
4412     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4413     return false;
4414   }
4415   if (!validateSOPLiteral(Inst)) {
4416     Error(getLitLoc(Operands),
4417       "only one literal operand is allowed");
4418     return false;
4419   }
4420   if (!validateVOPLiteral(Inst, Operands)) {
4421     return false;
4422   }
4423   if (!validateConstantBusLimitations(Inst, Operands)) {
4424     return false;
4425   }
4426   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4427     return false;
4428   }
4429   if (!validateIntClampSupported(Inst)) {
4430     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4431       "integer clamping is not supported on this GPU");
4432     return false;
4433   }
4434   if (!validateOpSel(Inst)) {
4435     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4436       "invalid op_sel operand");
4437     return false;
4438   }
4439   if (!validateDPP(Inst, Operands)) {
4440     return false;
4441   }
4442   // For MUBUF/MTBUF d16 is a part of the opcode, so there is nothing to validate.
4443   if (!validateMIMGD16(Inst)) {
4444     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4445       "d16 modifier is not supported on this GPU");
4446     return false;
4447   }
4448   if (!validateMIMGDim(Inst)) {
4449     Error(IDLoc, "dim modifier is required on this GPU");
4450     return false;
4451   }
4452   if (!validateMIMGMSAA(Inst)) {
4453     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4454           "invalid dim; must be MSAA type");
4455     return false;
4456   }
4457   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4458     Error(IDLoc, *ErrMsg);
4459     return false;
4460   }
4461   if (!validateMIMGAddrSize(Inst)) {
4462     Error(IDLoc,
4463       "image address size does not match dim and a16");
4464     return false;
4465   }
4466   if (!validateMIMGAtomicDMask(Inst)) {
4467     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4468       "invalid atomic image dmask");
4469     return false;
4470   }
4471   if (!validateMIMGGatherDMask(Inst)) {
4472     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4473       "invalid image_gather dmask: only one bit must be set");
4474     return false;
4475   }
4476   if (!validateMovrels(Inst, Operands)) {
4477     return false;
4478   }
4479   if (!validateFlatOffset(Inst, Operands)) {
4480     return false;
4481   }
4482   if (!validateSMEMOffset(Inst, Operands)) {
4483     return false;
4484   }
4485   if (!validateMAIAccWrite(Inst, Operands)) {
4486     return false;
4487   }
4488   if (!validateMFMA(Inst, Operands)) {
4489     return false;
4490   }
4491   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4492     return false;
4493   }
4494 
4495   if (!validateAGPRLdSt(Inst)) {
4496     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4497     ? "invalid register class: data and dst should be all VGPR or AGPR"
4498     : "invalid register class: agpr loads and stores not supported on this GPU"
4499     );
4500     return false;
4501   }
4502   if (!validateVGPRAlign(Inst)) {
4503     Error(IDLoc,
4504       "invalid register class: vgpr tuples must be 64 bit aligned");
4505     return false;
4506   }
4507   if (!validateGWS(Inst, Operands)) {
4508     return false;
4509   }
4510 
4511   if (!validateBLGP(Inst, Operands)) {
4512     return false;
4513   }
4514 
4515   if (!validateDivScale(Inst)) {
4516     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4517     return false;
4518   }
4522 
4523   return true;
4524 }
4525 
4526 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4527                                             const FeatureBitset &FBS,
4528                                             unsigned VariantID = 0);
4529 
4530 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4531                                 const FeatureBitset &AvailableFeatures,
4532                                 unsigned VariantID);
4533 
4534 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4535                                        const FeatureBitset &FBS) {
4536   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4537 }
4538 
4539 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4540                                        const FeatureBitset &FBS,
4541                                        ArrayRef<unsigned> Variants) {
4542   for (auto Variant : Variants) {
4543     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4544       return true;
4545   }
4546 
4547   return false;
4548 }
4549 
4550 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4551                                                   const SMLoc &IDLoc) {
4552   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4553 
4554   // Check if requested instruction variant is supported.
4555   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4556     return false;
4557 
4558   // This instruction is not supported.
4559   // Clear any other pending errors because they are no longer relevant.
4560   getParser().clearPendingErrors();
4561 
4562   // Requested instruction variant is not supported.
4563   // Check if any other variants are supported.
4564   StringRef VariantName = getMatchedVariantName();
4565   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4566     return Error(IDLoc,
4567                  Twine(VariantName,
4568                        " variant of this instruction is not supported"));
4569   }
4570 
4571   // Finally check if this instruction is supported on any other GPU.
4572   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4573     return Error(IDLoc, "instruction not supported on this GPU");
4574   }
4575 
4576   // Instruction not supported on any GPU. Probably a typo.
4577   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4578   return Error(IDLoc, "invalid instruction" + Suggestion);
4579 }
4580 
4581 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4582                                               OperandVector &Operands,
4583                                               MCStreamer &Out,
4584                                               uint64_t &ErrorInfo,
4585                                               bool MatchingInlineAsm) {
4586   MCInst Inst;
4587   unsigned Result = Match_Success;
4588   for (auto Variant : getMatchedVariants()) {
4589     uint64_t EI;
4590     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4591                                   Variant);
4592     // We order match statuses from least to most specific and keep the most
4593     // specific status as the result:
4594     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4595     if ((R == Match_Success) ||
4596         (R == Match_PreferE32) ||
4597         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4598         (R == Match_InvalidOperand && Result != Match_MissingFeature
4599                                    && Result != Match_PreferE32) ||
4600         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4601                                    && Result != Match_MissingFeature
4602                                    && Result != Match_PreferE32)) {
4603       Result = R;
4604       ErrorInfo = EI;
4605     }
4606     if (R == Match_Success)
4607       break;
4608   }
4609 
4610   if (Result == Match_Success) {
4611     if (!validateInstruction(Inst, IDLoc, Operands)) {
4612       return true;
4613     }
4614     Inst.setLoc(IDLoc);
4615     Out.emitInstruction(Inst, getSTI());
4616     return false;
4617   }
4618 
4619   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4620   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4621     return true;
4622   }
4623 
4624   switch (Result) {
4625   default: break;
4626   case Match_MissingFeature:
4627     // It has been verified that the specified instruction
4628     // mnemonic is valid. A match was found but it requires
4629     // features which are not supported on this GPU.
4630     return Error(IDLoc, "operands are not valid for this GPU or mode");
4631 
4632   case Match_InvalidOperand: {
4633     SMLoc ErrorLoc = IDLoc;
4634     if (ErrorInfo != ~0ULL) {
4635       if (ErrorInfo >= Operands.size()) {
4636         return Error(IDLoc, "too few operands for instruction");
4637       }
4638       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4639       if (ErrorLoc == SMLoc())
4640         ErrorLoc = IDLoc;
4641     }
4642     return Error(ErrorLoc, "invalid operand for instruction");
4643   }
4644 
4645   case Match_PreferE32:
4646     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4647                         "should be encoded as e32");
4648   case Match_MnemonicFail:
4649     llvm_unreachable("Invalid instructions should have been handled already");
4650   }
4651   llvm_unreachable("Implement any new match types added!");
4652 }
4653 
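// Parse an absolute expression into a 32-bit value. Returns true on failure.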
4654 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4655   int64_t Tmp = -1;
4656   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4657     return true;
4658   }
4659   if (getParser().parseAbsoluteExpression(Tmp)) {
4660     return true;
4661   }
4662   Ret = static_cast<uint32_t>(Tmp);
4663   return false;
4664 }
4665 
4666 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4667                                                uint32_t &Minor) {
4668   if (ParseAsAbsoluteExpression(Major))
4669     return TokError("invalid major version");
4670 
4671   if (!trySkipToken(AsmToken::Comma))
4672     return TokError("minor version number required, comma expected");
4673 
4674   if (ParseAsAbsoluteExpression(Minor))
4675     return TokError("invalid minor version");
4676 
4677   return false;
4678 }
4679 
4680 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4681   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4682     return TokError("directive only supported for amdgcn architecture");
4683 
4684   std::string TargetIDDirective;
4685   SMLoc TargetStart = getTok().getLoc();
4686   if (getParser().parseEscapedString(TargetIDDirective))
4687     return true;
4688 
4689   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4690   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4691     return getParser().Error(TargetRange.Start,
4692         (Twine(".amdgcn_target directive's target id ") +
4693          Twine(TargetIDDirective) +
4694          Twine(" does not match the specified target id ") +
4695          Twine(getTargetStreamer().getTargetID()->toString())).str());
4696 
4697   return false;
4698 }
4699 
4700 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4701   return Error(Range.Start, "value out of range", Range);
4702 }
4703 
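// Compute the granulated VGPR/SGPR block counts that get encoded into
// compute_pgm_rsrc1. Extra SGPRs for VCC, flat scratch and XNACK, as well as
// the SGPR-init-bug workaround, are folded in here; returns true after
// reporting an error if a register count is out of range.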
4704 bool AMDGPUAsmParser::calculateGPRBlocks(
4705     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4706     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4707     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4708     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4709   // TODO(scott.linder): These calculations are duplicated from
4710   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4711   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4712 
4713   unsigned NumVGPRs = NextFreeVGPR;
4714   unsigned NumSGPRs = NextFreeSGPR;
4715 
4716   if (Version.Major >= 10)
4717     NumSGPRs = 0;
4718   else {
4719     unsigned MaxAddressableNumSGPRs =
4720         IsaInfo::getAddressableNumSGPRs(&getSTI());
4721 
4722     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4723         NumSGPRs > MaxAddressableNumSGPRs)
4724       return OutOfRangeError(SGPRRange);
4725 
4726     NumSGPRs +=
4727         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4728 
4729     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4730         NumSGPRs > MaxAddressableNumSGPRs)
4731       return OutOfRangeError(SGPRRange);
4732 
4733     if (Features.test(FeatureSGPRInitBug))
4734       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4735   }
4736 
4737   VGPRBlocks =
4738       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4739   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4740 
4741   return false;
4742 }
4743 
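// Parse a .amdhsa_kernel block. Each .amdhsa_* sub-directive fills in one
// field of the kernel descriptor; .amdhsa_next_free_vgpr and
// .amdhsa_next_free_sgpr are mandatory. A minimal, illustrative example:
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel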
4744 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4745   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4746     return TokError("directive only supported for amdgcn architecture");
4747 
4748   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4749     return TokError("directive only supported for amdhsa OS");
4750 
4751   StringRef KernelName;
4752   if (getParser().parseIdentifier(KernelName))
4753     return true;
4754 
4755   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4756 
4757   StringSet<> Seen;
4758 
4759   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4760 
4761   SMRange VGPRRange;
4762   uint64_t NextFreeVGPR = 0;
4763   uint64_t AccumOffset = 0;
4764   uint64_t SharedVGPRCount = 0;
4765   SMRange SGPRRange;
4766   uint64_t NextFreeSGPR = 0;
4767 
4768   // Count the number of user SGPRs implied from the enabled feature bits.
4769   unsigned ImpliedUserSGPRCount = 0;
4770 
4771   // Track if the asm explicitly contains the directive for the user SGPR
4772   // count.
4773   Optional<unsigned> ExplicitUserSGPRCount;
4774   bool ReserveVCC = true;
4775   bool ReserveFlatScr = true;
4776   Optional<bool> EnableWavefrontSize32;
4777 
4778   while (true) {
4779     while (trySkipToken(AsmToken::EndOfStatement));
4780 
4781     StringRef ID;
4782     SMRange IDRange = getTok().getLocRange();
4783     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4784       return true;
4785 
4786     if (ID == ".end_amdhsa_kernel")
4787       break;
4788 
4789     if (Seen.find(ID) != Seen.end())
4790       return TokError(".amdhsa_ directives cannot be repeated");
4791     Seen.insert(ID);
4792 
4793     SMLoc ValStart = getLoc();
4794     int64_t IVal;
4795     if (getParser().parseAbsoluteExpression(IVal))
4796       return true;
4797     SMLoc ValEnd = getLoc();
4798     SMRange ValRange = SMRange(ValStart, ValEnd);
4799 
4800     if (IVal < 0)
4801       return OutOfRangeError(ValRange);
4802 
4803     uint64_t Val = IVal;
4804 
4805 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4806   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4807     return OutOfRangeError(RANGE);                                             \
4808   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4809 
4810     if (ID == ".amdhsa_group_segment_fixed_size") {
4811       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4812         return OutOfRangeError(ValRange);
4813       KD.group_segment_fixed_size = Val;
4814     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4815       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4816         return OutOfRangeError(ValRange);
4817       KD.private_segment_fixed_size = Val;
4818     } else if (ID == ".amdhsa_kernarg_size") {
4819       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4820         return OutOfRangeError(ValRange);
4821       KD.kernarg_size = Val;
4822     } else if (ID == ".amdhsa_user_sgpr_count") {
4823       ExplicitUserSGPRCount = Val;
4824     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4825       if (hasArchitectedFlatScratch())
4826         return Error(IDRange.Start,
4827                      "directive is not supported with architected flat scratch",
4828                      IDRange);
4829       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4830                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4831                        Val, ValRange);
4832       if (Val)
4833         ImpliedUserSGPRCount += 4;
4834     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4835       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4836                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4837                        ValRange);
4838       if (Val)
4839         ImpliedUserSGPRCount += 2;
4840     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4841       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4842                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4843                        ValRange);
4844       if (Val)
4845         ImpliedUserSGPRCount += 2;
4846     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4847       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4848                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4849                        Val, ValRange);
4850       if (Val)
4851         ImpliedUserSGPRCount += 2;
4852     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4853       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4854                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4855                        ValRange);
4856       if (Val)
4857         ImpliedUserSGPRCount += 2;
4858     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4859       if (hasArchitectedFlatScratch())
4860         return Error(IDRange.Start,
4861                      "directive is not supported with architected flat scratch",
4862                      IDRange);
4863       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4864                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4865                        ValRange);
4866       if (Val)
4867         ImpliedUserSGPRCount += 2;
4868     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4869       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4870                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4871                        Val, ValRange);
4872       if (Val)
4873         ImpliedUserSGPRCount += 1;
4874     } else if (ID == ".amdhsa_wavefront_size32") {
4875       if (IVersion.Major < 10)
4876         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4877       EnableWavefrontSize32 = Val;
4878       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4879                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4880                        Val, ValRange);
4881     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4882       if (hasArchitectedFlatScratch())
4883         return Error(IDRange.Start,
4884                      "directive is not supported with architected flat scratch",
4885                      IDRange);
4886       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4887                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4888     } else if (ID == ".amdhsa_enable_private_segment") {
4889       if (!hasArchitectedFlatScratch())
4890         return Error(
4891             IDRange.Start,
4892             "directive is not supported without architected flat scratch",
4893             IDRange);
4894       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4895                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4896     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4897       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4898                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4899                        ValRange);
4900     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4901       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4902                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4903                        ValRange);
4904     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4905       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4906                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4907                        ValRange);
4908     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4909       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4910                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4911                        ValRange);
4912     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4913       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4914                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4915                        ValRange);
4916     } else if (ID == ".amdhsa_next_free_vgpr") {
4917       VGPRRange = ValRange;
4918       NextFreeVGPR = Val;
4919     } else if (ID == ".amdhsa_next_free_sgpr") {
4920       SGPRRange = ValRange;
4921       NextFreeSGPR = Val;
4922     } else if (ID == ".amdhsa_accum_offset") {
4923       if (!isGFX90A())
4924         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4925       AccumOffset = Val;
4926     } else if (ID == ".amdhsa_reserve_vcc") {
4927       if (!isUInt<1>(Val))
4928         return OutOfRangeError(ValRange);
4929       ReserveVCC = Val;
4930     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4931       if (IVersion.Major < 7)
4932         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4933       if (hasArchitectedFlatScratch())
4934         return Error(IDRange.Start,
4935                      "directive is not supported with architected flat scratch",
4936                      IDRange);
4937       if (!isUInt<1>(Val))
4938         return OutOfRangeError(ValRange);
4939       ReserveFlatScr = Val;
4940     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4941       if (IVersion.Major < 8)
4942         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4943       if (!isUInt<1>(Val))
4944         return OutOfRangeError(ValRange);
4945       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4946         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4947                                  IDRange);
4948     } else if (ID == ".amdhsa_float_round_mode_32") {
4949       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4950                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4951     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4952       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4953                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4954     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4955       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4956                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4957     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4958       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4959                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4960                        ValRange);
4961     } else if (ID == ".amdhsa_dx10_clamp") {
4962       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4963                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4964     } else if (ID == ".amdhsa_ieee_mode") {
4965       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4966                        Val, ValRange);
4967     } else if (ID == ".amdhsa_fp16_overflow") {
4968       if (IVersion.Major < 9)
4969         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4970       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4971                        ValRange);
4972     } else if (ID == ".amdhsa_tg_split") {
4973       if (!isGFX90A())
4974         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4975       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4976                        ValRange);
4977     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4978       if (IVersion.Major < 10)
4979         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4980       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4981                        ValRange);
4982     } else if (ID == ".amdhsa_memory_ordered") {
4983       if (IVersion.Major < 10)
4984         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4985       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4986                        ValRange);
4987     } else if (ID == ".amdhsa_forward_progress") {
4988       if (IVersion.Major < 10)
4989         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4990       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4991                        ValRange);
4992     } else if (ID == ".amdhsa_shared_vgpr_count") {
4993       if (IVersion.Major < 10)
4994         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4995       SharedVGPRCount = Val;
4996       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
4997                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
4998                        ValRange);
4999     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5000       PARSE_BITS_ENTRY(
5001           KD.compute_pgm_rsrc2,
5002           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5003           ValRange);
5004     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5005       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5006                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5007                        Val, ValRange);
5008     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5009       PARSE_BITS_ENTRY(
5010           KD.compute_pgm_rsrc2,
5011           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5012           ValRange);
5013     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5014       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5015                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5016                        Val, ValRange);
5017     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5018       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5019                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5020                        Val, ValRange);
5021     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5022       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5023                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5024                        Val, ValRange);
5025     } else if (ID == ".amdhsa_exception_int_div_zero") {
5026       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5027                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5028                        Val, ValRange);
5029     } else {
5030       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5031     }
5032 
5033 #undef PARSE_BITS_ENTRY
5034   }
5035 
5036   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5037     return TokError(".amdhsa_next_free_vgpr directive is required");
5038 
5039   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5040     return TokError(".amdhsa_next_free_sgpr directive is required");
5041 
5042   unsigned VGPRBlocks;
5043   unsigned SGPRBlocks;
5044   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5045                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5046                          EnableWavefrontSize32, NextFreeVGPR,
5047                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5048                          SGPRBlocks))
5049     return true;
5050 
5051   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5052           VGPRBlocks))
5053     return OutOfRangeError(VGPRRange);
5054   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5055                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5056 
5057   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5058           SGPRBlocks))
5059     return OutOfRangeError(SGPRRange);
5060   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5061                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5062                   SGPRBlocks);
5063 
5064   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdhsa_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");
5067 
5068   unsigned UserSGPRCount =
5069       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5070 
5071   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5072     return TokError("too many user SGPRs enabled");
5073   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5074                   UserSGPRCount);
5075 
5076   if (isGFX90A()) {
5077     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5078       return TokError(".amdhsa_accum_offset directive is required");
5079     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5080       return TokError("accum_offset should be in range [4..256] in "
5081                       "increments of 4");
5082     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5083       return TokError("accum_offset exceeds total VGPR allocation");
5084     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5085                     (AccumOffset / 4 - 1));
5086   }
5087 
5088   if (IVersion.Major == 10) {
    // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY.
    if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5091       return TokError("shared_vgpr_count directive not valid on "
5092                       "wavefront size 32");
5093     }
5094     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
      return TokError("shared_vgpr_count*2 + "
                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      "exceed 63");
5098     }
5099   }
5100 
5101   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5102       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5103       ReserveFlatScr);
5104   return false;
5105 }
5106 
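// Parse the legacy .hsa_code_object_version directive, which takes a
// comma-separated major and minor version, e.g. (illustrative):
//   .hsa_code_object_version 2,1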
5107 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5108   uint32_t Major;
5109   uint32_t Minor;
5110 
5111   if (ParseDirectiveMajorMinor(Major, Minor))
5112     return true;
5113 
5114   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5115   return false;
5116 }
5117 
5118 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5119   uint32_t Major;
5120   uint32_t Minor;
5121   uint32_t Stepping;
5122   StringRef VendorName;
5123   StringRef ArchName;
5124 
5125   // If this directive has no arguments, then use the ISA version for the
5126   // targeted GPU.
5127   if (isToken(AsmToken::EndOfStatement)) {
5128     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5129     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5130                                                         ISA.Stepping,
5131                                                         "AMD", "AMDGPU");
5132     return false;
5133   }
5134 
5135   if (ParseDirectiveMajorMinor(Major, Minor))
5136     return true;
5137 
5138   if (!trySkipToken(AsmToken::Comma))
5139     return TokError("stepping version number required, comma expected");
5140 
5141   if (ParseAsAbsoluteExpression(Stepping))
5142     return TokError("invalid stepping version");
5143 
5144   if (!trySkipToken(AsmToken::Comma))
5145     return TokError("vendor name required, comma expected");
5146 
5147   if (!parseString(VendorName, "invalid vendor name"))
5148     return true;
5149 
5150   if (!trySkipToken(AsmToken::Comma))
5151     return TokError("arch name required, comma expected");
5152 
5153   if (!parseString(ArchName, "invalid arch name"))
5154     return true;
5155 
5156   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5157                                                       VendorName, ArchName);
5158   return false;
5159 }
5160 
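// Parse a single "key = value" entry inside an .amd_kernel_code_t block and
// apply it to Header, rejecting settings (e.g. wavefront_size, or the GFX10
// WGP/mem-ordered/fwd-progress bits) that are not valid for the current
// subtarget.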
5161 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5162                                                amd_kernel_code_t &Header) {
5163   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5164   // assembly for backwards compatibility.
5165   if (ID == "max_scratch_backing_memory_byte_size") {
5166     Parser.eatToEndOfStatement();
5167     return false;
5168   }
5169 
5170   SmallString<40> ErrStr;
5171   raw_svector_ostream Err(ErrStr);
5172   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5173     return TokError(Err.str());
5174   }
5175   Lex();
5176 
5177   if (ID == "enable_wavefront_size32") {
5178     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5179       if (!isGFX10Plus())
5180         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5181       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5182         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5183     } else {
5184       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5185         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5186     }
5187   }
5188 
5189   if (ID == "wavefront_size") {
5190     if (Header.wavefront_size == 5) {
5191       if (!isGFX10Plus())
5192         return TokError("wavefront_size=5 is only allowed on GFX10+");
5193       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5194         return TokError("wavefront_size=5 requires +WavefrontSize32");
5195     } else if (Header.wavefront_size == 6) {
5196       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5197         return TokError("wavefront_size=6 requires +WavefrontSize64");
5198     }
5199   }
5200 
5201   if (ID == "enable_wgp_mode") {
5202     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5203         !isGFX10Plus())
5204       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5205   }
5206 
5207   if (ID == "enable_mem_ordered") {
5208     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5209         !isGFX10Plus())
5210       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5211   }
5212 
5213   if (ID == "enable_fwd_progress") {
5214     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5215         !isGFX10Plus())
5216       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5217   }
5218 
5219   return false;
5220 }
5221 
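// Parse a legacy .amd_kernel_code_t ... .end_amd_kernel_code_t block of
// "key = value" entries, e.g. (illustrative):
//   .amd_kernel_code_t
//     enable_wavefront_size32 = 0
//     wavefront_size = 6
//   .end_amd_kernel_code_t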
5222 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5223   amd_kernel_code_t Header;
5224   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5225 
5226   while (true) {
5227     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5228     // will set the current token to EndOfStatement.
5229     while(trySkipToken(AsmToken::EndOfStatement));
5230 
5231     StringRef ID;
5232     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5233       return true;
5234 
5235     if (ID == ".end_amd_kernel_code_t")
5236       break;
5237 
5238     if (ParseAMDKernelCodeTValue(ID, Header))
5239       return true;
5240   }
5241 
5242   getTargetStreamer().EmitAMDKernelCodeT(Header);
5243 
5244   return false;
5245 }
5246 
5247 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5248   StringRef KernelName;
5249   if (!parseId(KernelName, "expected symbol name"))
5250     return true;
5251 
5252   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5253                                            ELF::STT_AMDGPU_HSA_KERNEL);
5254 
5255   KernelScope.initialize(getContext());
5256   return false;
5257 }
5258 
5259 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5260   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5261     return Error(getLoc(),
5262                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5263                  "architectures");
5264   }
5265 
5266   auto TargetIDDirective = getLexer().getTok().getStringContents();
5267   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5268     return Error(getParser().getTok().getLoc(), "target id must match options");
5269 
5270   getTargetStreamer().EmitISAVersion();
5271   Lex();
5272 
5273   return false;
5274 }
5275 
5276 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5277   const char *AssemblerDirectiveBegin;
5278   const char *AssemblerDirectiveEnd;
5279   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5280       isHsaAbiVersion3AndAbove(&getSTI())
5281           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5282                             HSAMD::V3::AssemblerDirectiveEnd)
5283           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5284                             HSAMD::AssemblerDirectiveEnd);
5285 
5286   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5287     return Error(getLoc(),
5288                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5289                  "not available on non-amdhsa OSes")).str());
5290   }
5291 
5292   std::string HSAMetadataString;
5293   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5294                           HSAMetadataString))
5295     return true;
5296 
5297   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5298     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5299       return Error(getLoc(), "invalid HSA metadata");
5300   } else {
5301     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5302       return Error(getLoc(), "invalid HSA metadata");
5303   }
5304 
5305   return false;
5306 }
5307 
5308 /// Common code to parse out a block of text (typically YAML) between start and
5309 /// end directives.
5310 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5311                                           const char *AssemblerDirectiveEnd,
5312                                           std::string &CollectString) {
5313 
5314   raw_string_ostream CollectStream(CollectString);
5315 
5316   getLexer().setSkipSpace(false);
5317 
5318   bool FoundEnd = false;
5319   while (!isToken(AsmToken::Eof)) {
5320     while (isToken(AsmToken::Space)) {
5321       CollectStream << getTokenStr();
5322       Lex();
5323     }
5324 
5325     if (trySkipId(AssemblerDirectiveEnd)) {
5326       FoundEnd = true;
5327       break;
5328     }
5329 
5330     CollectStream << Parser.parseStringToEndOfStatement()
5331                   << getContext().getAsmInfo()->getSeparatorString();
5332 
5333     Parser.eatToEndOfStatement();
5334   }
5335 
5336   getLexer().setSkipSpace(true);
5337 
5338   if (isToken(AsmToken::Eof) && !FoundEnd) {
5339     return TokError(Twine("expected directive ") +
5340                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5341   }
5342 
5343   CollectStream.flush();
5344   return false;
5345 }
5346 
5347 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5348 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5349   std::string String;
5350   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5351                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5352     return true;
5353 
5354   auto PALMetadata = getTargetStreamer().getPALMetadata();
5355   if (!PALMetadata->setFromString(String))
5356     return Error(getLoc(), "invalid PAL metadata");
5357   return false;
5358 }
5359 
5360 /// Parse the assembler directive for old linear-format PAL metadata.
5361 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5362   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5363     return Error(getLoc(),
5364                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5365                  "not available on non-amdpal OSes")).str());
5366   }
5367 
5368   auto PALMetadata = getTargetStreamer().getPALMetadata();
5369   PALMetadata->setLegacy();
5370   for (;;) {
5371     uint32_t Key, Value;
5372     if (ParseAsAbsoluteExpression(Key)) {
5373       return TokError(Twine("invalid value in ") +
5374                       Twine(PALMD::AssemblerDirective));
5375     }
5376     if (!trySkipToken(AsmToken::Comma)) {
5377       return TokError(Twine("expected an even number of values in ") +
5378                       Twine(PALMD::AssemblerDirective));
5379     }
5380     if (ParseAsAbsoluteExpression(Value)) {
5381       return TokError(Twine("invalid value in ") +
5382                       Twine(PALMD::AssemblerDirective));
5383     }
5384     PALMetadata->setRegister(Key, Value);
5385     if (!trySkipToken(AsmToken::Comma))
5386       break;
5387   }
5388   return false;
5389 }
5390 
5391 /// ParseDirectiveAMDGPULDS
5392 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
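///  e.g. (illustrative): .amdgpu_lds lds_buffer, 4096, 16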
5393 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5394   if (getParser().checkForValidSection())
5395     return true;
5396 
5397   StringRef Name;
5398   SMLoc NameLoc = getLoc();
5399   if (getParser().parseIdentifier(Name))
5400     return TokError("expected identifier in directive");
5401 
5402   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5403   if (parseToken(AsmToken::Comma, "expected ','"))
5404     return true;
5405 
5406   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5407 
5408   int64_t Size;
5409   SMLoc SizeLoc = getLoc();
5410   if (getParser().parseAbsoluteExpression(Size))
5411     return true;
5412   if (Size < 0)
5413     return Error(SizeLoc, "size must be non-negative");
5414   if (Size > LocalMemorySize)
5415     return Error(SizeLoc, "size is too large");
5416 
5417   int64_t Alignment = 4;
5418   if (trySkipToken(AsmToken::Comma)) {
5419     SMLoc AlignLoc = getLoc();
5420     if (getParser().parseAbsoluteExpression(Alignment))
5421       return true;
5422     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5423       return Error(AlignLoc, "alignment must be a power of two");
5424 
5425     // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
5427     // to make sure the alignment fits nicely into a 32-bit integer.
5428     if (Alignment >= 1u << 31)
5429       return Error(AlignLoc, "alignment is too large");
5430   }
5431 
5432   if (parseToken(AsmToken::EndOfStatement,
5433                  "unexpected token in '.amdgpu_lds' directive"))
5434     return true;
5435 
5436   Symbol->redefineIfPossible();
5437   if (!Symbol->isUndefined())
5438     return Error(NameLoc, "invalid symbol redefinition");
5439 
5440   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5441   return false;
5442 }
5443 
5444 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5445   StringRef IDVal = DirectiveID.getString();
5446 
5447   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5448     if (IDVal == ".amdhsa_kernel")
5449      return ParseDirectiveAMDHSAKernel();
5450 
5451     // TODO: Restructure/combine with PAL metadata directive.
5452     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5453       return ParseDirectiveHSAMetadata();
5454   } else {
5455     if (IDVal == ".hsa_code_object_version")
5456       return ParseDirectiveHSACodeObjectVersion();
5457 
5458     if (IDVal == ".hsa_code_object_isa")
5459       return ParseDirectiveHSACodeObjectISA();
5460 
5461     if (IDVal == ".amd_kernel_code_t")
5462       return ParseDirectiveAMDKernelCodeT();
5463 
5464     if (IDVal == ".amdgpu_hsa_kernel")
5465       return ParseDirectiveAMDGPUHsaKernel();
5466 
5467     if (IDVal == ".amd_amdgpu_isa")
5468       return ParseDirectiveISAVersion();
5469 
5470     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5471       return ParseDirectiveHSAMetadata();
5472   }
5473 
5474   if (IDVal == ".amdgcn_target")
5475     return ParseDirectiveAMDGCNTarget();
5476 
5477   if (IDVal == ".amdgpu_lds")
5478     return ParseDirectiveAMDGPULDS();
5479 
5480   if (IDVal == PALMD::AssemblerDirectiveBegin)
5481     return ParseDirectivePALMetadataBegin();
5482 
5483   if (IDVal == PALMD::AssemblerDirective)
5484     return ParseDirectivePALMetadata();
5485 
5486   return true;
5487 }
5488 
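// Return true if RegNo is usable as a register operand on the current
// subtarget, e.g. XNACK_MASK only on XNACK-capable VI/GFX9 targets, and no
// flat_scratch register operands on SI or GFX10+.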
5489 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5490                                            unsigned RegNo) {
5491 
5492   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5493     return isGFX9Plus();
5494 
5495   // GFX10 has 2 more SGPRs 104 and 105.
5496   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5497     return hasSGPR104_SGPR105();
5498 
5499   switch (RegNo) {
5500   case AMDGPU::SRC_SHARED_BASE:
5501   case AMDGPU::SRC_SHARED_LIMIT:
5502   case AMDGPU::SRC_PRIVATE_BASE:
5503   case AMDGPU::SRC_PRIVATE_LIMIT:
5504   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5505     return isGFX9Plus();
5506   case AMDGPU::TBA:
5507   case AMDGPU::TBA_LO:
5508   case AMDGPU::TBA_HI:
5509   case AMDGPU::TMA:
5510   case AMDGPU::TMA_LO:
5511   case AMDGPU::TMA_HI:
5512     return !isGFX9Plus();
5513   case AMDGPU::XNACK_MASK:
5514   case AMDGPU::XNACK_MASK_LO:
5515   case AMDGPU::XNACK_MASK_HI:
5516     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5517   case AMDGPU::SGPR_NULL:
5518     return isGFX10Plus();
5519   default:
5520     break;
5521   }
5522 
5523   if (isCI())
5524     return true;
5525 
5526   if (isSI() || isGFX10Plus()) {
5527     // No flat_scr on SI.
5528     // On GFX10 flat scratch is not a valid register operand and can only be
5529     // accessed with s_setreg/s_getreg.
5530     switch (RegNo) {
5531     case AMDGPU::FLAT_SCR:
5532     case AMDGPU::FLAT_SCR_LO:
5533     case AMDGPU::FLAT_SCR_HI:
5534       return false;
5535     default:
5536       return true;
5537     }
5538   }
5539 
5540   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5541   // SI/CI have.
5542   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5543     return hasSGPR102_SGPR103();
5544 
5545   return true;
5546 }
5547 
5548 OperandMatchResultTy
5549 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5550                               OperandMode Mode) {
5551   // Try to parse with a custom parser
5552   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5553 
  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by custom parsers, so we shouldn't continue on to the generic parsing.
5560   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5561       isToken(AsmToken::EndOfStatement))
5562     return ResTy;
5563 
5564   SMLoc RBraceLoc;
5565   SMLoc LBraceLoc = getLoc();
5566   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5567     unsigned Prefix = Operands.size();
5568 
5569     for (;;) {
5570       auto Loc = getLoc();
5571       ResTy = parseReg(Operands);
5572       if (ResTy == MatchOperand_NoMatch)
5573         Error(Loc, "expected a register");
5574       if (ResTy != MatchOperand_Success)
5575         return MatchOperand_ParseFail;
5576 
5577       RBraceLoc = getLoc();
5578       if (trySkipToken(AsmToken::RBrac))
5579         break;
5580 
5581       if (!skipToken(AsmToken::Comma,
5582                      "expected a comma or a closing square bracket")) {
5583         return MatchOperand_ParseFail;
5584       }
5585     }
5586 
5587     if (Operands.size() - Prefix > 1) {
5588       Operands.insert(Operands.begin() + Prefix,
5589                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5590       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5591     }
5592 
5593     return MatchOperand_Success;
5594   }
5595 
5596   return parseRegOrImm(Operands);
5597 }
5598 
5599 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5600   // Clear any forced encodings from the previous instruction.
5601   setForcedEncodingSize(0);
5602   setForcedDPP(false);
5603   setForcedSDWA(false);
5604 
5605   if (Name.endswith("_e64")) {
5606     setForcedEncodingSize(64);
5607     return Name.substr(0, Name.size() - 4);
5608   } else if (Name.endswith("_e32")) {
5609     setForcedEncodingSize(32);
5610     return Name.substr(0, Name.size() - 4);
5611   } else if (Name.endswith("_dpp")) {
5612     setForcedDPP(true);
5613     return Name.substr(0, Name.size() - 4);
5614   } else if (Name.endswith("_sdwa")) {
5615     setForcedSDWA(true);
5616     return Name.substr(0, Name.size() - 5);
5617   }
5618   return Name;
5619 }
5620 
5621 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5622                                        StringRef Name,
5623                                        SMLoc NameLoc, OperandVector &Operands) {
5624   // Add the instruction mnemonic
5625   Name = parseMnemonicSuffix(Name);
5626   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5627 
5628   bool IsMIMG = Name.startswith("image_");
5629 
5630   while (!trySkipToken(AsmToken::EndOfStatement)) {
5631     OperandMode Mode = OperandMode_Default;
5632     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5633       Mode = OperandMode_NSA;
5634     CPolSeen = 0;
5635     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5636 
5637     if (Res != MatchOperand_Success) {
5638       checkUnsupportedInstruction(Name, NameLoc);
5639       if (!Parser.hasPendingError()) {
5640         // FIXME: use real operand location rather than the current location.
5641         StringRef Msg =
5642           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5643                                             "not a valid operand.";
5644         Error(getLoc(), Msg);
5645       }
5646       while (!trySkipToken(AsmToken::EndOfStatement)) {
5647         lex();
5648       }
5649       return true;
5650     }
5651 
    // Eat the comma if there is one (any whitespace is handled by the lexer).
5653     trySkipToken(AsmToken::Comma);
5654   }
5655 
5656   return false;
5657 }
5658 
5659 //===----------------------------------------------------------------------===//
5660 // Utility functions
5661 //===----------------------------------------------------------------------===//
5662 
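// Parse an integer operand written as "<Prefix>:<expr>", for example
// offset:16 (illustrative). Returns MatchOperand_NoMatch when the prefix is
// absent so that other parsers may be tried.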
5663 OperandMatchResultTy
5664 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5665 
5666   if (!trySkipId(Prefix, AsmToken::Colon))
5667     return MatchOperand_NoMatch;
5668 
5669   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5670 }
5671 
5672 OperandMatchResultTy
5673 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5674                                     AMDGPUOperand::ImmTy ImmTy,
5675                                     bool (*ConvertResult)(int64_t&)) {
5676   SMLoc S = getLoc();
5677   int64_t Value = 0;
5678 
5679   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5680   if (Res != MatchOperand_Success)
5681     return Res;
5682 
5683   if (ConvertResult && !ConvertResult(Value)) {
5684     Error(S, "invalid " + StringRef(Prefix) + " value.");
5685   }
5686 
5687   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5688   return MatchOperand_Success;
5689 }
5690 
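// Parse an array of 0/1 values written as "<Prefix>:[a,b,...]" (up to MaxSize
// elements) and pack them into a single immediate, with element I providing
// bit I.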
5691 OperandMatchResultTy
5692 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5693                                              OperandVector &Operands,
5694                                              AMDGPUOperand::ImmTy ImmTy,
5695                                              bool (*ConvertResult)(int64_t&)) {
5696   SMLoc S = getLoc();
5697   if (!trySkipId(Prefix, AsmToken::Colon))
5698     return MatchOperand_NoMatch;
5699 
5700   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5701     return MatchOperand_ParseFail;
5702 
5703   unsigned Val = 0;
5704   const unsigned MaxSize = 4;
5705 
5706   // FIXME: How to verify the number of elements matches the number of src
5707   // operands?
5708   for (int I = 0; ; ++I) {
5709     int64_t Op;
5710     SMLoc Loc = getLoc();
5711     if (!parseExpr(Op))
5712       return MatchOperand_ParseFail;
5713 
5714     if (Op != 0 && Op != 1) {
5715       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5716       return MatchOperand_ParseFail;
5717     }
5718 
5719     Val |= (Op << I);
5720 
5721     if (trySkipToken(AsmToken::RBrac))
5722       break;
5723 
5724     if (I + 1 == MaxSize) {
5725       Error(getLoc(), "expected a closing square bracket");
5726       return MatchOperand_ParseFail;
5727     }
5728 
5729     if (!skipToken(AsmToken::Comma, "expected a comma"))
5730       return MatchOperand_ParseFail;
5731   }
5732 
5733   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5734   return MatchOperand_Success;
5735 }
5736 
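// Parse a named single-bit modifier: the bare name sets the bit and the
// "no"-prefixed spelling clears it, e.g. gds vs. nogds (illustrative).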
5737 OperandMatchResultTy
5738 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5739                                AMDGPUOperand::ImmTy ImmTy) {
5740   int64_t Bit;
5741   SMLoc S = getLoc();
5742 
5743   if (trySkipId(Name)) {
5744     Bit = 1;
5745   } else if (trySkipId("no", Name)) {
5746     Bit = 0;
5747   } else {
5748     return MatchOperand_NoMatch;
5749   }
5750 
5751   if (Name == "r128" && !hasMIMG_R128()) {
5752     Error(S, "r128 modifier is not supported on this GPU");
5753     return MatchOperand_ParseFail;
5754   }
5755   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5756     Error(S, "a16 modifier is not supported on this GPU");
5757     return MatchOperand_ParseFail;
5758   }
5759 
5760   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5761     ImmTy = AMDGPUOperand::ImmTyR128A16;
5762 
5763   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5764   return MatchOperand_Success;
5765 }
5766 
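// Parse cache policy modifiers (glc/slc/dlc/scc and their "no"-prefixed
// forms, or sc0/sc1/nt on gfx940 vector instructions). Duplicate modifiers
// are rejected via CPolSeen, and all bits are folded into a single CPol
// immediate operand.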
5767 OperandMatchResultTy
5768 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5769   unsigned CPolOn = 0;
5770   unsigned CPolOff = 0;
5771   SMLoc S = getLoc();
5772 
5773   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5774   if (isGFX940() && !Mnemo.startswith("s_")) {
5775     if (trySkipId("sc0"))
5776       CPolOn = AMDGPU::CPol::SC0;
5777     else if (trySkipId("nosc0"))
5778       CPolOff = AMDGPU::CPol::SC0;
5779     else if (trySkipId("nt"))
5780       CPolOn = AMDGPU::CPol::NT;
5781     else if (trySkipId("nont"))
5782       CPolOff = AMDGPU::CPol::NT;
5783     else if (trySkipId("sc1"))
5784       CPolOn = AMDGPU::CPol::SC1;
5785     else if (trySkipId("nosc1"))
5786       CPolOff = AMDGPU::CPol::SC1;
5787     else
5788       return MatchOperand_NoMatch;
5789   }
5790   else if (trySkipId("glc"))
5791     CPolOn = AMDGPU::CPol::GLC;
5792   else if (trySkipId("noglc"))
5793     CPolOff = AMDGPU::CPol::GLC;
5794   else if (trySkipId("slc"))
5795     CPolOn = AMDGPU::CPol::SLC;
5796   else if (trySkipId("noslc"))
5797     CPolOff = AMDGPU::CPol::SLC;
5798   else if (trySkipId("dlc"))
5799     CPolOn = AMDGPU::CPol::DLC;
5800   else if (trySkipId("nodlc"))
5801     CPolOff = AMDGPU::CPol::DLC;
5802   else if (trySkipId("scc"))
5803     CPolOn = AMDGPU::CPol::SCC;
5804   else if (trySkipId("noscc"))
5805     CPolOff = AMDGPU::CPol::SCC;
5806   else
5807     return MatchOperand_NoMatch;
5808 
5809   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5810     Error(S, "dlc modifier is not supported on this GPU");
5811     return MatchOperand_ParseFail;
5812   }
5813 
5814   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5815     Error(S, "scc modifier is not supported on this GPU");
5816     return MatchOperand_ParseFail;
5817   }
5818 
5819   if (CPolSeen & (CPolOn | CPolOff)) {
5820     Error(S, "duplicate cache policy modifier");
5821     return MatchOperand_ParseFail;
5822   }
5823 
5824   CPolSeen |= (CPolOn | CPolOff);
5825 
5826   for (unsigned I = 1; I != Operands.size(); ++I) {
5827     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5828     if (Op.isCPol()) {
5829       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5830       return MatchOperand_Success;
5831     }
5832   }
5833 
5834   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5835                                               AMDGPUOperand::ImmTyCPol));
5836 
5837   return MatchOperand_Success;
5838 }
5839 
5840 static void addOptionalImmOperand(
5841   MCInst& Inst, const OperandVector& Operands,
5842   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5843   AMDGPUOperand::ImmTy ImmT,
5844   int64_t Default = 0) {
5845   auto i = OptionalIdx.find(ImmT);
5846   if (i != OptionalIdx.end()) {
5847     unsigned Idx = i->second;
5848     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5849   } else {
5850     Inst.addOperand(MCOperand::createImm(Default));
5851   }
5852 }
5853 
5854 OperandMatchResultTy
5855 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5856                                        StringRef &Value,
5857                                        SMLoc &StringLoc) {
5858   if (!trySkipId(Prefix, AsmToken::Colon))
5859     return MatchOperand_NoMatch;
5860 
5861   StringLoc = getLoc();
5862   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5863                                                   : MatchOperand_ParseFail;
5864 }
5865 
5866 //===----------------------------------------------------------------------===//
5867 // MTBUF format
5868 //===----------------------------------------------------------------------===//
5869 
5870 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5871                                   int64_t MaxVal,
5872                                   int64_t &Fmt) {
5873   int64_t Val;
5874   SMLoc Loc = getLoc();
5875 
5876   auto Res = parseIntWithPrefix(Pref, Val);
5877   if (Res == MatchOperand_ParseFail)
5878     return false;
5879   if (Res == MatchOperand_NoMatch)
5880     return true;
5881 
5882   if (Val < 0 || Val > MaxVal) {
5883     Error(Loc, Twine("out of range ", StringRef(Pref)));
5884     return false;
5885   }
5886 
5887   Fmt = Val;
5888   return true;
5889 }
5890 
5891 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5892 // values to live in a joint format operand in the MCInst encoding.
5893 OperandMatchResultTy
5894 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5895   using namespace llvm::AMDGPU::MTBUFFormat;
5896 
5897   int64_t Dfmt = DFMT_UNDEF;
5898   int64_t Nfmt = NFMT_UNDEF;
5899 
5900   // dfmt and nfmt can appear in either order, and each is optional.
5901   for (int I = 0; I < 2; ++I) {
5902     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5903       return MatchOperand_ParseFail;
5904 
5905     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5906       return MatchOperand_ParseFail;
5907     }
5908     // Skip optional comma between dfmt/nfmt
5909     // but guard against 2 commas following each other.
5910     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5911         !peekToken().is(AsmToken::Comma)) {
5912       trySkipToken(AsmToken::Comma);
5913     }
5914   }
5915 
5916   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5917     return MatchOperand_NoMatch;
5918 
5919   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5920   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5921 
5922   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5923   return MatchOperand_Success;
5924 }
5925 
5926 OperandMatchResultTy
5927 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5928   using namespace llvm::AMDGPU::MTBUFFormat;
5929 
5930   int64_t Fmt = UFMT_UNDEF;
5931 
5932   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5933     return MatchOperand_ParseFail;
5934 
5935   if (Fmt == UFMT_UNDEF)
5936     return MatchOperand_NoMatch;
5937 
5938   Format = Fmt;
5939   return MatchOperand_Success;
5940 }
5941 
5942 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5943                                     int64_t &Nfmt,
5944                                     StringRef FormatStr,
5945                                     SMLoc Loc) {
5946   using namespace llvm::AMDGPU::MTBUFFormat;
5947   int64_t Format;
5948 
5949   Format = getDfmt(FormatStr);
5950   if (Format != DFMT_UNDEF) {
5951     Dfmt = Format;
5952     return true;
5953   }
5954 
5955   Format = getNfmt(FormatStr, getSTI());
5956   if (Format != NFMT_UNDEF) {
5957     Nfmt = Format;
5958     return true;
5959   }
5960 
5961   Error(Loc, "unsupported format");
5962   return false;
5963 }
5964 
5965 OperandMatchResultTy
5966 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5967                                           SMLoc FormatLoc,
5968                                           int64_t &Format) {
5969   using namespace llvm::AMDGPU::MTBUFFormat;
5970 
5971   int64_t Dfmt = DFMT_UNDEF;
5972   int64_t Nfmt = NFMT_UNDEF;
5973   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5974     return MatchOperand_ParseFail;
5975 
5976   if (trySkipToken(AsmToken::Comma)) {
5977     StringRef Str;
5978     SMLoc Loc = getLoc();
5979     if (!parseId(Str, "expected a format string") ||
5980         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5981       return MatchOperand_ParseFail;
5982     }
5983     if (Dfmt == DFMT_UNDEF) {
5984       Error(Loc, "duplicate numeric format");
5985       return MatchOperand_ParseFail;
5986     } else if (Nfmt == NFMT_UNDEF) {
5987       Error(Loc, "duplicate data format");
5988       return MatchOperand_ParseFail;
5989     }
5990   }
5991 
5992   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5993   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5994 
5995   if (isGFX10Plus()) {
5996     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5997     if (Ufmt == UFMT_UNDEF) {
5998       Error(FormatLoc, "unsupported format");
5999       return MatchOperand_ParseFail;
6000     }
6001     Format = Ufmt;
6002   } else {
6003     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6004   }
6005 
6006   return MatchOperand_Success;
6007 }
6008 
6009 OperandMatchResultTy
6010 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6011                                             SMLoc Loc,
6012                                             int64_t &Format) {
6013   using namespace llvm::AMDGPU::MTBUFFormat;
6014 
6015   auto Id = getUnifiedFormat(FormatStr);
6016   if (Id == UFMT_UNDEF)
6017     return MatchOperand_NoMatch;
6018 
6019   if (!isGFX10Plus()) {
6020     Error(Loc, "unified format is not supported on this GPU");
6021     return MatchOperand_ParseFail;
6022   }
6023 
6024   Format = Id;
6025   return MatchOperand_Success;
6026 }
6027 
6028 OperandMatchResultTy
6029 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6030   using namespace llvm::AMDGPU::MTBUFFormat;
6031   SMLoc Loc = getLoc();
6032 
6033   if (!parseExpr(Format))
6034     return MatchOperand_ParseFail;
6035   if (!isValidFormatEncoding(Format, getSTI())) {
6036     Error(Loc, "out of range format");
6037     return MatchOperand_ParseFail;
6038   }
6039 
6040   return MatchOperand_Success;
6041 }
6042 
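// Parse a "format:" operand, accepting either a symbolic form in square
// brackets or a plain numeric expression, e.g. (illustrative):
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]   or   format:22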
6043 OperandMatchResultTy
6044 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6045   using namespace llvm::AMDGPU::MTBUFFormat;
6046 
6047   if (!trySkipId("format", AsmToken::Colon))
6048     return MatchOperand_NoMatch;
6049 
6050   if (trySkipToken(AsmToken::LBrac)) {
6051     StringRef FormatStr;
6052     SMLoc Loc = getLoc();
6053     if (!parseId(FormatStr, "expected a format string"))
6054       return MatchOperand_ParseFail;
6055 
6056     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6057     if (Res == MatchOperand_NoMatch)
6058       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6059     if (Res != MatchOperand_Success)
6060       return Res;
6061 
6062     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6063       return MatchOperand_ParseFail;
6064 
6065     return MatchOperand_Success;
6066   }
6067 
6068   return parseNumericFormat(Format);
6069 }
6070 
6071 OperandMatchResultTy
6072 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6073   using namespace llvm::AMDGPU::MTBUFFormat;
6074 
6075   int64_t Format = getDefaultFormatEncoding(getSTI());
6076   OperandMatchResultTy Res;
6077   SMLoc Loc = getLoc();
6078 
6079   // Parse legacy format syntax.
6080   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6081   if (Res == MatchOperand_ParseFail)
6082     return Res;
6083 
6084   bool FormatFound = (Res == MatchOperand_Success);
6085 
6086   Operands.push_back(
6087     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6088 
6089   if (FormatFound)
6090     trySkipToken(AsmToken::Comma);
6091 
6092   if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let the matcher handle the error.
6095     return MatchOperand_Success;
6096   }
6097 
6098   // Parse soffset.
6099   Res = parseRegOrImm(Operands);
6100   if (Res != MatchOperand_Success)
6101     return Res;
6102 
6103   trySkipToken(AsmToken::Comma);
6104 
6105   if (!FormatFound) {
6106     Res = parseSymbolicOrNumericFormat(Format);
6107     if (Res == MatchOperand_ParseFail)
6108       return Res;
6109     if (Res == MatchOperand_Success) {
6110       auto Size = Operands.size();
6111       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6112       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6113       Op.setImm(Format);
6114     }
6115     return MatchOperand_Success;
6116   }
6117 
6118   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6119     Error(getLoc(), "duplicate format");
6120     return MatchOperand_ParseFail;
6121   }
6122   return MatchOperand_Success;
6123 }
6124 
6125 //===----------------------------------------------------------------------===//
6126 // ds
6127 //===----------------------------------------------------------------------===//
6128 
6129 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6130                                     const OperandVector &Operands) {
6131   OptionalImmIndexMap OptionalIdx;
6132 
6133   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6134     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6135 
6136     // Add the register arguments
6137     if (Op.isReg()) {
6138       Op.addRegOperands(Inst, 1);
6139       continue;
6140     }
6141 
6142     // Handle optional arguments
6143     OptionalIdx[Op.getImmTy()] = i;
6144   }
6145 
6146   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6147   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6148   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6149 
6150   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6151 }
6152 
6153 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6154                                 bool IsGdsHardcoded) {
6155   OptionalImmIndexMap OptionalIdx;
6156 
6157   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6158     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6159 
6160     // Add the register arguments
6161     if (Op.isReg()) {
6162       Op.addRegOperands(Inst, 1);
6163       continue;
6164     }
6165 
6166     if (Op.isToken() && Op.getToken() == "gds") {
6167       IsGdsHardcoded = true;
6168       continue;
6169     }
6170 
6171     // Handle optional arguments
6172     OptionalIdx[Op.getImmTy()] = i;
6173   }
6174 
6175   AMDGPUOperand::ImmTy OffsetType =
6176     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6177      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6178      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6179                                                       AMDGPUOperand::ImmTyOffset;
6180 
6181   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6182 
6183   if (!IsGdsHardcoded) {
6184     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6185   }
6186   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6187 }
6188 
6189 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6190   OptionalImmIndexMap OptionalIdx;
6191 
6192   unsigned OperandIdx[4];
6193   unsigned EnMask = 0;
6194   int SrcIdx = 0;
6195 
6196   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6197     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6198 
6199     // Add the register arguments
6200     if (Op.isReg()) {
6201       assert(SrcIdx < 4);
6202       OperandIdx[SrcIdx] = Inst.size();
6203       Op.addRegOperands(Inst, 1);
6204       ++SrcIdx;
6205       continue;
6206     }
6207 
6208     if (Op.isOff()) {
6209       assert(SrcIdx < 4);
6210       OperandIdx[SrcIdx] = Inst.size();
6211       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6212       ++SrcIdx;
6213       continue;
6214     }
6215 
6216     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6217       Op.addImmOperands(Inst, 1);
6218       continue;
6219     }
6220 
6221     if (Op.isToken() && Op.getToken() == "done")
6222       continue;
6223 
6224     // Handle optional arguments
6225     OptionalIdx[Op.getImmTy()] = i;
6226   }
6227 
6228   assert(SrcIdx == 4);
6229 
6230   bool Compr = false;
6231   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6232     Compr = true;
6233     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6234     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6235     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6236   }
6237 
6238   for (auto i = 0; i < SrcIdx; ++i) {
6239     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6240       EnMask |= Compr ? (0x3 << (i * 2)) : (0x1 << i);
6241     }
6242   }
6243 
6244   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6245   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6246 
6247   Inst.addOperand(MCOperand::createImm(EnMask));
6248 }
6249 
6250 //===----------------------------------------------------------------------===//
6251 // s_waitcnt
6252 //===----------------------------------------------------------------------===//
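//
// Illustrative examples of the syntax accepted by the parsers below (assumed
// spellings; the code is the authoritative reference):
//   s_waitcnt 0                                  (raw 16-bit mask)
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)      (symbolic counters)
//   s_waitcnt vmcnt(1) & lgkmcnt(2)              ('&' and ',' separators accepted)
// The *_sat variants (e.g. vmcnt_sat(N)) saturate an out-of-range value
// instead of reporting an error.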
6253 
6254 static bool
6255 encodeCnt(
6256   const AMDGPU::IsaVersion ISA,
6257   int64_t &IntVal,
6258   int64_t CntVal,
6259   bool Saturate,
6260   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6261   unsigned (*decode)(const IsaVersion &Version, unsigned))
6262 {
6263   bool Failed = false;
6264 
6265   IntVal = encode(ISA, IntVal, CntVal);
6266   if (CntVal != decode(ISA, IntVal)) {
6267     if (Saturate) {
6268       IntVal = encode(ISA, IntVal, -1);
6269     } else {
6270       Failed = true;
6271     }
6272   }
6273   return Failed;
6274 }
6275 
6276 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6277 
6278   SMLoc CntLoc = getLoc();
6279   StringRef CntName = getTokenStr();
6280 
6281   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6282       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6283     return false;
6284 
6285   int64_t CntVal;
6286   SMLoc ValLoc = getLoc();
6287   if (!parseExpr(CntVal))
6288     return false;
6289 
6290   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6291 
6292   bool Failed = true;
6293   bool Sat = CntName.endswith("_sat");
6294 
6295   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6296     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6297   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6298     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6299   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6300     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6301   } else {
6302     Error(CntLoc, "invalid counter name " + CntName);
6303     return false;
6304   }
6305 
6306   if (Failed) {
6307     Error(ValLoc, "too large value for " + CntName);
6308     return false;
6309   }
6310 
6311   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6312     return false;
6313 
6314   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6315     if (isToken(AsmToken::EndOfStatement)) {
6316       Error(getLoc(), "expected a counter name");
6317       return false;
6318     }
6319   }
6320 
6321   return true;
6322 }
6323 
6324 OperandMatchResultTy
6325 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6326   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6327   int64_t Waitcnt = getWaitcntBitMask(ISA);
6328   SMLoc S = getLoc();
6329 
6330   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6331     while (!isToken(AsmToken::EndOfStatement)) {
6332       if (!parseCnt(Waitcnt))
6333         return MatchOperand_ParseFail;
6334     }
6335   } else {
6336     if (!parseExpr(Waitcnt))
6337       return MatchOperand_ParseFail;
6338   }
6339 
6340   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6341   return MatchOperand_Success;
6342 }
6343 
6344 bool
6345 AMDGPUOperand::isSWaitCnt() const {
6346   return isImm();
6347 }
6348 
6349 //===----------------------------------------------------------------------===//
6350 // DepCtr
6351 //===----------------------------------------------------------------------===//
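//
// Illustrative examples of the depctr syntax accepted below (the counter
// names are assumptions based on the DepCtr operand tables):
//   s_waitcnt_depctr 0xffe3
//   s_waitcnt_depctr depctr_va_vdst(3) & depctr_vm_vsrc(7)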
6352 
6353 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6354                                   StringRef DepCtrName) {
6355   switch (ErrorId) {
6356   case OPR_ID_UNKNOWN:
6357     Error(Loc, Twine("invalid counter name ", DepCtrName));
6358     return;
6359   case OPR_ID_UNSUPPORTED:
6360     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6361     return;
6362   case OPR_ID_DUPLICATE:
6363     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6364     return;
6365   case OPR_VAL_INVALID:
6366     Error(Loc, Twine("invalid value for ", DepCtrName));
6367     return;
6368   default:
6369     assert(false);
6370   }
6371 }
6372 
6373 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6374 
6375   using namespace llvm::AMDGPU::DepCtr;
6376 
6377   SMLoc DepCtrLoc = getLoc();
6378   StringRef DepCtrName = getTokenStr();
6379 
6380   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6381       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6382     return false;
6383 
6384   int64_t ExprVal;
6385   if (!parseExpr(ExprVal))
6386     return false;
6387 
6388   unsigned PrevOprMask = UsedOprMask;
6389   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6390 
6391   if (CntVal < 0) {
6392     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6393     return false;
6394   }
6395 
6396   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6397     return false;
6398 
6399   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6400     if (isToken(AsmToken::EndOfStatement)) {
6401       Error(getLoc(), "expected a counter name");
6402       return false;
6403     }
6404   }
6405 
6406   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6407   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6408   return true;
6409 }
6410 
6411 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6412   using namespace llvm::AMDGPU::DepCtr;
6413 
6414   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6415   SMLoc Loc = getLoc();
6416 
6417   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6418     unsigned UsedOprMask = 0;
6419     while (!isToken(AsmToken::EndOfStatement)) {
6420       if (!parseDepCtr(DepCtr, UsedOprMask))
6421         return MatchOperand_ParseFail;
6422     }
6423   } else {
6424     if (!parseExpr(DepCtr))
6425       return MatchOperand_ParseFail;
6426   }
6427 
6428   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6429   return MatchOperand_Success;
6430 }
6431 
6432 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6433 
6434 //===----------------------------------------------------------------------===//
6435 // hwreg
6436 //===----------------------------------------------------------------------===//
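//
// Illustrative examples of the hwreg operand parsed below (symbolic register
// names are assumptions; a plain numeric code is always accepted):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE)          (default offset and width)
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 8)    (explicit offset and width)
//   s_getreg_b32 s0, hwreg(1, 0, 32)             (numeric register code)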
6437 
6438 bool
6439 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6440                                 OperandInfoTy &Offset,
6441                                 OperandInfoTy &Width) {
6442   using namespace llvm::AMDGPU::Hwreg;
6443 
6444   // The register may be specified by name or using a numeric code
6445   HwReg.Loc = getLoc();
6446   if (isToken(AsmToken::Identifier) &&
6447       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6448     HwReg.IsSymbolic = true;
6449     lex(); // skip register name
6450   } else if (!parseExpr(HwReg.Id, "a register name")) {
6451     return false;
6452   }
6453 
6454   if (trySkipToken(AsmToken::RParen))
6455     return true;
6456 
6457   // parse optional params
6458   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6459     return false;
6460 
6461   Offset.Loc = getLoc();
6462   if (!parseExpr(Offset.Id))
6463     return false;
6464 
6465   if (!skipToken(AsmToken::Comma, "expected a comma"))
6466     return false;
6467 
6468   Width.Loc = getLoc();
6469   return parseExpr(Width.Id) &&
6470          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6471 }
6472 
6473 bool
6474 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6475                                const OperandInfoTy &Offset,
6476                                const OperandInfoTy &Width) {
6477 
6478   using namespace llvm::AMDGPU::Hwreg;
6479 
6480   if (HwReg.IsSymbolic) {
6481     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6482       Error(HwReg.Loc,
6483             "specified hardware register is not supported on this GPU");
6484       return false;
6485     }
6486   } else {
6487     if (!isValidHwreg(HwReg.Id)) {
6488       Error(HwReg.Loc,
6489             "invalid code of hardware register: only 6-bit values are legal");
6490       return false;
6491     }
6492   }
6493   if (!isValidHwregOffset(Offset.Id)) {
6494     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6495     return false;
6496   }
6497   if (!isValidHwregWidth(Width.Id)) {
6498     Error(Width.Loc,
6499           "invalid bitfield width: only values from 1 to 32 are legal");
6500     return false;
6501   }
6502   return true;
6503 }
6504 
6505 OperandMatchResultTy
6506 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6507   using namespace llvm::AMDGPU::Hwreg;
6508 
6509   int64_t ImmVal = 0;
6510   SMLoc Loc = getLoc();
6511 
6512   if (trySkipId("hwreg", AsmToken::LParen)) {
6513     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6514     OperandInfoTy Offset(OFFSET_DEFAULT_);
6515     OperandInfoTy Width(WIDTH_DEFAULT_);
6516     if (parseHwregBody(HwReg, Offset, Width) &&
6517         validateHwreg(HwReg, Offset, Width)) {
6518       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6519     } else {
6520       return MatchOperand_ParseFail;
6521     }
6522   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6523     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6524       Error(Loc, "invalid immediate: only 16-bit values are legal");
6525       return MatchOperand_ParseFail;
6526     }
6527   } else {
6528     return MatchOperand_ParseFail;
6529   }
6530 
6531   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6532   return MatchOperand_Success;
6533 }
6534 
6535 bool AMDGPUOperand::isHwreg() const {
6536   return isImmTy(ImmTyHwreg);
6537 }
6538 
6539 //===----------------------------------------------------------------------===//
6540 // sendmsg
6541 //===----------------------------------------------------------------------===//
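//
// Illustrative examples of the sendmsg operand parsed below (symbolic names
// are assumptions; a plain 16-bit immediate is also accepted):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg 0x22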
6542 
6543 bool
6544 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6545                                   OperandInfoTy &Op,
6546                                   OperandInfoTy &Stream) {
6547   using namespace llvm::AMDGPU::SendMsg;
6548 
6549   Msg.Loc = getLoc();
6550   if (isToken(AsmToken::Identifier) &&
6551       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6552     Msg.IsSymbolic = true;
6553     lex(); // skip message name
6554   } else if (!parseExpr(Msg.Id, "a message name")) {
6555     return false;
6556   }
6557 
6558   if (trySkipToken(AsmToken::Comma)) {
6559     Op.IsDefined = true;
6560     Op.Loc = getLoc();
6561     if (isToken(AsmToken::Identifier) &&
6562         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6563       lex(); // skip operation name
6564     } else if (!parseExpr(Op.Id, "an operation name")) {
6565       return false;
6566     }
6567 
6568     if (trySkipToken(AsmToken::Comma)) {
6569       Stream.IsDefined = true;
6570       Stream.Loc = getLoc();
6571       if (!parseExpr(Stream.Id))
6572         return false;
6573     }
6574   }
6575 
6576   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6577 }
6578 
6579 bool
6580 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6581                                  const OperandInfoTy &Op,
6582                                  const OperandInfoTy &Stream) {
6583   using namespace llvm::AMDGPU::SendMsg;
6584 
6585   // Validation strictness depends on whether the message is specified
6586   // in symbolic or numeric form. In the latter case, only the
6587   // possibility of encoding is checked.
6588   bool Strict = Msg.IsSymbolic;
6589 
6590   if (Strict) {
6591     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6592       Error(Msg.Loc, "specified message id is not supported on this GPU");
6593       return false;
6594     }
6595   } else {
6596     if (!isValidMsgId(Msg.Id)) {
6597       Error(Msg.Loc, "invalid message id");
6598       return false;
6599     }
6600   }
6601   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6602     if (Op.IsDefined) {
6603       Error(Op.Loc, "message does not support operations");
6604     } else {
6605       Error(Msg.Loc, "missing message operation");
6606     }
6607     return false;
6608   }
6609   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6610     Error(Op.Loc, "invalid operation id");
6611     return false;
6612   }
6613   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6614     Error(Stream.Loc, "message operation does not support streams");
6615     return false;
6616   }
6617   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6618     Error(Stream.Loc, "invalid message stream id");
6619     return false;
6620   }
6621   return true;
6622 }
6623 
6624 OperandMatchResultTy
6625 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6626   using namespace llvm::AMDGPU::SendMsg;
6627 
6628   int64_t ImmVal = 0;
6629   SMLoc Loc = getLoc();
6630 
6631   if (trySkipId("sendmsg", AsmToken::LParen)) {
6632     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6633     OperandInfoTy Op(OP_NONE_);
6634     OperandInfoTy Stream(STREAM_ID_NONE_);
6635     if (parseSendMsgBody(Msg, Op, Stream) &&
6636         validateSendMsg(Msg, Op, Stream)) {
6637       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6638     } else {
6639       return MatchOperand_ParseFail;
6640     }
6641   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6642     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6643       Error(Loc, "invalid immediate: only 16-bit values are legal");
6644       return MatchOperand_ParseFail;
6645     }
6646   } else {
6647     return MatchOperand_ParseFail;
6648   }
6649 
6650   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6651   return MatchOperand_Success;
6652 }
6653 
6654 bool AMDGPUOperand::isSendMsg() const {
6655   return isImmTy(ImmTySendMsg);
6656 }
6657 
6658 //===----------------------------------------------------------------------===//
6659 // v_interp
6660 //===----------------------------------------------------------------------===//
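//
// The interpolation operands below take the form "p10", "p20" or "p0" for the
// slot and "attr<N>.<chan>" for the attribute, e.g. (illustrative):
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_p2_f32 v0, v2, attr0.y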
6661 
6662 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6663   StringRef Str;
6664   SMLoc S = getLoc();
6665 
6666   if (!parseId(Str))
6667     return MatchOperand_NoMatch;
6668 
6669   int Slot = StringSwitch<int>(Str)
6670     .Case("p10", 0)
6671     .Case("p20", 1)
6672     .Case("p0", 2)
6673     .Default(-1);
6674 
6675   if (Slot == -1) {
6676     Error(S, "invalid interpolation slot");
6677     return MatchOperand_ParseFail;
6678   }
6679 
6680   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6681                                               AMDGPUOperand::ImmTyInterpSlot));
6682   return MatchOperand_Success;
6683 }
6684 
6685 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6686   StringRef Str;
6687   SMLoc S = getLoc();
6688 
6689   if (!parseId(Str))
6690     return MatchOperand_NoMatch;
6691 
6692   if (!Str.startswith("attr")) {
6693     Error(S, "invalid interpolation attribute");
6694     return MatchOperand_ParseFail;
6695   }
6696 
6697   StringRef Chan = Str.take_back(2);
6698   int AttrChan = StringSwitch<int>(Chan)
6699     .Case(".x", 0)
6700     .Case(".y", 1)
6701     .Case(".z", 2)
6702     .Case(".w", 3)
6703     .Default(-1);
6704   if (AttrChan == -1) {
6705     Error(S, "invalid or missing interpolation attribute channel");
6706     return MatchOperand_ParseFail;
6707   }
6708 
6709   Str = Str.drop_back(2).drop_front(4);
6710 
6711   uint8_t Attr;
6712   if (Str.getAsInteger(10, Attr)) {
6713     Error(S, "invalid or missing interpolation attribute number");
6714     return MatchOperand_ParseFail;
6715   }
6716 
6717   if (Attr > 63) {
6718     Error(S, "out of bounds interpolation attribute number");
6719     return MatchOperand_ParseFail;
6720   }
6721 
6722   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6723 
6724   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6725                                               AMDGPUOperand::ImmTyInterpAttr));
6726   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6727                                               AMDGPUOperand::ImmTyAttrChan));
6728   return MatchOperand_Success;
6729 }
6730 
6731 //===----------------------------------------------------------------------===//
6732 // exp
6733 //===----------------------------------------------------------------------===//
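//
// Illustrative export target syntax accepted by parseExpTgt below (target
// names are assumptions based on the usual exp mnemonics):
//   exp mrt0 v0, v1, v2, v3 done vm
//   exp pos0 v4, v5, v6, v7
//   exp param0 v0, off, off, off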
6734 
6735 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6736   using namespace llvm::AMDGPU::Exp;
6737 
6738   StringRef Str;
6739   SMLoc S = getLoc();
6740 
6741   if (!parseId(Str))
6742     return MatchOperand_NoMatch;
6743 
6744   unsigned Id = getTgtId(Str);
6745   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6746     Error(S, (Id == ET_INVALID) ?
6747                 "invalid exp target" :
6748                 "exp target is not supported on this GPU");
6749     return MatchOperand_ParseFail;
6750   }
6751 
6752   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6753                                               AMDGPUOperand::ImmTyExpTgt));
6754   return MatchOperand_Success;
6755 }
6756 
6757 //===----------------------------------------------------------------------===//
6758 // parser helpers
6759 //===----------------------------------------------------------------------===//
6760 
6761 bool
6762 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6763   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6764 }
6765 
6766 bool
6767 AMDGPUAsmParser::isId(const StringRef Id) const {
6768   return isId(getToken(), Id);
6769 }
6770 
6771 bool
6772 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6773   return getTokenKind() == Kind;
6774 }
6775 
6776 bool
6777 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6778   if (isId(Id)) {
6779     lex();
6780     return true;
6781   }
6782   return false;
6783 }
6784 
6785 bool
6786 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6787   if (isToken(AsmToken::Identifier)) {
6788     StringRef Tok = getTokenStr();
6789     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6790       lex();
6791       return true;
6792     }
6793   }
6794   return false;
6795 }
6796 
6797 bool
6798 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6799   if (isId(Id) && peekToken().is(Kind)) {
6800     lex();
6801     lex();
6802     return true;
6803   }
6804   return false;
6805 }
6806 
6807 bool
6808 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6809   if (isToken(Kind)) {
6810     lex();
6811     return true;
6812   }
6813   return false;
6814 }
6815 
6816 bool
6817 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6818                            const StringRef ErrMsg) {
6819   if (!trySkipToken(Kind)) {
6820     Error(getLoc(), ErrMsg);
6821     return false;
6822   }
6823   return true;
6824 }
6825 
6826 bool
6827 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6828   SMLoc S = getLoc();
6829 
6830   const MCExpr *Expr;
6831   if (Parser.parseExpression(Expr))
6832     return false;
6833 
6834   if (Expr->evaluateAsAbsolute(Imm))
6835     return true;
6836 
6837   if (Expected.empty()) {
6838     Error(S, "expected absolute expression");
6839   } else {
6840     Error(S, Twine("expected ", Expected) +
6841              Twine(" or an absolute expression"));
6842   }
6843   return false;
6844 }
6845 
6846 bool
6847 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6848   SMLoc S = getLoc();
6849 
6850   const MCExpr *Expr;
6851   if (Parser.parseExpression(Expr))
6852     return false;
6853 
6854   int64_t IntVal;
6855   if (Expr->evaluateAsAbsolute(IntVal)) {
6856     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6857   } else {
6858     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6859   }
6860   return true;
6861 }
6862 
6863 bool
6864 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6865   if (isToken(AsmToken::String)) {
6866     Val = getToken().getStringContents();
6867     lex();
6868     return true;
6869   } else {
6870     Error(getLoc(), ErrMsg);
6871     return false;
6872   }
6873 }
6874 
6875 bool
6876 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6877   if (isToken(AsmToken::Identifier)) {
6878     Val = getTokenStr();
6879     lex();
6880     return true;
6881   } else {
6882     if (!ErrMsg.empty())
6883       Error(getLoc(), ErrMsg);
6884     return false;
6885   }
6886 }
6887 
6888 AsmToken
6889 AMDGPUAsmParser::getToken() const {
6890   return Parser.getTok();
6891 }
6892 
6893 AsmToken
6894 AMDGPUAsmParser::peekToken() {
6895   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6896 }
6897 
6898 void
6899 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6900   auto TokCount = getLexer().peekTokens(Tokens);
6901 
6902   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6903     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6904 }
6905 
6906 AsmToken::TokenKind
6907 AMDGPUAsmParser::getTokenKind() const {
6908   return getLexer().getKind();
6909 }
6910 
6911 SMLoc
6912 AMDGPUAsmParser::getLoc() const {
6913   return getToken().getLoc();
6914 }
6915 
6916 StringRef
6917 AMDGPUAsmParser::getTokenStr() const {
6918   return getToken().getString();
6919 }
6920 
6921 void
6922 AMDGPUAsmParser::lex() {
6923   Parser.Lex();
6924 }
6925 
6926 SMLoc
6927 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6928                                const OperandVector &Operands) const {
6929   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6930     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6931     if (Test(Op))
6932       return Op.getStartLoc();
6933   }
6934   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6935 }
6936 
6937 SMLoc
6938 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6939                            const OperandVector &Operands) const {
6940   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6941   return getOperandLoc(Test, Operands);
6942 }
6943 
6944 SMLoc
6945 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6946                            const OperandVector &Operands) const {
6947   auto Test = [=](const AMDGPUOperand& Op) {
6948     return Op.isRegKind() && Op.getReg() == Reg;
6949   };
6950   return getOperandLoc(Test, Operands);
6951 }
6952 
6953 SMLoc
6954 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6955   auto Test = [](const AMDGPUOperand& Op) {
6956     return Op.IsImmKindLiteral() || Op.isExpr();
6957   };
6958   return getOperandLoc(Test, Operands);
6959 }
6960 
6961 SMLoc
6962 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6963   auto Test = [](const AMDGPUOperand& Op) {
6964     return Op.isImmKindConst();
6965   };
6966   return getOperandLoc(Test, Operands);
6967 }
6968 
6969 //===----------------------------------------------------------------------===//
6970 // swizzle
6971 //===----------------------------------------------------------------------===//
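//
// Illustrative forms of the swizzle offset parsed below (a sketch of the
// accepted macros; the mode names correspond to the IdSymbolic table):
//   ds_swizzle_b32 v0, v1 offset:0xffff
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 0)
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 2)
//   ds_swizzle_b32 v0, v1 offset:swizzle(REVERSE, 4)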
6972 
6973 LLVM_READNONE
6974 static unsigned
6975 encodeBitmaskPerm(const unsigned AndMask,
6976                   const unsigned OrMask,
6977                   const unsigned XorMask) {
6978   using namespace llvm::AMDGPU::Swizzle;
6979 
6980   return BITMASK_PERM_ENC |
6981          (AndMask << BITMASK_AND_SHIFT) |
6982          (OrMask  << BITMASK_OR_SHIFT)  |
6983          (XorMask << BITMASK_XOR_SHIFT);
6984 }
6985 
6986 bool
6987 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6988                                      const unsigned MinVal,
6989                                      const unsigned MaxVal,
6990                                      const StringRef ErrMsg,
6991                                      SMLoc &Loc) {
6992   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6993     return false;
6994   }
6995   Loc = getLoc();
6996   if (!parseExpr(Op)) {
6997     return false;
6998   }
6999   if (Op < MinVal || Op > MaxVal) {
7000     Error(Loc, ErrMsg);
7001     return false;
7002   }
7003 
7004   return true;
7005 }
7006 
7007 bool
7008 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7009                                       const unsigned MinVal,
7010                                       const unsigned MaxVal,
7011                                       const StringRef ErrMsg) {
7012   SMLoc Loc;
7013   for (unsigned i = 0; i < OpNum; ++i) {
7014     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7015       return false;
7016   }
7017 
7018   return true;
7019 }
7020 
7021 bool
7022 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7023   using namespace llvm::AMDGPU::Swizzle;
7024 
7025   int64_t Lane[LANE_NUM];
7026   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7027                            "expected a 2-bit lane id")) {
7028     Imm = QUAD_PERM_ENC;
7029     for (unsigned I = 0; I < LANE_NUM; ++I) {
7030       Imm |= Lane[I] << (LANE_SHIFT * I);
7031     }
7032     return true;
7033   }
7034   return false;
7035 }
7036 
7037 bool
7038 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7039   using namespace llvm::AMDGPU::Swizzle;
7040 
7041   SMLoc Loc;
7042   int64_t GroupSize;
7043   int64_t LaneIdx;
7044 
7045   if (!parseSwizzleOperand(GroupSize,
7046                            2, 32,
7047                            "group size must be in the interval [2,32]",
7048                            Loc)) {
7049     return false;
7050   }
7051   if (!isPowerOf2_64(GroupSize)) {
7052     Error(Loc, "group size must be a power of two");
7053     return false;
7054   }
7055   if (parseSwizzleOperand(LaneIdx,
7056                           0, GroupSize - 1,
7057                           "lane id must be in the interval [0,group size - 1]",
7058                           Loc)) {
7059     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7060     return true;
7061   }
7062   return false;
7063 }
7064 
7065 bool
7066 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7067   using namespace llvm::AMDGPU::Swizzle;
7068 
7069   SMLoc Loc;
7070   int64_t GroupSize;
7071 
7072   if (!parseSwizzleOperand(GroupSize,
7073                            2, 32,
7074                            "group size must be in the interval [2,32]",
7075                            Loc)) {
7076     return false;
7077   }
7078   if (!isPowerOf2_64(GroupSize)) {
7079     Error(Loc, "group size must be a power of two");
7080     return false;
7081   }
7082 
7083   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7084   return true;
7085 }
7086 
7087 bool
7088 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7089   using namespace llvm::AMDGPU::Swizzle;
7090 
7091   SMLoc Loc;
7092   int64_t GroupSize;
7093 
7094   if (!parseSwizzleOperand(GroupSize,
7095                            1, 16,
7096                            "group size must be in the interval [1,16]",
7097                            Loc)) {
7098     return false;
7099   }
7100   if (!isPowerOf2_64(GroupSize)) {
7101     Error(Loc, "group size must be a power of two");
7102     return false;
7103   }
7104 
7105   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7106   return true;
7107 }
7108 
7109 bool
7110 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7111   using namespace llvm::AMDGPU::Swizzle;
7112 
7113   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7114     return false;
7115   }
7116 
7117   StringRef Ctl;
7118   SMLoc StrLoc = getLoc();
7119   if (!parseString(Ctl)) {
7120     return false;
7121   }
7122   if (Ctl.size() != BITMASK_WIDTH) {
7123     Error(StrLoc, "expected a 5-character mask");
7124     return false;
7125   }
7126 
7127   unsigned AndMask = 0;
7128   unsigned OrMask = 0;
7129   unsigned XorMask = 0;
7130 
7131   for (size_t i = 0; i < Ctl.size(); ++i) {
7132     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7133     switch(Ctl[i]) {
7134     default:
7135       Error(StrLoc, "invalid mask");
7136       return false;
7137     case '0':
7138       break;
7139     case '1':
7140       OrMask |= Mask;
7141       break;
7142     case 'p':
7143       AndMask |= Mask;
7144       break;
7145     case 'i':
7146       AndMask |= Mask;
7147       XorMask |= Mask;
7148       break;
7149     }
7150   }
7151 
7152   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7153   return true;
7154 }
7155 
7156 bool
7157 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7158 
7159   SMLoc OffsetLoc = getLoc();
7160 
7161   if (!parseExpr(Imm, "a swizzle macro")) {
7162     return false;
7163   }
7164   if (!isUInt<16>(Imm)) {
7165     Error(OffsetLoc, "expected a 16-bit offset");
7166     return false;
7167   }
7168   return true;
7169 }
7170 
7171 bool
7172 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7173   using namespace llvm::AMDGPU::Swizzle;
7174 
7175   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7176 
7177     SMLoc ModeLoc = getLoc();
7178     bool Ok = false;
7179 
7180     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7181       Ok = parseSwizzleQuadPerm(Imm);
7182     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7183       Ok = parseSwizzleBitmaskPerm(Imm);
7184     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7185       Ok = parseSwizzleBroadcast(Imm);
7186     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7187       Ok = parseSwizzleSwap(Imm);
7188     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7189       Ok = parseSwizzleReverse(Imm);
7190     } else {
7191       Error(ModeLoc, "expected a swizzle mode");
7192     }
7193 
7194     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7195   }
7196 
7197   return false;
7198 }
7199 
7200 OperandMatchResultTy
7201 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7202   SMLoc S = getLoc();
7203   int64_t Imm = 0;
7204 
7205   if (trySkipId("offset")) {
7206 
7207     bool Ok = false;
7208     if (skipToken(AsmToken::Colon, "expected a colon")) {
7209       if (trySkipId("swizzle")) {
7210         Ok = parseSwizzleMacro(Imm);
7211       } else {
7212         Ok = parseSwizzleOffset(Imm);
7213       }
7214     }
7215 
7216     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7217 
7218     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
7219   } else {
7220     // Swizzle "offset" operand is optional.
7221     // If it is omitted, try parsing other optional operands.
7222     return parseOptionalOpr(Operands);
7223   }
7224 }
7225 
7226 bool
7227 AMDGPUOperand::isSwizzle() const {
7228   return isImmTy(ImmTySwizzle);
7229 }
7230 
7231 //===----------------------------------------------------------------------===//
7232 // VGPR Index Mode
7233 //===----------------------------------------------------------------------===//
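//
// Illustrative gpr_idx operands handled below (the mode names are assumed to
// be the VGPRIndexMode IdSymbolic entries, e.g. SRC0/SRC1/SRC2/DST):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0)
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
//   s_set_gpr_idx_on s0, 3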
7234 
7235 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7236 
7237   using namespace llvm::AMDGPU::VGPRIndexMode;
7238 
7239   if (trySkipToken(AsmToken::RParen)) {
7240     return OFF;
7241   }
7242 
7243   int64_t Imm = 0;
7244 
7245   while (true) {
7246     unsigned Mode = 0;
7247     SMLoc S = getLoc();
7248 
7249     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7250       if (trySkipId(IdSymbolic[ModeId])) {
7251         Mode = 1 << ModeId;
7252         break;
7253       }
7254     }
7255 
7256     if (Mode == 0) {
7257       Error(S, (Imm == 0)?
7258                "expected a VGPR index mode or a closing parenthesis" :
7259                "expected a VGPR index mode");
7260       return UNDEF;
7261     }
7262 
7263     if (Imm & Mode) {
7264       Error(S, "duplicate VGPR index mode");
7265       return UNDEF;
7266     }
7267     Imm |= Mode;
7268 
7269     if (trySkipToken(AsmToken::RParen))
7270       break;
7271     if (!skipToken(AsmToken::Comma,
7272                    "expected a comma or a closing parenthesis"))
7273       return UNDEF;
7274   }
7275 
7276   return Imm;
7277 }
7278 
7279 OperandMatchResultTy
7280 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7281 
7282   using namespace llvm::AMDGPU::VGPRIndexMode;
7283 
7284   int64_t Imm = 0;
7285   SMLoc S = getLoc();
7286 
7287   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7288     Imm = parseGPRIdxMacro();
7289     if (Imm == UNDEF)
7290       return MatchOperand_ParseFail;
7291   } else {
7292     if (getParser().parseAbsoluteExpression(Imm))
7293       return MatchOperand_ParseFail;
7294     if (Imm < 0 || !isUInt<4>(Imm)) {
7295       Error(S, "invalid immediate: only 4-bit values are legal");
7296       return MatchOperand_ParseFail;
7297     }
7298   }
7299 
7300   Operands.push_back(
7301       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7302   return MatchOperand_Success;
7303 }
7304 
7305 bool AMDGPUOperand::isGPRIdxMode() const {
7306   return isImmTy(ImmTyGprIdxMode);
7307 }
7308 
7309 //===----------------------------------------------------------------------===//
7310 // sopp branch targets
7311 //===----------------------------------------------------------------------===//
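//
// Branch targets may be labels or absolute expressions that fit in a signed
// 16-bit immediate, e.g. (illustrative):
//   s_branch loop_begin
//   s_cbranch_scc0 16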
7312 
7313 OperandMatchResultTy
7314 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7315 
7316   // Make sure we are not parsing something
7317   // that looks like a label or an expression but is not.
7318   // This will improve error messages.
7319   if (isRegister() || isModifier())
7320     return MatchOperand_NoMatch;
7321 
7322   if (!parseExpr(Operands))
7323     return MatchOperand_ParseFail;
7324 
7325   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7326   assert(Opr.isImm() || Opr.isExpr());
7327   SMLoc Loc = Opr.getStartLoc();
7328 
7329   // Currently we do not support arbitrary expressions as branch targets.
7330   // Only labels and absolute expressions are accepted.
7331   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7332     Error(Loc, "expected an absolute expression or a label");
7333   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7334     Error(Loc, "expected a 16-bit signed jump offset");
7335   }
7336 
7337   return MatchOperand_Success;
7338 }
7339 
7340 //===----------------------------------------------------------------------===//
7341 // Boolean holding registers
7342 //===----------------------------------------------------------------------===//
7343 
7344 OperandMatchResultTy
7345 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7346   return parseReg(Operands);
7347 }
7348 
7349 //===----------------------------------------------------------------------===//
7350 // mubuf
7351 //===----------------------------------------------------------------------===//
7352 
7353 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7354   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7355 }
7356 
7357 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7358                                    const OperandVector &Operands,
7359                                    bool IsAtomic,
7360                                    bool IsLds) {
7361   OptionalImmIndexMap OptionalIdx;
7362   unsigned FirstOperandIdx = 1;
7363   bool IsAtomicReturn = false;
7364 
7365   if (IsAtomic) {
7366     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7367       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7368       if (!Op.isCPol())
7369         continue;
7370       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7371       break;
7372     }
7373 
7374     if (!IsAtomicReturn) {
7375       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7376       if (NewOpc != -1)
7377         Inst.setOpcode(NewOpc);
7378     }
7379 
7380     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7381                       SIInstrFlags::IsAtomicRet;
7382   }
7383 
7384   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7385     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7386 
7387     // Add the register arguments
7388     if (Op.isReg()) {
7389       Op.addRegOperands(Inst, 1);
7390       // Insert a tied src for the atomic return dst.
7391       // This cannot be postponed as subsequent calls to
7392       // addImmOperands rely on the correct number of MC operands.
7393       if (IsAtomicReturn && i == FirstOperandIdx)
7394         Op.addRegOperands(Inst, 1);
7395       continue;
7396     }
7397 
7398     // Handle the case where soffset is an immediate
7399     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7400       Op.addImmOperands(Inst, 1);
7401       continue;
7402     }
7403 
7404     // Handle tokens like 'offen' which are sometimes hard-coded into the
7405     // asm string.  There are no MCInst operands for these.
7406     if (Op.isToken()) {
7407       continue;
7408     }
7409     assert(Op.isImm());
7410 
7411     // Handle optional arguments
7412     OptionalIdx[Op.getImmTy()] = i;
7413   }
7414 
7415   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7416   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7417 
7418   if (!IsLds) { // tfe is not legal with lds opcodes
7419     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7420   }
7421   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7422 }
7423 
7424 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7425   OptionalImmIndexMap OptionalIdx;
7426 
7427   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7428     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7429 
7430     // Add the register arguments
7431     if (Op.isReg()) {
7432       Op.addRegOperands(Inst, 1);
7433       continue;
7434     }
7435 
7436     // Handle the case where soffset is an immediate
7437     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7438       Op.addImmOperands(Inst, 1);
7439       continue;
7440     }
7441 
7442     // Handle tokens like 'offen' which are sometimes hard-coded into the
7443     // asm string.  There are no MCInst operands for these.
7444     if (Op.isToken()) {
7445       continue;
7446     }
7447     assert(Op.isImm());
7448 
7449     // Handle optional arguments
7450     OptionalIdx[Op.getImmTy()] = i;
7451   }
7452 
7453   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7454                         AMDGPUOperand::ImmTyOffset);
7455   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7456   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7457   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7458   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7459 }
7460 
7461 //===----------------------------------------------------------------------===//
7462 // mimg
7463 //===----------------------------------------------------------------------===//
7464 
7465 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7466                               bool IsAtomic) {
7467   unsigned I = 1;
7468   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7469   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7470     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7471   }
7472 
7473   if (IsAtomic) {
7474     // Add src, same as dst
7475     assert(Desc.getNumDefs() == 1);
7476     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7477   }
7478 
7479   OptionalImmIndexMap OptionalIdx;
7480 
7481   for (unsigned E = Operands.size(); I != E; ++I) {
7482     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7483 
7484     // Add the register arguments
7485     if (Op.isReg()) {
7486       Op.addRegOperands(Inst, 1);
7487     } else if (Op.isImmModifier()) {
7488       OptionalIdx[Op.getImmTy()] = I;
7489     } else if (!Op.isToken()) {
7490       llvm_unreachable("unexpected operand type");
7491     }
7492   }
7493 
7494   bool IsGFX10Plus = isGFX10Plus();
7495 
7496   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7497   if (IsGFX10Plus)
7498     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7499   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7500   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7501   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7502   if (IsGFX10Plus)
7503     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7504   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7505     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7506   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7507   if (!IsGFX10Plus)
7508     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7509   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7510 }
7511 
7512 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7513   cvtMIMG(Inst, Operands, true);
7514 }
7515 
7516 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7517   OptionalImmIndexMap OptionalIdx;
7518   bool IsAtomicReturn = false;
7519 
7520   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7521     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7522     if (!Op.isCPol())
7523       continue;
7524     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7525     break;
7526   }
7527 
7528   if (!IsAtomicReturn) {
7529     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7530     if (NewOpc != -1)
7531       Inst.setOpcode(NewOpc);
7532   }
7533 
7534   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7535                     SIInstrFlags::IsAtomicRet;
7536 
7537   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7538     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7539 
7540     // Add the register arguments
7541     if (Op.isReg()) {
7542       Op.addRegOperands(Inst, 1);
7543       if (IsAtomicReturn && i == 1)
7544         Op.addRegOperands(Inst, 1);
7545       continue;
7546     }
7547 
7548     // Handle the case where soffset is an immediate
7549     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7550       Op.addImmOperands(Inst, 1);
7551       continue;
7552     }
7553 
7554     // Handle tokens like 'offen' which are sometimes hard-coded into the
7555     // asm string.  There are no MCInst operands for these.
7556     if (Op.isToken()) {
7557       continue;
7558     }
7559     assert(Op.isImm());
7560 
7561     // Handle optional arguments
7562     OptionalIdx[Op.getImmTy()] = i;
7563   }
7564 
7565   if ((int)Inst.getNumOperands() <=
7566       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7567     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7568   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7569 }
7570 
7571 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7572                                       const OperandVector &Operands) {
7573   for (unsigned I = 1; I < Operands.size(); ++I) {
7574     auto &Operand = (AMDGPUOperand &)*Operands[I];
7575     if (Operand.isReg())
7576       Operand.addRegOperands(Inst, 1);
7577   }
7578 
7579   Inst.addOperand(MCOperand::createImm(1)); // a16
7580 }
7581 
7582 //===----------------------------------------------------------------------===//
7583 // smrd
7584 //===----------------------------------------------------------------------===//
7585 
7586 bool AMDGPUOperand::isSMRDOffset8() const {
7587   return isImm() && isUInt<8>(getImm());
7588 }
7589 
7590 bool AMDGPUOperand::isSMEMOffset() const {
7591   return isImm(); // Offset range is checked later by validator.
7592 }
7593 
7594 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7595   // 32-bit literals are only supported on CI, and we only want to use them
7596   // when the offset does not fit in 8 bits.
7597   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7598 }
7599 
7600 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7601   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7602 }
7603 
7604 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7605   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7606 }
7607 
7608 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7609   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7610 }
7611 
7612 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7613   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7614 }
7615 
7616 //===----------------------------------------------------------------------===//
7617 // vop3
7618 //===----------------------------------------------------------------------===//
7619 
7620 static bool ConvertOmodMul(int64_t &Mul) {
7621   if (Mul != 1 && Mul != 2 && Mul != 4)
7622     return false;
7623 
7624   Mul >>= 1;
7625   return true;
7626 }
7627 
7628 static bool ConvertOmodDiv(int64_t &Div) {
7629   if (Div == 1) {
7630     Div = 0;
7631     return true;
7632   }
7633 
7634   if (Div == 2) {
7635     Div = 3;
7636     return true;
7637   }
7638 
7639   return false;
7640 }
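
// Taken together, ConvertOmodMul and ConvertOmodDiv map the textual output
// modifier to its encoding: mul:2 -> 1, mul:4 -> 2 and div:2 -> 3, while
// mul:1 and div:1 both map to 0 (i.e. no output modifier).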
7641 
7642 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7643 // This is intentional and ensures compatibility with sp3.
7644 // See bug 35397 for details.
7645 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7646   if (BoundCtrl == 0 || BoundCtrl == 1) {
7647     BoundCtrl = 1;
7648     return true;
7649   }
7650   return false;
7651 }
7652 
7653 // Note: the order in this table matches the order of operands in AsmString.
7654 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7655   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7656   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7657   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7658   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7659   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7660   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7661   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7662   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7663   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7664   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7665   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7666   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7667   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7668   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7669   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7670   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7671   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7672   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7673   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7674   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7675   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7676   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7677   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7678   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7679   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7680   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7681   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7682   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7683   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7684   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7685   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7686   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7687   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7688   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7689   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7690   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7691   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7692   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7693   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7694   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7695   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7696 };
7697 
7698 void AMDGPUAsmParser::onBeginOfFile() {
7699   if (!getParser().getStreamer().getTargetStreamer() ||
7700       getSTI().getTargetTriple().getArch() == Triple::r600)
7701     return;
7702 
7703   if (!getTargetStreamer().getTargetID())
7704     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7705 
7706   if (isHsaAbiVersion3AndAbove(&getSTI()))
7707     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7708 }
7709 
7710 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7711 
7712   OperandMatchResultTy res = parseOptionalOpr(Operands);
7713 
7714   // This is a hack to enable hardcoded mandatory operands which follow
7715   // optional operands.
7716   //
7717   // The current design assumes that all operands after the first optional operand
7718   // are also optional. However, the implementation of some instructions violates
7719   // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7720   //
7721   // To alleviate this problem, we have to (implicitly) parse extra operands
7722   // to make sure the autogenerated parser of custom operands never hits
7723   // hardcoded mandatory operands.
7724 
7725   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7726     if (res != MatchOperand_Success ||
7727         isToken(AsmToken::EndOfStatement))
7728       break;
7729 
7730     trySkipToken(AsmToken::Comma);
7731     res = parseOptionalOpr(Operands);
7732   }
7733 
7734   return res;
7735 }
7736 
7737 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7738   OperandMatchResultTy res;
7739   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7740     // try to parse any optional operand here
7741     if (Op.IsBit) {
7742       res = parseNamedBit(Op.Name, Operands, Op.Type);
7743     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7744       res = parseOModOperand(Operands);
7745     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7746                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7747                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7748       res = parseSDWASel(Operands, Op.Name, Op.Type);
7749     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7750       res = parseSDWADstUnused(Operands);
7751     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7752                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7753                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7754                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7755       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7756                                         Op.ConvertResult);
7757     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7758       res = parseDim(Operands);
7759     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7760       res = parseCPol(Operands);
7761     } else {
7762       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7763       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
7764         res = parseOperandArrayWithPrefix("neg", Operands,
7765                                           AMDGPUOperand::ImmTyBLGP,
7766                                           nullptr);
7767       }
7768     }
7769     if (res != MatchOperand_NoMatch) {
7770       return res;
7771     }
7772   }
7773   return MatchOperand_NoMatch;
7774 }
7775 
7776 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7777   StringRef Name = getTokenStr();
7778   if (Name == "mul") {
7779     return parseIntWithPrefix("mul", Operands,
7780                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7781   }
7782 
7783   if (Name == "div") {
7784     return parseIntWithPrefix("div", Operands,
7785                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7786   }
7787 
7788   return MatchOperand_NoMatch;
7789 }
7790 
7791 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7792   cvtVOP3P(Inst, Operands);
7793 
7794   int Opc = Inst.getOpcode();
7795 
7796   int SrcNum;
7797   const int Ops[] = { AMDGPU::OpName::src0,
7798                       AMDGPU::OpName::src1,
7799                       AMDGPU::OpName::src2 };
7800   for (SrcNum = 0;
7801        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7802        ++SrcNum);
7803   assert(SrcNum > 0);
7804 
7805   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7806   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7807 
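  // An op_sel bit set just past the last source selects the high half of the
  // destination; it is encoded as DST_OP_SEL in src0_modifiers.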
7808   if ((OpSel & (1 << SrcNum)) != 0) {
7809     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7810     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7811     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7812   }
7813 }
7814 
7815 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7816       // 1. This operand is an input modifiers operand
7817   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7818       // 2. This is not the last operand
7819       && Desc.NumOperands > (OpNum + 1)
7820       // 3. The next operand has a register class
7821       && Desc.OpInfo[OpNum + 1].RegClass != -1
7822       // 4. The next register is not tied to any other operand
7823       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7824 }
7825 
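// Convert parsed operands of a VOP3 interpolation instruction into MCInst
// operands: sources may carry FP input modifiers, interp slot/attr/chan become
// plain immediates, and the optional high/clamp/omod operands are appended
// when the opcode has them.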
7826 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7827 {
7828   OptionalImmIndexMap OptionalIdx;
7829   unsigned Opc = Inst.getOpcode();
7830 
7831   unsigned I = 1;
7832   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7833   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7834     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7835   }
7836 
7837   for (unsigned E = Operands.size(); I != E; ++I) {
7838     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7839     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7840       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7841     } else if (Op.isInterpSlot() ||
7842                Op.isInterpAttr() ||
7843                Op.isAttrChan()) {
7844       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7845     } else if (Op.isImmModifier()) {
7846       OptionalIdx[Op.getImmTy()] = I;
7847     } else {
7848       llvm_unreachable("unhandled operand type");
7849     }
7850   }
7851 
7852   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7853     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7854   }
7855 
7856   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7857     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7858   }
7859 
7860   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7861     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7862   }
7863 }
7864 
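// Common VOP3 conversion: add the defs, then the sources (with FP input
// modifiers when the opcode has src0_modifiers), record parsed optional
// immediates, and append clamp/omod where the opcode defines them.
// MAC/FMAC opcodes additionally get a zero src2_modifiers and a tied src2
// copied from dst.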
7865 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7866                               OptionalImmIndexMap &OptionalIdx) {
7867   unsigned Opc = Inst.getOpcode();
7868 
7869   unsigned I = 1;
7870   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7871   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7872     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7873   }
7874 
7875   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7876     // This instruction has src modifiers
7877     for (unsigned E = Operands.size(); I != E; ++I) {
7878       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7879       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7880         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7881       } else if (Op.isImmModifier()) {
7882         OptionalIdx[Op.getImmTy()] = I;
7883       } else if (Op.isRegOrImm()) {
7884         Op.addRegOrImmOperands(Inst, 1);
7885       } else {
7886         llvm_unreachable("unhandled operand type");
7887       }
7888     }
7889   } else {
7890     // No src modifiers
7891     for (unsigned E = Operands.size(); I != E; ++I) {
7892       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7893       if (Op.isMod()) {
7894         OptionalIdx[Op.getImmTy()] = I;
7895       } else {
7896         Op.addRegOrImmOperands(Inst, 1);
7897       }
7898     }
7899   }
7900 
7901   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7902     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7903   }
7904 
7905   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7906     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7907   }
7908 
7909   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7910   // they have a src2 register operand that is tied to the dst operand.
7911   // We don't allow modifiers for this operand in the assembler, so src2_modifiers
7912   // should be 0.
7913   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7914       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7915       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7916       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7917       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7918       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7919       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7920       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7921       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7922       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7923       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7924     auto it = Inst.begin();
7925     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7926     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7927     ++it;
7928     // Copy the operand to ensure it's not invalidated when Inst grows.
7929     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7930   }
7931 }
7932 
7933 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7934   OptionalImmIndexMap OptionalIdx;
7935   cvtVOP3(Inst, Operands, OptionalIdx);
7936 }
7937 
7938 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7939                                OptionalImmIndexMap &OptIdx) {
7940   const int Opc = Inst.getOpcode();
7941   const MCInstrDesc &Desc = MII.get(Opc);
7942 
7943   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7944 
7945   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7946     assert(!IsPacked);
7947     Inst.addOperand(Inst.getOperand(0));
7948   }
7949 
7950   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
7951   // instruction, and then figure out where to actually put the modifiers.
7952 
7953   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7954   if (OpSelIdx != -1) {
7955     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7956   }
7957 
7958   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7959   if (OpSelHiIdx != -1) {
7960     int DefaultVal = IsPacked ? -1 : 0;
7961     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7962                           DefaultVal);
7963   }
7964 
7965   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7966   if (NegLoIdx != -1) {
7967     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7968     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7969   }
7970 
7971   const int Ops[] = { AMDGPU::OpName::src0,
7972                       AMDGPU::OpName::src1,
7973                       AMDGPU::OpName::src2 };
7974   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7975                          AMDGPU::OpName::src1_modifiers,
7976                          AMDGPU::OpName::src2_modifiers };
7977 
7978   unsigned OpSel = 0;
7979   unsigned OpSelHi = 0;
7980   unsigned NegLo = 0;
7981   unsigned NegHi = 0;
7982 
7983   if (OpSelIdx != -1)
7984     OpSel = Inst.getOperand(OpSelIdx).getImm();
7985 
7986   if (OpSelHiIdx != -1)
7987     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7988 
7989   if (NegLoIdx != -1) {
7990     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7991     NegLo = Inst.getOperand(NegLoIdx).getImm();
7992     NegHi = Inst.getOperand(NegHiIdx).getImm();
7993   }
7994 
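  // Fold the parsed op_sel/op_sel_hi/neg_lo/neg_hi bits into the per-source
  // modifier operands (src0/1/2_modifiers).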
7995   for (int J = 0; J < 3; ++J) {
7996     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7997     if (OpIdx == -1)
7998       break;
7999 
8000     uint32_t ModVal = 0;
8001 
8002     if ((OpSel & (1 << J)) != 0)
8003       ModVal |= SISrcMods::OP_SEL_0;
8004 
8005     if ((OpSelHi & (1 << J)) != 0)
8006       ModVal |= SISrcMods::OP_SEL_1;
8007 
8008     if ((NegLo & (1 << J)) != 0)
8009       ModVal |= SISrcMods::NEG;
8010 
8011     if ((NegHi & (1 << J)) != 0)
8012       ModVal |= SISrcMods::NEG_HI;
8013 
8014     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8015 
8016     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8017   }
8018 }
8019 
8020 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8021   OptionalImmIndexMap OptIdx;
8022   cvtVOP3(Inst, Operands, OptIdx);
8023   cvtVOP3P(Inst, Operands, OptIdx);
8024 }
8025 
8026 //===----------------------------------------------------------------------===//
8027 // dpp
8028 //===----------------------------------------------------------------------===//
8029 
8030 bool AMDGPUOperand::isDPP8() const {
8031   return isImmTy(ImmTyDPP8);
8032 }
8033 
8034 bool AMDGPUOperand::isDPPCtrl() const {
8035   using namespace AMDGPU::DPP;
8036 
8037   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8038   if (result) {
8039     int64_t Imm = getImm();
8040     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8041            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8042            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8043            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8044            (Imm == DppCtrl::WAVE_SHL1) ||
8045            (Imm == DppCtrl::WAVE_ROL1) ||
8046            (Imm == DppCtrl::WAVE_SHR1) ||
8047            (Imm == DppCtrl::WAVE_ROR1) ||
8048            (Imm == DppCtrl::ROW_MIRROR) ||
8049            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8050            (Imm == DppCtrl::BCAST15) ||
8051            (Imm == DppCtrl::BCAST31) ||
8052            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8053            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8054   }
8055   return false;
8056 }
8057 
8058 //===----------------------------------------------------------------------===//
8059 // mAI
8060 //===----------------------------------------------------------------------===//
8061 
8062 bool AMDGPUOperand::isBLGP() const {
8063   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8064 }
8065 
8066 bool AMDGPUOperand::isCBSZ() const {
8067   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8068 }
8069 
8070 bool AMDGPUOperand::isABID() const {
8071   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8072 }
8073 
8074 bool AMDGPUOperand::isS16Imm() const {
8075   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8076 }
8077 
8078 bool AMDGPUOperand::isU16Imm() const {
8079   return isImm() && isUInt<16>(getImm());
8080 }
8081 
8082 //===----------------------------------------------------------------------===//
8083 // dim
8084 //===----------------------------------------------------------------------===//
8085 
8086 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8087   // We want to allow "dim:1D" etc.,
8088   // but the initial 1 is tokenized as an integer.
8089   std::string Token;
8090   if (isToken(AsmToken::Integer)) {
8091     SMLoc Loc = getToken().getEndLoc();
8092     Token = std::string(getTokenStr());
8093     lex();
8094     if (getLoc() != Loc)
8095       return false;
8096   }
8097 
8098   StringRef Suffix;
8099   if (!parseId(Suffix))
8100     return false;
8101   Token += Suffix;
8102 
8103   StringRef DimId = Token;
8104   if (DimId.startswith("SQ_RSRC_IMG_"))
8105     DimId = DimId.drop_front(12);
8106 
8107   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8108   if (!DimInfo)
8109     return false;
8110 
8111   Encoding = DimInfo->Encoding;
8112   return true;
8113 }
8114 
8115 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8116   if (!isGFX10Plus())
8117     return MatchOperand_NoMatch;
8118 
8119   SMLoc S = getLoc();
8120 
8121   if (!trySkipId("dim", AsmToken::Colon))
8122     return MatchOperand_NoMatch;
8123 
8124   unsigned Encoding;
8125   SMLoc Loc = getLoc();
8126   if (!parseDimId(Encoding)) {
8127     Error(Loc, "invalid dim value");
8128     return MatchOperand_ParseFail;
8129   }
8130 
8131   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8132                                               AMDGPUOperand::ImmTyDim));
8133   return MatchOperand_Success;
8134 }
8135 
8136 //===----------------------------------------------------------------------===//
8137 // dpp
8138 //===----------------------------------------------------------------------===//
8139 
8140 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8141   SMLoc S = getLoc();
8142 
8143   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8144     return MatchOperand_NoMatch;
8145 
8146   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8147 
8148   int64_t Sels[8];
8149 
8150   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8151     return MatchOperand_ParseFail;
8152 
8153   for (size_t i = 0; i < 8; ++i) {
8154     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8155       return MatchOperand_ParseFail;
8156 
8157     SMLoc Loc = getLoc();
8158     if (getParser().parseAbsoluteExpression(Sels[i]))
8159       return MatchOperand_ParseFail;
8160     if (Sels[i] < 0 || Sels[i] > 7) {
8161       Error(Loc, "expected a 3-bit value");
8162       return MatchOperand_ParseFail;
8163     }
8164   }
8165 
8166   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8167     return MatchOperand_ParseFail;
8168 
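  // Pack the eight 3-bit lane selectors into a single immediate.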
8169   unsigned DPP8 = 0;
8170   for (size_t i = 0; i < 8; ++i)
8171     DPP8 |= (Sels[i] << (i * 3));
8172 
8173   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8174   return MatchOperand_Success;
8175 }
8176 
8177 bool
8178 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8179                                     const OperandVector &Operands) {
8180   if (Ctrl == "row_newbcast")
8181     return isGFX90A();
8182 
8183   if (Ctrl == "row_share" ||
8184       Ctrl == "row_xmask")
8185     return isGFX10Plus();
8186 
8187   if (Ctrl == "wave_shl" ||
8188       Ctrl == "wave_shr" ||
8189       Ctrl == "wave_rol" ||
8190       Ctrl == "wave_ror" ||
8191       Ctrl == "row_bcast")
8192     return isVI() || isGFX9();
8193 
8194   return Ctrl == "row_mirror" ||
8195          Ctrl == "row_half_mirror" ||
8196          Ctrl == "quad_perm" ||
8197          Ctrl == "row_shl" ||
8198          Ctrl == "row_shr" ||
8199          Ctrl == "row_ror";
8200 }
8201 
8202 int64_t
8203 AMDGPUAsmParser::parseDPPCtrlPerm() {
8204   // quad_perm:[%d,%d,%d,%d]
8205 
8206   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8207     return -1;
8208 
8209   int64_t Val = 0;
8210   for (int i = 0; i < 4; ++i) {
8211     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8212       return -1;
8213 
8214     int64_t Temp;
8215     SMLoc Loc = getLoc();
8216     if (getParser().parseAbsoluteExpression(Temp))
8217       return -1;
8218     if (Temp < 0 || Temp > 3) {
8219       Error(Loc, "expected a 2-bit value");
8220       return -1;
8221     }
8222 
8223     Val += (Temp << i * 2);
8224   }
8225 
8226   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8227     return -1;
8228 
8229   return Val;
8230 }
8231 
8232 int64_t
8233 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8234   using namespace AMDGPU::DPP;
8235 
8236   // sel:%d
8237 
8238   int64_t Val;
8239   SMLoc Loc = getLoc();
8240 
8241   if (getParser().parseAbsoluteExpression(Val))
8242     return -1;
8243 
8244   struct DppCtrlCheck {
8245     int64_t Ctrl;
8246     int Lo;
8247     int Hi;
8248   };
8249 
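  // Map the control name to its base encoding and allowed value range;
  // e.g. row_shl:3 encodes as ROW_SHL0 | 3, while single-value controls such
  // as wave_shl:1 encode as just the base control.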
8250   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8251     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8252     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8253     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8254     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8255     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8256     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8257     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8258     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8259     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8260     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8261     .Default({-1, 0, 0});
8262 
8263   bool Valid;
8264   if (Check.Ctrl == -1) {
8265     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8266     Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8267   } else {
8268     Valid = Check.Lo <= Val && Val <= Check.Hi;
8269     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8270   }
8271 
8272   if (!Valid) {
8273     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8274     return -1;
8275   }
8276 
8277   return Val;
8278 }
8279 
8280 OperandMatchResultTy
8281 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8282   using namespace AMDGPU::DPP;
8283 
8284   if (!isToken(AsmToken::Identifier) ||
8285       !isSupportedDPPCtrl(getTokenStr(), Operands))
8286     return MatchOperand_NoMatch;
8287 
8288   SMLoc S = getLoc();
8289   int64_t Val = -1;
8290   StringRef Ctrl;
8291 
8292   parseId(Ctrl);
8293 
8294   if (Ctrl == "row_mirror") {
8295     Val = DppCtrl::ROW_MIRROR;
8296   } else if (Ctrl == "row_half_mirror") {
8297     Val = DppCtrl::ROW_HALF_MIRROR;
8298   } else {
8299     if (skipToken(AsmToken::Colon, "expected a colon")) {
8300       if (Ctrl == "quad_perm") {
8301         Val = parseDPPCtrlPerm();
8302       } else {
8303         Val = parseDPPCtrlSel(Ctrl);
8304       }
8305     }
8306   }
8307 
8308   if (Val == -1)
8309     return MatchOperand_ParseFail;
8310 
8311   Operands.push_back(
8312     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8313   return MatchOperand_Success;
8314 }
8315 
8316 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8317   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8318 }
8319 
8320 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8321   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8322 }
8323 
8324 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8325   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8326 }
8327 
8328 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8329   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8330 }
8331 
8332 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8333   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8334 }
8335 
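// Convert parsed DPP operands into an MCInst. For dpp8, the packed lane-select
// immediate and the 'fi' flag are appended directly; for dpp16, omitted
// row_mask/bank_mask/bound_ctrl/fi operands get their default values.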
8336 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8337   OptionalImmIndexMap OptionalIdx;
8338 
8339   unsigned Opc = Inst.getOpcode();
8340   bool HasModifiers =
8341       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8342   unsigned I = 1;
8343   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8344   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8345     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8346   }
8347 
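  // Holds the value of the optional dpp8 'fi' operand, appended as an
  // immediate below.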
8348   int Fi = 0;
8349   for (unsigned E = Operands.size(); I != E; ++I) {
8350     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8351                                             MCOI::TIED_TO);
8352     if (TiedTo != -1) {
8353       assert((unsigned)TiedTo < Inst.getNumOperands());
8354       // handle tied old or src2 for MAC instructions
8355       Inst.addOperand(Inst.getOperand(TiedTo));
8356     }
8357     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8358     // Add the register arguments
8359     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8360       // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
8361       // Skip it.
8362       continue;
8363     }
8364 
8365     if (IsDPP8) {
8366       if (Op.isDPP8()) {
8367         Op.addImmOperands(Inst, 1);
8368       } else if (HasModifiers &&
8369                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8370         Op.addRegWithFPInputModsOperands(Inst, 2);
8371       } else if (Op.isFI()) {
8372         Fi = Op.getImm();
8373       } else if (Op.isReg()) {
8374         Op.addRegOperands(Inst, 1);
8375       } else {
8376         llvm_unreachable("Invalid operand type");
8377       }
8378     } else {
8379       if (HasModifiers &&
8380           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8381         Op.addRegWithFPInputModsOperands(Inst, 2);
8382       } else if (Op.isReg()) {
8383         Op.addRegOperands(Inst, 1);
8384       } else if (Op.isDPPCtrl()) {
8385         Op.addImmOperands(Inst, 1);
8386       } else if (Op.isImm()) {
8387         // Handle optional arguments
8388         OptionalIdx[Op.getImmTy()] = I;
8389       } else {
8390         llvm_unreachable("Invalid operand type");
8391       }
8392     }
8393   }
8394 
8395   if (IsDPP8) {
8396     using namespace llvm::AMDGPU::DPP;
8397     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8398   } else {
8399     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8400     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8401     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8402     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8403       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8404     }
8405   }
8406 }
8407 
8408 //===----------------------------------------------------------------------===//
8409 // sdwa
8410 //===----------------------------------------------------------------------===//
8411 
8412 OperandMatchResultTy
8413 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8414                               AMDGPUOperand::ImmTy Type) {
8415   using namespace llvm::AMDGPU::SDWA;
8416 
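  // Syntax: <prefix>:VALUE, e.g. dst_sel:WORD_1 or src0_sel:BYTE_0.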
8417   SMLoc S = getLoc();
8418   StringRef Value;
8419   OperandMatchResultTy res;
8420 
8421   SMLoc StringLoc;
8422   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8423   if (res != MatchOperand_Success) {
8424     return res;
8425   }
8426 
8427   int64_t Int;
8428   Int = StringSwitch<int64_t>(Value)
8429         .Case("BYTE_0", SdwaSel::BYTE_0)
8430         .Case("BYTE_1", SdwaSel::BYTE_1)
8431         .Case("BYTE_2", SdwaSel::BYTE_2)
8432         .Case("BYTE_3", SdwaSel::BYTE_3)
8433         .Case("WORD_0", SdwaSel::WORD_0)
8434         .Case("WORD_1", SdwaSel::WORD_1)
8435         .Case("DWORD", SdwaSel::DWORD)
8436         .Default(0xffffffff);
8437 
8438   if (Int == 0xffffffff) {
8439     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8440     return MatchOperand_ParseFail;
8441   }
8442 
8443   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8444   return MatchOperand_Success;
8445 }
8446 
8447 OperandMatchResultTy
8448 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8449   using namespace llvm::AMDGPU::SDWA;
8450 
8451   SMLoc S = getLoc();
8452   StringRef Value;
8453   OperandMatchResultTy res;
8454 
8455   SMLoc StringLoc;
8456   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8457   if (res != MatchOperand_Success) {
8458     return res;
8459   }
8460 
8461   int64_t Int;
8462   Int = StringSwitch<int64_t>(Value)
8463         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8464         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8465         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8466         .Default(0xffffffff);
8467 
8468   if (Int == 0xffffffff) {
8469     Error(StringLoc, "invalid dst_unused value");
8470     return MatchOperand_ParseFail;
8471   }
8472 
8473   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8474   return MatchOperand_Success;
8475 }
8476 
8477 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8478   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8479 }
8480 
8481 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8482   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8483 }
8484 
8485 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8486   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8487 }
8488 
8489 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8490   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8491 }
8492 
8493 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8494   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8495 }
8496 
8497 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8498                               uint64_t BasicInstType,
8499                               bool SkipDstVcc,
8500                               bool SkipSrcVcc) {
8501   using namespace llvm::AMDGPU::SDWA;
8502 
8503   OptionalImmIndexMap OptionalIdx;
8504   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8505   bool SkippedVcc = false;
8506 
8507   unsigned I = 1;
8508   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8509   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8510     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8511   }
8512 
8513   for (unsigned E = Operands.size(); I != E; ++I) {
8514     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8515     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8516         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8517       // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
8518       // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8519       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8520       // Skip VCC only if we didn't skip it on previous iteration.
8521       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8522       if (BasicInstType == SIInstrFlags::VOP2 &&
8523           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8524            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8525         SkippedVcc = true;
8526         continue;
8527       } else if (BasicInstType == SIInstrFlags::VOPC &&
8528                  Inst.getNumOperands() == 0) {
8529         SkippedVcc = true;
8530         continue;
8531       }
8532     }
8533     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8534       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8535     } else if (Op.isImm()) {
8536       // Handle optional arguments
8537       OptionalIdx[Op.getImmTy()] = I;
8538     } else {
8539       llvm_unreachable("Invalid operand type");
8540     }
8541     SkippedVcc = false;
8542   }
8543 
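  // Append defaults for any optional sdwa operands that were not explicitly
  // specified.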
8544   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8545       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8546       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8547     // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
8548     switch (BasicInstType) {
8549     case SIInstrFlags::VOP1:
8550       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8551       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8552         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8553       }
8554       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8555       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8556       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8557       break;
8558 
8559     case SIInstrFlags::VOP2:
8560       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8561       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8562         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8563       }
8564       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8565       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8566       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8567       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8568       break;
8569 
8570     case SIInstrFlags::VOPC:
8571       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8572         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8573       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8574       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8575       break;
8576 
8577     default:
8578       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8579     }
8580   }
8581 
8582   // Special case v_mac_{f16, f32}:
8583   // they have a src2 register operand that is tied to the dst operand.
8584   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8585       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8586     auto it = Inst.begin();
8587     std::advance(
8588       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8589     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8590   }
8591 }
8592 
8593 //===----------------------------------------------------------------------===//
8594 // mAI
8595 //===----------------------------------------------------------------------===//
8596 
8597 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8598   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8599 }
8600 
8601 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8602   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8603 }
8604 
8605 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8606   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8607 }
8608 
8609 /// Force static initialization.
8610 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8611   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8612   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8613 }
8614 
8615 #define GET_REGISTER_MATCHER
8616 #define GET_MATCHER_IMPLEMENTATION
8617 #define GET_MNEMONIC_SPELL_CHECKER
8618 #define GET_MNEMONIC_CHECKER
8619 #include "AMDGPUGenAsmMatcher.inc"
8620 
8621 // This function should be defined after the auto-generated include so that
8622 // the MatchClassKind enum is defined.
8623 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8624                                                      unsigned Kind) {
8625   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8626   // But MatchInstructionImpl() expects to meet a token and fails to validate the
8627   // operand. This method checks the case where we are given an immediate operand
8628   // but the matcher expects the corresponding token.
8629   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8630   switch (Kind) {
8631   case MCK_addr64:
8632     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8633   case MCK_gds:
8634     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8635   case MCK_lds:
8636     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8637   case MCK_idxen:
8638     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8639   case MCK_offen:
8640     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8641   case MCK_SSrcB32:
8642     // When operands have expression values, they will return true for isToken,
8643     // because it is not possible to distinguish between a token and an
8644     // expression at parse time. MatchInstructionImpl() will always try to
8645     // match an operand as a token, when isToken returns true, and when the
8646     // name of the expression is not a valid token, the match will fail,
8647     // so we need to handle it here.
8648     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8649   case MCK_SSrcF32:
8650     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8651   case MCK_SoppBrTarget:
8652     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8653   case MCK_VReg32OrOff:
8654     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8655   case MCK_InterpSlot:
8656     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8657   case MCK_Attr:
8658     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8659   case MCK_AttrChan:
8660     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8661   case MCK_ImmSMEMOffset:
8662     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8663   case MCK_SReg_64:
8664   case MCK_SReg_64_XEXEC:
8665     // Null is defined as a 32-bit register but
8666     // it should also be enabled with 64-bit operands.
8667     // The following code enables it for SReg_64 operands
8668     // used as source and destination. Remaining source
8669     // operands are handled in isInlinableImm.
8670     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8671   default:
8672     return Match_InvalidOperand;
8673   }
8674 }
8675 
8676 //===----------------------------------------------------------------------===//
8677 // endpgm
8678 //===----------------------------------------------------------------------===//
8679 
8680 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8681   SMLoc S = getLoc();
8682   int64_t Imm = 0;
8683 
8684   if (!parseExpr(Imm)) {
8685     // The operand is optional; if not present, default to 0.
8686     Imm = 0;
8687   }
8688 
8689   if (!isUInt<16>(Imm)) {
8690     Error(S, "expected a 16-bit value");
8691     return MatchOperand_ParseFail;
8692   }
8693 
8694   Operands.push_back(
8695       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8696   return MatchOperand_Success;
8697 }
8698 
8699 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8700