//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

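  // Source modifiers that may be attached to an operand: abs/neg apply to
  // floating-point operands, sext to integer operands. For example, a source
  // written as "-|v0|" sets both Neg and Abs, while "sext(v1)" sets Sext.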
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

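  // Identifies which named or optional operand an immediate represents; for
  // example, an "offset:16" operand is parsed as an immediate of type
  // ImmTyOffset.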
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

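  // Storage for the parsed value; the active union member is selected by Kind.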
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

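  // Factory helpers used by the parser to construct operands of each kind.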
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

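  // Track the highest register index used so far and keep the corresponding
  // .kernel.*_count symbols up to date.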
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1678   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1679 
1680   AMDGPUOperand::Ptr defaultCPol() const;
1681 
1682   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1683   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1684   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1685   AMDGPUOperand::Ptr defaultFlatOffset() const;
1686 
1687   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1688 
1689   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1690                OptionalImmIndexMap &OptionalIdx);
1691   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1692   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1693   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1694   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1695                 OptionalImmIndexMap &OptionalIdx);
1696 
1697   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1698 
1699   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1700                bool IsAtomic = false);
1701   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1702   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1703 
1704   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1705 
1706   bool parseDimId(unsigned &Encoding);
1707   OperandMatchResultTy parseDim(OperandVector &Operands);
1708   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1709   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1710   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1711   int64_t parseDPPCtrlSel(StringRef Ctrl);
1712   int64_t parseDPPCtrlPerm();
1713   AMDGPUOperand::Ptr defaultRowMask() const;
1714   AMDGPUOperand::Ptr defaultBankMask() const;
1715   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1716   AMDGPUOperand::Ptr defaultFI() const;
1717   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1718   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1719 
1720   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1721                                     AMDGPUOperand::ImmTy Type);
1722   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1723   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1724   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1725   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1726   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1727   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1728   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1729                uint64_t BasicInstType,
1730                bool SkipDstVcc = false,
1731                bool SkipSrcVcc = false);
1732 
1733   AMDGPUOperand::Ptr defaultBLGP() const;
1734   AMDGPUOperand::Ptr defaultCBSZ() const;
1735   AMDGPUOperand::Ptr defaultABID() const;
1736 
1737   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1738   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1739 };
1740 
1741 struct OptionalOperand {
1742   const char *Name;
1743   AMDGPUOperand::ImmTy Type;
1744   bool IsBit;
1745   bool (*ConvertResult)(int64_t&);
1746 };
1747 
1748 } // end anonymous namespace
1749 
// May be called with an integer type of equivalent bitwidth.
1751 static const fltSemantics *getFltSemantics(unsigned Size) {
1752   switch (Size) {
1753   case 4:
1754     return &APFloat::IEEEsingle();
1755   case 8:
1756     return &APFloat::IEEEdouble();
1757   case 2:
1758     return &APFloat::IEEEhalf();
1759   default:
1760     llvm_unreachable("unsupported fp type");
1761   }
1762 }
1763 
1764 static const fltSemantics *getFltSemantics(MVT VT) {
1765   return getFltSemantics(VT.getSizeInBits() / 8);
1766 }
1767 
1768 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1769   switch (OperandType) {
1770   case AMDGPU::OPERAND_REG_IMM_INT32:
1771   case AMDGPU::OPERAND_REG_IMM_FP32:
1772   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1773   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1774   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1775   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1776   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1777   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1778   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1779   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1780   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1781   case AMDGPU::OPERAND_KIMM32:
1782     return &APFloat::IEEEsingle();
1783   case AMDGPU::OPERAND_REG_IMM_INT64:
1784   case AMDGPU::OPERAND_REG_IMM_FP64:
1785   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1786   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1787   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1788     return &APFloat::IEEEdouble();
1789   case AMDGPU::OPERAND_REG_IMM_INT16:
1790   case AMDGPU::OPERAND_REG_IMM_FP16:
1791   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1792   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1793   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1794   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1795   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1796   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1797   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1798   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1799   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1800   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1801   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1802   case AMDGPU::OPERAND_KIMM16:
1803     return &APFloat::IEEEhalf();
1804   default:
1805     llvm_unreachable("unsupported fp type");
1806   }
1807 }
1808 
1809 //===----------------------------------------------------------------------===//
1810 // Operand
1811 //===----------------------------------------------------------------------===//
1812 
1813 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1814   bool Lost;
1815 
  // Convert the literal to the semantics of the expected operand type
1817   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1818                                                APFloat::rmNearestTiesToEven,
1819                                                &Lost);
  // We allow precision loss but not overflow or underflow
1821   if (Status != APFloat::opOK &&
1822       Lost &&
1823       ((Status & APFloat::opOverflow)  != 0 ||
1824        (Status & APFloat::opUnderflow) != 0)) {
1825     return false;
1826   }
1827 
1828   return true;
1829 }
1830 
1831 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1832   return isUIntN(Size, Val) || isIntN(Size, Val);
1833 }
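// Illustrative uses of isSafeTruncation() (values follow directly from the
// check above):
//   isSafeTruncation(-1, 16)      -> true  (fits as a signed 16-bit value)
//   isSafeTruncation(0xFFFF, 16)  -> true  (fits as an unsigned 16-bit value)
//   isSafeTruncation(0x1FFFF, 16) -> false (needs more than 16 bits)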
1834 
1835 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1836   if (VT.getScalarType() == MVT::i16) {
1837     // FP immediate values are broken.
1838     return isInlinableIntLiteral(Val);
1839   }
1840 
1841   // f16/v2f16 operands work correctly for all values.
1842   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1843 }
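// Rough behaviour of isInlineableLiteralOp16(), assuming the usual AMDGPU
// inline-constant set: for i16 operands only the integer inline range
// (-16..64) is accepted, while f16/v2f16 operands additionally accept the
// f16 encodings of +-0.5, +-1.0, +-2.0, +-4.0 and, when HasInv2Pi is set,
// 1/(2*pi).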
1844 
1845 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1846 
1847   // This is a hack to enable named inline values like
1848   // shared_base with both 32-bit and 64-bit operands.
1849   // Note that these values are defined as
1850   // 32-bit operands only.
1851   if (isInlineValue()) {
1852     return true;
1853   }
1854 
1855   if (!isImmTy(ImmTyNone)) {
1856     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1857     return false;
1858   }
1859   // TODO: We should avoid using host float here. It would be better to
1860   // check the float bit values which is what a few other places do.
1861   // We've had bot failures before due to weird NaN support on mips hosts.
1862 
1863   APInt Literal(64, Imm.Val);
1864 
1865   if (Imm.IsFPImm) { // We got fp literal token
1866     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1867       return AMDGPU::isInlinableLiteral64(Imm.Val,
1868                                           AsmParser->hasInv2PiInlineImm());
1869     }
1870 
1871     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1872     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1873       return false;
1874 
1875     if (type.getScalarSizeInBits() == 16) {
1876       return isInlineableLiteralOp16(
1877         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1878         type, AsmParser->hasInv2PiInlineImm());
1879     }
1880 
1881     // Check if single precision literal is inlinable
1882     return AMDGPU::isInlinableLiteral32(
1883       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1884       AsmParser->hasInv2PiInlineImm());
1885   }
1886 
1887   // We got int literal token.
1888   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1889     return AMDGPU::isInlinableLiteral64(Imm.Val,
1890                                         AsmParser->hasInv2PiInlineImm());
1891   }
1892 
1893   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1894     return false;
1895   }
1896 
1897   if (type.getScalarSizeInBits() == 16) {
1898     return isInlineableLiteralOp16(
1899       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1900       type, AsmParser->hasInv2PiInlineImm());
1901   }
1902 
1903   return AMDGPU::isInlinableLiteral32(
1904     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1905     AsmParser->hasInv2PiInlineImm());
1906 }
1907 
1908 bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal
1910   if (!isImmTy(ImmTyNone)) {
1911     return false;
1912   }
1913 
1914   if (!Imm.IsFPImm) {
1915     // We got int literal token.
1916 
1917     if (type == MVT::f64 && hasFPModifiers()) {
      // Applying fp modifiers to int literals cannot preserve the same
      // semantics for VOP1/2/C and VOP3 because of integer truncation.
      // To avoid ambiguity, reject these cases.
1921       return false;
1922     }
1923 
1924     unsigned Size = type.getSizeInBits();
1925     if (Size == 64)
1926       Size = 32;
1927 
1928     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1929     // types.
1930     return isSafeTruncation(Imm.Val, Size);
1931   }
1932 
1933   // We got fp literal token
1934   if (type == MVT::f64) { // Expected 64-bit fp operand
    // Such a literal is encoded with its low 32 bits set to zeroes (see
    // addLiteralImmOperand), but we accept it here.
1936     return true;
1937   }
1938 
1939   if (type == MVT::i64) { // Expected 64-bit int operand
1940     // We don't allow fp literals in 64-bit integer instructions. It is
1941     // unclear how we should encode them.
1942     return false;
1943   }
1944 
1945   // We allow fp literals with f16x2 operands assuming that the specified
1946   // literal goes into the lower half and the upper half is zero. We also
1947   // require that the literal may be losslessly converted to f16.
1948   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1949                      (type == MVT::v2i16)? MVT::i16 :
1950                      (type == MVT::v2f32)? MVT::f32 : type;
1951 
1952   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1953   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1954 }
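// For example, with a v2f16 operand a literal such as 1.0 is accepted by
// isLiteralImm() (it converts exactly to f16 and goes into the lower half),
// while a value outside the f16 range fails canLosslesslyConvertToFPType()
// and is rejected.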
1955 
1956 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1957   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1958 }
1959 
1960 bool AMDGPUOperand::isVRegWithInputMods() const {
1961   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1962          // GFX90A allows DPP on 64-bit operands.
1963          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1964           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1965 }
1966 
1967 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1968   if (AsmParser->isVI())
1969     return isVReg32();
1970   else if (AsmParser->isGFX9Plus())
1971     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1972   else
1973     return false;
1974 }
1975 
1976 bool AMDGPUOperand::isSDWAFP16Operand() const {
1977   return isSDWAOperand(MVT::f16);
1978 }
1979 
1980 bool AMDGPUOperand::isSDWAFP32Operand() const {
1981   return isSDWAOperand(MVT::f32);
1982 }
1983 
1984 bool AMDGPUOperand::isSDWAInt16Operand() const {
1985   return isSDWAOperand(MVT::i16);
1986 }
1987 
1988 bool AMDGPUOperand::isSDWAInt32Operand() const {
1989   return isSDWAOperand(MVT::i32);
1990 }
1991 
1992 bool AMDGPUOperand::isBoolReg() const {
1993   auto FB = AsmParser->getFeatureBits();
1994   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1995                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1996 }
1997 
1998 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1999 {
2000   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2001   assert(Size == 2 || Size == 4 || Size == 8);
2002 
2003   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2004 
2005   if (Imm.Mods.Abs) {
2006     Val &= ~FpSignMask;
2007   }
2008   if (Imm.Mods.Neg) {
2009     Val ^= FpSignMask;
2010   }
2011 
2012   return Val;
2013 }
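// For example, with Size == 4 the sign mask is 0x80000000:
//   abs applied to 0xBF800000 (-1.0f) yields 0x3F800000 (1.0f);
//   neg applied to 0x3F800000 ( 1.0f) yields 0xBF800000 (-1.0f).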
2014 
2015 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2016   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2017                              Inst.getNumOperands())) {
2018     addLiteralImmOperand(Inst, Imm.Val,
2019                          ApplyModifiers &
2020                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2021   } else {
2022     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2023     Inst.addOperand(MCOperand::createImm(Imm.Val));
2024     setImmKindNone();
2025   }
2026 }
2027 
2028 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2029   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2030   auto OpNum = Inst.getNumOperands();
2031   // Check that this operand accepts literals
2032   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2033 
2034   if (ApplyModifiers) {
2035     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2036     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2037     Val = applyInputFPModifiers(Val, Size);
2038   }
2039 
2040   APInt Literal(64, Val);
2041   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2042 
2043   if (Imm.IsFPImm) { // We got fp literal token
2044     switch (OpTy) {
2045     case AMDGPU::OPERAND_REG_IMM_INT64:
2046     case AMDGPU::OPERAND_REG_IMM_FP64:
2047     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2048     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2049     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2050       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2051                                        AsmParser->hasInv2PiInlineImm())) {
2052         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2053         setImmKindConst();
2054         return;
2055       }
2056 
2057       // Non-inlineable
2058       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2059         // For fp operands we check if low 32 bits are zeros
2060         if (Literal.getLoBits(32) != 0) {
2061           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2062           "Can't encode literal as exact 64-bit floating-point operand. "
2063           "Low 32-bits will be set to zero");
2064         }
2065 
2066         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2067         setImmKindLiteral();
2068         return;
2069       }
2070 
2071       // We don't allow fp literals in 64-bit integer instructions. It is
2072       // unclear how we should encode them. This case should be checked earlier
2073       // in predicate methods (isLiteralImm())
2074       llvm_unreachable("fp literal in 64-bit integer instruction.");
2075 
2076     case AMDGPU::OPERAND_REG_IMM_INT32:
2077     case AMDGPU::OPERAND_REG_IMM_FP32:
2078     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2079     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2080     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2081     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2082     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2083     case AMDGPU::OPERAND_REG_IMM_INT16:
2084     case AMDGPU::OPERAND_REG_IMM_FP16:
2085     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2086     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2087     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2088     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2089     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2090     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2091     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2092     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2093     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2094     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2095     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2096     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2097     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2098     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2099     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2100     case AMDGPU::OPERAND_KIMM32:
2101     case AMDGPU::OPERAND_KIMM16: {
2102       bool lost;
2103       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's floating-point semantics
2105       FPLiteral.convert(*getOpFltSemantics(OpTy),
2106                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()
2109 
2110       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2111       Inst.addOperand(MCOperand::createImm(ImmVal));
2112       setImmKindLiteral();
2113       return;
2114     }
2115     default:
2116       llvm_unreachable("invalid operand size");
2117     }
2118 
2119     return;
2120   }
2121 
2122   // We got int literal token.
2123   // Only sign extend inline immediates.
2124   switch (OpTy) {
2125   case AMDGPU::OPERAND_REG_IMM_INT32:
2126   case AMDGPU::OPERAND_REG_IMM_FP32:
2127   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2128   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2129   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2130   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2131   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2132   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2133   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2134   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2135   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2136   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2137   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2138     if (isSafeTruncation(Val, 32) &&
2139         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2140                                      AsmParser->hasInv2PiInlineImm())) {
2141       Inst.addOperand(MCOperand::createImm(Val));
2142       setImmKindConst();
2143       return;
2144     }
2145 
2146     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2147     setImmKindLiteral();
2148     return;
2149 
2150   case AMDGPU::OPERAND_REG_IMM_INT64:
2151   case AMDGPU::OPERAND_REG_IMM_FP64:
2152   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2153   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2154   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2155     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2156       Inst.addOperand(MCOperand::createImm(Val));
2157       setImmKindConst();
2158       return;
2159     }
2160 
2161     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2162     setImmKindLiteral();
2163     return;
2164 
2165   case AMDGPU::OPERAND_REG_IMM_INT16:
2166   case AMDGPU::OPERAND_REG_IMM_FP16:
2167   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2168   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2169   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2170   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2171   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2172     if (isSafeTruncation(Val, 16) &&
2173         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2174                                      AsmParser->hasInv2PiInlineImm())) {
2175       Inst.addOperand(MCOperand::createImm(Val));
2176       setImmKindConst();
2177       return;
2178     }
2179 
2180     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2181     setImmKindLiteral();
2182     return;
2183 
2184   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2185   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2186   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2187   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2188     assert(isSafeTruncation(Val, 16));
2189     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2190                                         AsmParser->hasInv2PiInlineImm()));
2191 
2192     Inst.addOperand(MCOperand::createImm(Val));
2193     return;
2194   }
2195   case AMDGPU::OPERAND_KIMM32:
2196     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2197     setImmKindNone();
2198     return;
2199   case AMDGPU::OPERAND_KIMM16:
2200     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2201     setImmKindNone();
2202     return;
2203   default:
2204     llvm_unreachable("invalid operand size");
2205   }
2206 }
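// For example, for an OPERAND_REG_IMM_INT32 source given an integer token:
//   Val = 64 is an inline constant and is emitted as-is (setImmKindConst);
//   Val = 65 is not inlinable and is emitted as a 32-bit literal
//            (Val & 0xffffffff, setImmKindLiteral).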
2207 
2208 template <unsigned Bitwidth>
2209 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2210   APInt Literal(64, Imm.Val);
2211   setImmKindNone();
2212 
2213   if (!Imm.IsFPImm) {
2214     // We got int literal token.
2215     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2216     return;
2217   }
2218 
2219   bool Lost;
2220   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2221   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2222                     APFloat::rmNearestTiesToEven, &Lost);
2223   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2224 }
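// For example, with Bitwidth == 16 and the fp token 1.0, the IEEE double is
// converted to half precision and the 16-bit pattern 0x3C00 is emitted; an
// integer token is simply truncated to its low 16 bits.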
2225 
2226 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2227   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2228 }
2229 
2230 static bool isInlineValue(unsigned Reg) {
2231   switch (Reg) {
2232   case AMDGPU::SRC_SHARED_BASE:
2233   case AMDGPU::SRC_SHARED_LIMIT:
2234   case AMDGPU::SRC_PRIVATE_BASE:
2235   case AMDGPU::SRC_PRIVATE_LIMIT:
2236   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2237     return true;
2238   case AMDGPU::SRC_VCCZ:
2239   case AMDGPU::SRC_EXECZ:
2240   case AMDGPU::SRC_SCC:
2241     return true;
2242   case AMDGPU::SGPR_NULL:
2243     return true;
2244   default:
2245     return false;
2246   }
2247 }
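// These named operands (src_shared_base, src_scc, null, etc.) parse as
// registers, but isInlinableImm() above treats them as inline values, so
// they are accepted wherever an inline constant is expected.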
2248 
2249 bool AMDGPUOperand::isInlineValue() const {
2250   return isRegKind() && ::isInlineValue(getReg());
2251 }
2252 
2253 //===----------------------------------------------------------------------===//
2254 // AsmParser
2255 //===----------------------------------------------------------------------===//
2256 
2257 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2258   if (Is == IS_VGPR) {
2259     switch (RegWidth) {
2260       default: return -1;
2261       case 32:
2262         return AMDGPU::VGPR_32RegClassID;
2263       case 64:
2264         return AMDGPU::VReg_64RegClassID;
2265       case 96:
2266         return AMDGPU::VReg_96RegClassID;
2267       case 128:
2268         return AMDGPU::VReg_128RegClassID;
2269       case 160:
2270         return AMDGPU::VReg_160RegClassID;
2271       case 192:
2272         return AMDGPU::VReg_192RegClassID;
2273       case 224:
2274         return AMDGPU::VReg_224RegClassID;
2275       case 256:
2276         return AMDGPU::VReg_256RegClassID;
2277       case 512:
2278         return AMDGPU::VReg_512RegClassID;
2279       case 1024:
2280         return AMDGPU::VReg_1024RegClassID;
2281     }
2282   } else if (Is == IS_TTMP) {
2283     switch (RegWidth) {
2284       default: return -1;
2285       case 32:
2286         return AMDGPU::TTMP_32RegClassID;
2287       case 64:
2288         return AMDGPU::TTMP_64RegClassID;
2289       case 128:
2290         return AMDGPU::TTMP_128RegClassID;
2291       case 256:
2292         return AMDGPU::TTMP_256RegClassID;
2293       case 512:
2294         return AMDGPU::TTMP_512RegClassID;
2295     }
2296   } else if (Is == IS_SGPR) {
2297     switch (RegWidth) {
2298       default: return -1;
2299       case 32:
2300         return AMDGPU::SGPR_32RegClassID;
2301       case 64:
2302         return AMDGPU::SGPR_64RegClassID;
2303       case 96:
2304         return AMDGPU::SGPR_96RegClassID;
2305       case 128:
2306         return AMDGPU::SGPR_128RegClassID;
2307       case 160:
2308         return AMDGPU::SGPR_160RegClassID;
2309       case 192:
2310         return AMDGPU::SGPR_192RegClassID;
2311       case 224:
2312         return AMDGPU::SGPR_224RegClassID;
2313       case 256:
2314         return AMDGPU::SGPR_256RegClassID;
2315       case 512:
2316         return AMDGPU::SGPR_512RegClassID;
2317     }
2318   } else if (Is == IS_AGPR) {
2319     switch (RegWidth) {
2320       default: return -1;
2321       case 32:
2322         return AMDGPU::AGPR_32RegClassID;
2323       case 64:
2324         return AMDGPU::AReg_64RegClassID;
2325       case 96:
2326         return AMDGPU::AReg_96RegClassID;
2327       case 128:
2328         return AMDGPU::AReg_128RegClassID;
2329       case 160:
2330         return AMDGPU::AReg_160RegClassID;
2331       case 192:
2332         return AMDGPU::AReg_192RegClassID;
2333       case 224:
2334         return AMDGPU::AReg_224RegClassID;
2335       case 256:
2336         return AMDGPU::AReg_256RegClassID;
2337       case 512:
2338         return AMDGPU::AReg_512RegClassID;
2339       case 1024:
2340         return AMDGPU::AReg_1024RegClassID;
2341     }
2342   }
2343   return -1;
2344 }
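// For example:
//   getRegClass(IS_VGPR, 128)  -> AMDGPU::VReg_128RegClassID
//   getRegClass(IS_SGPR, 96)   -> AMDGPU::SGPR_96RegClassID
//   getRegClass(IS_SGPR, 1024) -> -1 (no 1024-bit SGPR class is handled here)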
2345 
2346 static unsigned getSpecialRegForName(StringRef RegName) {
2347   return StringSwitch<unsigned>(RegName)
2348     .Case("exec", AMDGPU::EXEC)
2349     .Case("vcc", AMDGPU::VCC)
2350     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2351     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2352     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2353     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2354     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2355     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2356     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2357     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2358     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2359     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2360     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2361     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2362     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2363     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2364     .Case("m0", AMDGPU::M0)
2365     .Case("vccz", AMDGPU::SRC_VCCZ)
2366     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2367     .Case("execz", AMDGPU::SRC_EXECZ)
2368     .Case("src_execz", AMDGPU::SRC_EXECZ)
2369     .Case("scc", AMDGPU::SRC_SCC)
2370     .Case("src_scc", AMDGPU::SRC_SCC)
2371     .Case("tba", AMDGPU::TBA)
2372     .Case("tma", AMDGPU::TMA)
2373     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2374     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2375     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2376     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2377     .Case("vcc_lo", AMDGPU::VCC_LO)
2378     .Case("vcc_hi", AMDGPU::VCC_HI)
2379     .Case("exec_lo", AMDGPU::EXEC_LO)
2380     .Case("exec_hi", AMDGPU::EXEC_HI)
2381     .Case("tma_lo", AMDGPU::TMA_LO)
2382     .Case("tma_hi", AMDGPU::TMA_HI)
2383     .Case("tba_lo", AMDGPU::TBA_LO)
2384     .Case("tba_hi", AMDGPU::TBA_HI)
2385     .Case("pc", AMDGPU::PC_REG)
2386     .Case("null", AMDGPU::SGPR_NULL)
2387     .Default(AMDGPU::NoRegister);
2388 }
2389 
2390 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2391                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2392   auto R = parseRegister();
2393   if (!R) return true;
2394   assert(R->isReg());
2395   RegNo = R->getReg();
2396   StartLoc = R->getStartLoc();
2397   EndLoc = R->getEndLoc();
2398   return false;
2399 }
2400 
2401 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2402                                     SMLoc &EndLoc) {
2403   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2404 }
2405 
2406 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2407                                                        SMLoc &StartLoc,
2408                                                        SMLoc &EndLoc) {
2409   bool Result =
2410       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2411   bool PendingErrors = getParser().hasPendingError();
2412   getParser().clearPendingErrors();
2413   if (PendingErrors)
2414     return MatchOperand_ParseFail;
2415   if (Result)
2416     return MatchOperand_NoMatch;
2417   return MatchOperand_Success;
2418 }
2419 
2420 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2421                                             RegisterKind RegKind, unsigned Reg1,
2422                                             SMLoc Loc) {
2423   switch (RegKind) {
2424   case IS_SPECIAL:
2425     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2426       Reg = AMDGPU::EXEC;
2427       RegWidth = 64;
2428       return true;
2429     }
2430     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2431       Reg = AMDGPU::FLAT_SCR;
2432       RegWidth = 64;
2433       return true;
2434     }
2435     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2436       Reg = AMDGPU::XNACK_MASK;
2437       RegWidth = 64;
2438       return true;
2439     }
2440     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2441       Reg = AMDGPU::VCC;
2442       RegWidth = 64;
2443       return true;
2444     }
2445     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2446       Reg = AMDGPU::TBA;
2447       RegWidth = 64;
2448       return true;
2449     }
2450     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2451       Reg = AMDGPU::TMA;
2452       RegWidth = 64;
2453       return true;
2454     }
2455     Error(Loc, "register does not fit in the list");
2456     return false;
2457   case IS_VGPR:
2458   case IS_SGPR:
2459   case IS_AGPR:
2460   case IS_TTMP:
2461     if (Reg1 != Reg + RegWidth / 32) {
2462       Error(Loc, "registers in a list must have consecutive indices");
2463       return false;
2464     }
2465     RegWidth += 32;
2466     return true;
2467   default:
2468     llvm_unreachable("unexpected register kind");
2469   }
2470 }
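// For example, while parsing the list [s0,s1,s2,s3], AddNextRegisterToList()
// is called three times; each call checks that the new register immediately
// follows the ones collected so far (Reg1 == Reg + RegWidth / 32) and grows
// RegWidth 32 -> 64 -> 96 -> 128. Special register pairs such as
// [exec_lo,exec_hi] are merged into the 64-bit register (EXEC).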
2471 
2472 struct RegInfo {
2473   StringLiteral Name;
2474   RegisterKind Kind;
2475 };
2476 
2477 static constexpr RegInfo RegularRegisters[] = {
2478   {{"v"},    IS_VGPR},
2479   {{"s"},    IS_SGPR},
2480   {{"ttmp"}, IS_TTMP},
2481   {{"acc"},  IS_AGPR},
2482   {{"a"},    IS_AGPR},
2483 };
2484 
2485 static bool isRegularReg(RegisterKind Kind) {
2486   return Kind == IS_VGPR ||
2487          Kind == IS_SGPR ||
2488          Kind == IS_TTMP ||
2489          Kind == IS_AGPR;
2490 }
2491 
2492 static const RegInfo* getRegularRegInfo(StringRef Str) {
2493   for (const RegInfo &Reg : RegularRegisters)
2494     if (Str.startswith(Reg.Name))
2495       return &Reg;
2496   return nullptr;
2497 }
2498 
2499 static bool getRegNum(StringRef Str, unsigned& Num) {
2500   return !Str.getAsInteger(10, Num);
2501 }
2502 
2503 bool
2504 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2505                             const AsmToken &NextToken) const {
2506 
2507   // A list of consecutive registers: [s0,s1,s2,s3]
2508   if (Token.is(AsmToken::LBrac))
2509     return true;
2510 
2511   if (!Token.is(AsmToken::Identifier))
2512     return false;
2513 
2514   // A single register like s0 or a range of registers like s[0:1]
2515 
2516   StringRef Str = Token.getString();
2517   const RegInfo *Reg = getRegularRegInfo(Str);
2518   if (Reg) {
2519     StringRef RegName = Reg->Name;
2520     StringRef RegSuffix = Str.substr(RegName.size());
2521     if (!RegSuffix.empty()) {
2522       unsigned Num;
2523       // A single register with an index: rXX
2524       if (getRegNum(RegSuffix, Num))
2525         return true;
2526     } else {
2527       // A range of registers: r[XX:YY].
2528       if (NextToken.is(AsmToken::LBrac))
2529         return true;
2530     }
2531   }
2532 
2533   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2534 }
2535 
2536 bool
2537 AMDGPUAsmParser::isRegister()
2538 {
2539   return isRegister(getToken(), peekToken());
2540 }
2541 
2542 unsigned
2543 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2544                                unsigned RegNum,
2545                                unsigned RegWidth,
2546                                SMLoc Loc) {
2547 
2548   assert(isRegularReg(RegKind));
2549 
2550   unsigned AlignSize = 1;
2551   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2552     // SGPR and TTMP registers must be aligned.
2553     // Max required alignment is 4 dwords.
2554     AlignSize = std::min(RegWidth / 32, 4u);
2555   }
2556 
2557   if (RegNum % AlignSize != 0) {
2558     Error(Loc, "invalid register alignment");
2559     return AMDGPU::NoRegister;
2560   }
2561 
2562   unsigned RegIdx = RegNum / AlignSize;
2563   int RCID = getRegClass(RegKind, RegWidth);
2564   if (RCID == -1) {
2565     Error(Loc, "invalid or unsupported register size");
2566     return AMDGPU::NoRegister;
2567   }
2568 
2569   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2570   const MCRegisterClass RC = TRI->getRegClass(RCID);
2571   if (RegIdx >= RC.getNumRegs()) {
2572     Error(Loc, "register index is out of range");
2573     return AMDGPU::NoRegister;
2574   }
2575 
2576   return RC.getRegister(RegIdx);
2577 }
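// For example, s[2:3] is accepted (the first index is even), while s[1:2] is
// rejected with "invalid register alignment" because 64-bit SGPR ranges must
// start at an even index. VGPR and AGPR ranges are not constrained this way
// since AlignSize stays 1 for them.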
2578 
2579 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2580   int64_t RegLo, RegHi;
2581   if (!skipToken(AsmToken::LBrac, "missing register index"))
2582     return false;
2583 
2584   SMLoc FirstIdxLoc = getLoc();
2585   SMLoc SecondIdxLoc;
2586 
2587   if (!parseExpr(RegLo))
2588     return false;
2589 
2590   if (trySkipToken(AsmToken::Colon)) {
2591     SecondIdxLoc = getLoc();
2592     if (!parseExpr(RegHi))
2593       return false;
2594   } else {
2595     RegHi = RegLo;
2596   }
2597 
2598   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2599     return false;
2600 
2601   if (!isUInt<32>(RegLo)) {
2602     Error(FirstIdxLoc, "invalid register index");
2603     return false;
2604   }
2605 
2606   if (!isUInt<32>(RegHi)) {
2607     Error(SecondIdxLoc, "invalid register index");
2608     return false;
2609   }
2610 
2611   if (RegLo > RegHi) {
2612     Error(FirstIdxLoc, "first register index should not exceed second index");
2613     return false;
2614   }
2615 
2616   Num = static_cast<unsigned>(RegLo);
2617   RegWidth = 32 * ((RegHi - RegLo) + 1);
2618   return true;
2619 }
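// For example, the suffix "[8:11]" yields Num = 8 and RegWidth = 128, while
// "[5]" (no colon) yields Num = 5 and RegWidth = 32.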
2620 
2621 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2622                                           unsigned &RegNum, unsigned &RegWidth,
2623                                           SmallVectorImpl<AsmToken> &Tokens) {
2624   assert(isToken(AsmToken::Identifier));
2625   unsigned Reg = getSpecialRegForName(getTokenStr());
2626   if (Reg) {
2627     RegNum = 0;
2628     RegWidth = 32;
2629     RegKind = IS_SPECIAL;
2630     Tokens.push_back(getToken());
2631     lex(); // skip register name
2632   }
2633   return Reg;
2634 }
2635 
2636 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2637                                           unsigned &RegNum, unsigned &RegWidth,
2638                                           SmallVectorImpl<AsmToken> &Tokens) {
2639   assert(isToken(AsmToken::Identifier));
2640   StringRef RegName = getTokenStr();
2641   auto Loc = getLoc();
2642 
2643   const RegInfo *RI = getRegularRegInfo(RegName);
2644   if (!RI) {
2645     Error(Loc, "invalid register name");
2646     return AMDGPU::NoRegister;
2647   }
2648 
2649   Tokens.push_back(getToken());
2650   lex(); // skip register name
2651 
2652   RegKind = RI->Kind;
2653   StringRef RegSuffix = RegName.substr(RI->Name.size());
2654   if (!RegSuffix.empty()) {
2655     // Single 32-bit register: vXX.
2656     if (!getRegNum(RegSuffix, RegNum)) {
2657       Error(Loc, "invalid register index");
2658       return AMDGPU::NoRegister;
2659     }
2660     RegWidth = 32;
2661   } else {
2662     // Range of registers: v[XX:YY]. ":YY" is optional.
2663     if (!ParseRegRange(RegNum, RegWidth))
2664       return AMDGPU::NoRegister;
2665   }
2666 
2667   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2668 }
2669 
2670 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2671                                        unsigned &RegWidth,
2672                                        SmallVectorImpl<AsmToken> &Tokens) {
2673   unsigned Reg = AMDGPU::NoRegister;
2674   auto ListLoc = getLoc();
2675 
2676   if (!skipToken(AsmToken::LBrac,
2677                  "expected a register or a list of registers")) {
2678     return AMDGPU::NoRegister;
2679   }
2680 
2681   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2682 
2683   auto Loc = getLoc();
2684   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2685     return AMDGPU::NoRegister;
2686   if (RegWidth != 32) {
2687     Error(Loc, "expected a single 32-bit register");
2688     return AMDGPU::NoRegister;
2689   }
2690 
2691   for (; trySkipToken(AsmToken::Comma); ) {
2692     RegisterKind NextRegKind;
2693     unsigned NextReg, NextRegNum, NextRegWidth;
2694     Loc = getLoc();
2695 
2696     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2697                              NextRegNum, NextRegWidth,
2698                              Tokens)) {
2699       return AMDGPU::NoRegister;
2700     }
2701     if (NextRegWidth != 32) {
2702       Error(Loc, "expected a single 32-bit register");
2703       return AMDGPU::NoRegister;
2704     }
2705     if (NextRegKind != RegKind) {
2706       Error(Loc, "registers in a list must be of the same kind");
2707       return AMDGPU::NoRegister;
2708     }
2709     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2710       return AMDGPU::NoRegister;
2711   }
2712 
2713   if (!skipToken(AsmToken::RBrac,
2714                  "expected a comma or a closing square bracket")) {
2715     return AMDGPU::NoRegister;
2716   }
2717 
2718   if (isRegularReg(RegKind))
2719     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2720 
2721   return Reg;
2722 }
2723 
2724 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2725                                           unsigned &RegNum, unsigned &RegWidth,
2726                                           SmallVectorImpl<AsmToken> &Tokens) {
2727   auto Loc = getLoc();
2728   Reg = AMDGPU::NoRegister;
2729 
2730   if (isToken(AsmToken::Identifier)) {
2731     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2732     if (Reg == AMDGPU::NoRegister)
2733       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2734   } else {
2735     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2736   }
2737 
2738   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2739   if (Reg == AMDGPU::NoRegister) {
2740     assert(Parser.hasPendingError());
2741     return false;
2742   }
2743 
2744   if (!subtargetHasRegister(*TRI, Reg)) {
2745     if (Reg == AMDGPU::SGPR_NULL) {
2746       Error(Loc, "'null' operand is not supported on this GPU");
2747     } else {
2748       Error(Loc, "register not available on this GPU");
2749     }
2750     return false;
2751   }
2752 
2753   return true;
2754 }
2755 
2756 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2757                                           unsigned &RegNum, unsigned &RegWidth,
2758                                           bool RestoreOnFailure /*=false*/) {
2759   Reg = AMDGPU::NoRegister;
2760 
2761   SmallVector<AsmToken, 1> Tokens;
2762   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2763     if (RestoreOnFailure) {
2764       while (!Tokens.empty()) {
2765         getLexer().UnLex(Tokens.pop_back_val());
2766       }
2767     }
2768     return true;
2769   }
2770   return false;
2771 }
2772 
2773 Optional<StringRef>
2774 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2775   switch (RegKind) {
2776   case IS_VGPR:
2777     return StringRef(".amdgcn.next_free_vgpr");
2778   case IS_SGPR:
2779     return StringRef(".amdgcn.next_free_sgpr");
2780   default:
2781     return None;
2782   }
2783 }
2784 
2785 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2786   auto SymbolName = getGprCountSymbolName(RegKind);
2787   assert(SymbolName && "initializing invalid register kind");
2788   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2789   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2790 }
2791 
2792 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2793                                             unsigned DwordRegIndex,
2794                                             unsigned RegWidth) {
2795   // Symbols are only defined for GCN targets
2796   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2797     return true;
2798 
2799   auto SymbolName = getGprCountSymbolName(RegKind);
2800   if (!SymbolName)
2801     return true;
2802   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2803 
2804   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2805   int64_t OldCount;
2806 
2807   if (!Sym->isVariable())
2808     return !Error(getLoc(),
2809                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2810   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2811     return !Error(
2812         getLoc(),
2813         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2814 
2815   if (OldCount <= NewMax)
2816     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2817 
2818   return true;
2819 }
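// For example, after parsing v[4:7] (DwordRegIndex = 4, RegWidth = 128),
// NewMax is 7, so .amdgcn.next_free_vgpr is raised to 8 unless it already
// holds a larger value.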
2820 
2821 std::unique_ptr<AMDGPUOperand>
2822 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2823   const auto &Tok = getToken();
2824   SMLoc StartLoc = Tok.getLoc();
2825   SMLoc EndLoc = Tok.getEndLoc();
2826   RegisterKind RegKind;
2827   unsigned Reg, RegNum, RegWidth;
2828 
2829   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2830     return nullptr;
2831   }
2832   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2833     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2834       return nullptr;
2835   } else
2836     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2837   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2838 }
2839 
2840 OperandMatchResultTy
2841 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2842   // TODO: add syntactic sugar for 1/(2*PI)
2843 
2844   assert(!isRegister());
2845   assert(!isModifier());
2846 
2847   const auto& Tok = getToken();
2848   const auto& NextTok = peekToken();
2849   bool IsReal = Tok.is(AsmToken::Real);
2850   SMLoc S = getLoc();
2851   bool Negate = false;
2852 
2853   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2854     lex();
2855     IsReal = true;
2856     Negate = true;
2857   }
2858 
2859   if (IsReal) {
    // Floating-point expressions are not supported. Only floating-point
    // literals with an optional sign are allowed.
2863 
2864     StringRef Num = getTokenStr();
2865     lex();
2866 
2867     APFloat RealVal(APFloat::IEEEdouble());
2868     auto roundMode = APFloat::rmNearestTiesToEven;
2869     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2870       return MatchOperand_ParseFail;
2871     }
2872     if (Negate)
2873       RealVal.changeSign();
2874 
2875     Operands.push_back(
2876       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2877                                AMDGPUOperand::ImmTyNone, true));
2878 
2879     return MatchOperand_Success;
2880 
2881   } else {
2882     int64_t IntVal;
2883     const MCExpr *Expr;
2884     SMLoc S = getLoc();
2885 
2886     if (HasSP3AbsModifier) {
      // This is a workaround for handling expressions
      // as arguments of the SP3 'abs' modifier, for example:
      //     |1.0|
      //     |-1|
      //     |1+x|
      // This syntax is not compatible with the syntax of standard
      // MC expressions (due to the trailing '|').
2894       SMLoc EndLoc;
2895       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2896         return MatchOperand_ParseFail;
2897     } else {
2898       if (Parser.parseExpression(Expr))
2899         return MatchOperand_ParseFail;
2900     }
2901 
2902     if (Expr->evaluateAsAbsolute(IntVal)) {
2903       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2904     } else {
2905       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2906     }
2907 
2908     return MatchOperand_Success;
2909   }
2910 
2911   return MatchOperand_NoMatch;
2912 }
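// For example, "-1.0" is accepted as an fp literal with the sign folded in,
// "2+2" is parsed as an MC expression and folded to the immediate 4, and a
// symbolic expression that cannot be evaluated yet is kept as an MCExpr
// operand.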
2913 
2914 OperandMatchResultTy
2915 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2916   if (!isRegister())
2917     return MatchOperand_NoMatch;
2918 
2919   if (auto R = parseRegister()) {
2920     assert(R->isReg());
2921     Operands.push_back(std::move(R));
2922     return MatchOperand_Success;
2923   }
2924   return MatchOperand_ParseFail;
2925 }
2926 
2927 OperandMatchResultTy
2928 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2929   auto res = parseReg(Operands);
2930   if (res != MatchOperand_NoMatch) {
2931     return res;
2932   } else if (isModifier()) {
2933     return MatchOperand_NoMatch;
2934   } else {
2935     return parseImm(Operands, HasSP3AbsMod);
2936   }
2937 }
2938 
2939 bool
2940 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2941   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2942     const auto &str = Token.getString();
2943     return str == "abs" || str == "neg" || str == "sext";
2944   }
2945   return false;
2946 }
2947 
2948 bool
2949 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2950   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2951 }
2952 
2953 bool
2954 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2955   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2956 }
2957 
2958 bool
2959 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2960   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2961 }
2962 
2963 // Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
2965 // avoid parsing these modifiers as expressions. Currently
2966 // recognized sequences are:
2967 //   |...|
2968 //   abs(...)
2969 //   neg(...)
2970 //   sext(...)
2971 //   -reg
2972 //   -|...|
2973 //   -abs(...)
2974 //   name:...
2975 // Note that simple opcode modifiers like 'gds' may be parsed as
2976 // expressions; this is a special case. See getExpressionAsToken.
2977 //
2978 bool
2979 AMDGPUAsmParser::isModifier() {
2980 
2981   AsmToken Tok = getToken();
2982   AsmToken NextToken[2];
2983   peekTokens(NextToken);
2984 
2985   return isOperandModifier(Tok, NextToken[0]) ||
2986          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2987          isOpcodeModifierWithVal(Tok, NextToken[0]);
2988 }
2989 
2990 // Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following contexts:
2992 //
2993 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2994 // 2. Before an 'abs' modifier: -abs(...)
2995 // 3. Before an SP3 'abs' modifier: -|...|
2996 //
2997 // In all other cases "-" is handled as a part
2998 // of an expression that follows the sign.
2999 //
// Note: When "-" is followed by an integer literal N,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, using a floating-point
// NEG modifier here would give integer literals a different
// meaning with VOP1/2/C than with VOP3, for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
3011 //
3012 bool
3013 AMDGPUAsmParser::parseSP3NegModifier() {
3014 
3015   AsmToken NextToken[2];
3016   peekTokens(NextToken);
3017 
3018   if (isToken(AsmToken::Minus) &&
3019       (isRegister(NextToken[0], NextToken[1]) ||
3020        NextToken[0].is(AsmToken::Pipe) ||
3021        isId(NextToken[0], "abs"))) {
3022     lex();
3023     return true;
3024   }
3025 
3026   return false;
3027 }
3028 
3029 OperandMatchResultTy
3030 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3031                                               bool AllowImm) {
3032   bool Neg, SP3Neg;
3033   bool Abs, SP3Abs;
3034   SMLoc Loc;
3035 
3036   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3037   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3038     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3039     return MatchOperand_ParseFail;
3040   }
3041 
3042   SP3Neg = parseSP3NegModifier();
3043 
3044   Loc = getLoc();
3045   Neg = trySkipId("neg");
3046   if (Neg && SP3Neg) {
3047     Error(Loc, "expected register or immediate");
3048     return MatchOperand_ParseFail;
3049   }
3050   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3051     return MatchOperand_ParseFail;
3052 
3053   Abs = trySkipId("abs");
3054   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3055     return MatchOperand_ParseFail;
3056 
3057   Loc = getLoc();
3058   SP3Abs = trySkipToken(AsmToken::Pipe);
3059   if (Abs && SP3Abs) {
3060     Error(Loc, "expected register or immediate");
3061     return MatchOperand_ParseFail;
3062   }
3063 
3064   OperandMatchResultTy Res;
3065   if (AllowImm) {
3066     Res = parseRegOrImm(Operands, SP3Abs);
3067   } else {
3068     Res = parseReg(Operands);
3069   }
3070   if (Res != MatchOperand_Success) {
3071     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3072   }
3073 
3074   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3075     return MatchOperand_ParseFail;
3076   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3077     return MatchOperand_ParseFail;
3078   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3079     return MatchOperand_ParseFail;
3080 
3081   AMDGPUOperand::Modifiers Mods;
3082   Mods.Abs = Abs || SP3Abs;
3083   Mods.Neg = Neg || SP3Neg;
3084 
3085   if (Mods.hasFPModifiers()) {
3086     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3087     if (Op.isExpr()) {
3088       Error(Op.getStartLoc(), "expected an absolute expression");
3089       return MatchOperand_ParseFail;
3090     }
3091     Op.setModifiers(Mods);
3092   }
3093   return MatchOperand_Success;
3094 }
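// For example, this accepts operands such as "v0", "-v0", "|v1|", "abs(v1)",
// "neg(v2)" and "-|v3|"; an ambiguous form like "--1" is rejected with
// "invalid syntax, expected 'neg' modifier".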
3095 
3096 OperandMatchResultTy
3097 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3098                                                bool AllowImm) {
3099   bool Sext = trySkipId("sext");
3100   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3101     return MatchOperand_ParseFail;
3102 
3103   OperandMatchResultTy Res;
3104   if (AllowImm) {
3105     Res = parseRegOrImm(Operands);
3106   } else {
3107     Res = parseReg(Operands);
3108   }
3109   if (Res != MatchOperand_Success) {
3110     return Sext? MatchOperand_ParseFail : Res;
3111   }
3112 
3113   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3114     return MatchOperand_ParseFail;
3115 
3116   AMDGPUOperand::Modifiers Mods;
3117   Mods.Sext = Sext;
3118 
3119   if (Mods.hasIntModifiers()) {
3120     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3121     if (Op.isExpr()) {
3122       Error(Op.getStartLoc(), "expected an absolute expression");
3123       return MatchOperand_ParseFail;
3124     }
3125     Op.setModifiers(Mods);
3126   }
3127 
3128   return MatchOperand_Success;
3129 }
3130 
3131 OperandMatchResultTy
3132 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3133   return parseRegOrImmWithFPInputMods(Operands, false);
3134 }
3135 
3136 OperandMatchResultTy
3137 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3138   return parseRegOrImmWithIntInputMods(Operands, false);
3139 }
3140 
3141 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3142   auto Loc = getLoc();
3143   if (trySkipId("off")) {
3144     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3145                                                 AMDGPUOperand::ImmTyOff, false));
3146     return MatchOperand_Success;
3147   }
3148 
3149   if (!isRegister())
3150     return MatchOperand_NoMatch;
3151 
3152   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3153   if (Reg) {
3154     Operands.push_back(std::move(Reg));
3155     return MatchOperand_Success;
3156   }
3157 
3158   return MatchOperand_ParseFail;
3160 }
3161 
3162 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3163   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3164 
3165   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3166       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3167       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3168       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3169     return Match_InvalidOperand;
3170 
3171   if ((TSFlags & SIInstrFlags::VOP3) &&
3172       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3173       getForcedEncodingSize() != 64)
3174     return Match_PreferE32;
3175 
3176   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3177       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3178     // v_mac_f32/16 allow only dst_sel == DWORD;
3179     auto OpNum =
3180         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3181     const auto &Op = Inst.getOperand(OpNum);
3182     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3183       return Match_InvalidOperand;
3184     }
3185   }
3186 
3187   return Match_Success;
3188 }
3189 
3190 static ArrayRef<unsigned> getAllVariants() {
3191   static const unsigned Variants[] = {
3192     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3193     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3194   };
3195 
3196   return makeArrayRef(Variants);
3197 }
3198 
3199 // What asm variants we should check
3200 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3201   if (getForcedEncodingSize() == 32) {
3202     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3203     return makeArrayRef(Variants);
3204   }
3205 
3206   if (isForcedVOP3()) {
3207     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3208     return makeArrayRef(Variants);
3209   }
3210 
3211   if (isForcedSDWA()) {
3212     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3213                                         AMDGPUAsmVariants::SDWA9};
3214     return makeArrayRef(Variants);
3215   }
3216 
3217   if (isForcedDPP()) {
3218     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3219     return makeArrayRef(Variants);
3220   }
3221 
3222   return getAllVariants();
3223 }
3224 
3225 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3226   if (getForcedEncodingSize() == 32)
3227     return "e32";
3228 
3229   if (isForcedVOP3())
3230     return "e64";
3231 
3232   if (isForcedSDWA())
3233     return "sdwa";
3234 
3235   if (isForcedDPP())
3236     return "dpp";
3237 
3238   return "";
3239 }
3240 
3241 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3242   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3243   const unsigned Num = Desc.getNumImplicitUses();
3244   for (unsigned i = 0; i < Num; ++i) {
3245     unsigned Reg = Desc.ImplicitUses[i];
3246     switch (Reg) {
3247     case AMDGPU::FLAT_SCR:
3248     case AMDGPU::VCC:
3249     case AMDGPU::VCC_LO:
3250     case AMDGPU::VCC_HI:
3251     case AMDGPU::M0:
3252       return Reg;
3253     default:
3254       break;
3255     }
3256   }
3257   return AMDGPU::NoRegister;
3258 }
3259 
3260 // NB: This code is correct only when used to check constant
3261 // bus limitations because GFX7 supports no f16 inline constants.
3262 // Note that there are no cases when a GFX7 opcode violates
3263 // constant bus limitations due to the use of an f16 constant.
3264 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3265                                        unsigned OpIdx) const {
3266   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3267 
3268   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3269     return false;
3270   }
3271 
3272   const MCOperand &MO = Inst.getOperand(OpIdx);
3273 
3274   int64_t Val = MO.getImm();
3275   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3276 
3277   switch (OpSize) { // expected operand size
3278   case 8:
3279     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3280   case 4:
3281     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3282   case 2: {
3283     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3284     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3285         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3286         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3287       return AMDGPU::isInlinableIntLiteral(Val);
3288 
3289     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3290         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3291         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3292       return AMDGPU::isInlinableIntLiteralV216(Val);
3293 
3294     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3295         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3296         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3297       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3298 
3299     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3300   }
3301   default:
3302     llvm_unreachable("invalid operand size");
3303   }
3304 }
3305 
3306 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3307   if (!isGFX10Plus())
3308     return 1;
3309 
3310   switch (Opcode) {
3311   // 64-bit shift instructions can use only one scalar value input
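  // (Illustrative: "v_lshlrev_b64 v[0:1], s4, s[2:3]" would need two scalar
  // inputs and is therefore rejected, while a single SGPR or literal is fine.)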
3312   case AMDGPU::V_LSHLREV_B64_e64:
3313   case AMDGPU::V_LSHLREV_B64_gfx10:
3314   case AMDGPU::V_LSHRREV_B64_e64:
3315   case AMDGPU::V_LSHRREV_B64_gfx10:
3316   case AMDGPU::V_ASHRREV_I64_e64:
3317   case AMDGPU::V_ASHRREV_I64_gfx10:
3318   case AMDGPU::V_LSHL_B64_e64:
3319   case AMDGPU::V_LSHR_B64_e64:
3320   case AMDGPU::V_ASHR_I64_e64:
3321     return 1;
3322   default:
3323     return 2;
3324   }
3325 }
3326 
3327 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3328   const MCOperand &MO = Inst.getOperand(OpIdx);
3329   if (MO.isImm()) {
3330     return !isInlineConstant(Inst, OpIdx);
3331   } else if (MO.isReg()) {
3332     auto Reg = MO.getReg();
3333     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3334     auto PReg = mc2PseudoReg(Reg);
3335     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3336   } else {
3337     return true;
3338   }
3339 }
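
// Illustrative note: on targets with a constant bus limit of 1 (pre-GFX10),
// "v_add_f32_e64 v0, s0, s1" reads two distinct SGPRs over the constant bus
// and is rejected by validateConstantBusLimitations below, while
// "v_add_f32_e64 v0, s0, v1" is accepted. GFX10+ raises the limit to 2 for
// most VALU instructions (see getConstantBusLimit).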
3340 
3341 bool
3342 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3343                                                 const OperandVector &Operands) {
3344   const unsigned Opcode = Inst.getOpcode();
3345   const MCInstrDesc &Desc = MII.get(Opcode);
3346   unsigned LastSGPR = AMDGPU::NoRegister;
3347   unsigned ConstantBusUseCount = 0;
3348   unsigned NumLiterals = 0;
3349   unsigned LiteralSize;
3350 
3351   if (Desc.TSFlags &
3352       (SIInstrFlags::VOPC |
3353        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3354        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3355        SIInstrFlags::SDWA)) {
3356     // Check special imm operands (used by madmk, etc)
3357     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3358       ++NumLiterals;
3359       LiteralSize = 4;
3360     }
3361 
3362     SmallDenseSet<unsigned> SGPRsUsed;
3363     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3364     if (SGPRUsed != AMDGPU::NoRegister) {
3365       SGPRsUsed.insert(SGPRUsed);
3366       ++ConstantBusUseCount;
3367     }
3368 
3369     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3370     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3371     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3372 
3373     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3374 
3375     for (int OpIdx : OpIndices) {
3376       if (OpIdx == -1) break;
3377 
3378       const MCOperand &MO = Inst.getOperand(OpIdx);
3379       if (usesConstantBus(Inst, OpIdx)) {
3380         if (MO.isReg()) {
3381           LastSGPR = mc2PseudoReg(MO.getReg());
3382           // Pairs of registers with partial intersections like these
3383           //   s0, s[0:1]
3384           //   flat_scratch_lo, flat_scratch
3385           //   flat_scratch_lo, flat_scratch_hi
3386           // are theoretically valid but they are disabled anyway.
3387           // Note that this code mimics SIInstrInfo::verifyInstruction
3388           if (!SGPRsUsed.count(LastSGPR)) {
3389             SGPRsUsed.insert(LastSGPR);
3390             ++ConstantBusUseCount;
3391           }
3392         } else { // Expression or a literal
3393 
3394           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3395             continue; // special operand like VINTERP attr_chan
3396 
3397           // An instruction may use only one literal.
3398           // This has been validated on the previous step.
3399           // See validateVOPLiteral.
3400           // This literal may be used as more than one operand.
3401           // If all these operands are of the same size,
3402           // this literal counts as one scalar value.
3403           // Otherwise it counts as 2 scalar values.
3404           // See "GFX10 Shader Programming", section 3.6.2.3.
3405 
3406           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3407           if (Size < 4) Size = 4;
3408 
3409           if (NumLiterals == 0) {
3410             NumLiterals = 1;
3411             LiteralSize = Size;
3412           } else if (LiteralSize != Size) {
3413             NumLiterals = 2;
3414           }
3415         }
3416       }
3417     }
3418   }
3419   ConstantBusUseCount += NumLiterals;
3420 
3421   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3422     return true;
3423 
3424   SMLoc LitLoc = getLitLoc(Operands);
3425   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3426   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3427   Error(Loc, "invalid operand (violates constant bus restrictions)");
3428   return false;
3429 }
3430 
3431 bool
3432 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3433                                                  const OperandVector &Operands) {
3434   const unsigned Opcode = Inst.getOpcode();
3435   const MCInstrDesc &Desc = MII.get(Opcode);
3436 
3437   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3438   if (DstIdx == -1 ||
3439       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3440     return true;
3441   }
3442 
3443   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3444 
3445   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3446   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3447   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3448 
3449   assert(DstIdx != -1);
3450   const MCOperand &Dst = Inst.getOperand(DstIdx);
3451   assert(Dst.isReg());
3452 
3453   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3454 
3455   for (int SrcIdx : SrcIndices) {
3456     if (SrcIdx == -1) break;
3457     const MCOperand &Src = Inst.getOperand(SrcIdx);
3458     if (Src.isReg()) {
3459       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3460         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3461         Error(getRegLoc(SrcReg, Operands),
3462           "destination must be different than all sources");
3463         return false;
3464       }
3465     }
3466   }
3467 
3468   return true;
3469 }
3470 
3471 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3472 
3473   const unsigned Opc = Inst.getOpcode();
3474   const MCInstrDesc &Desc = MII.get(Opc);
3475 
3476   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3477     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3478     assert(ClampIdx != -1);
3479     return Inst.getOperand(ClampIdx).getImm() == 0;
3480   }
3481 
3482   return true;
3483 }
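
// Illustrative note: dmask selects the image components, so the expected vdata
// size is popcount(dmask) registers (4 for gather4), plus one if tfe is set;
// with packed d16 the component count is halved and rounded up. For example,
// "image_load v[0:2], v[4:5], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D" expects a
// 3-register vdata tuple.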
3484 
3485 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3486 
3487   const unsigned Opc = Inst.getOpcode();
3488   const MCInstrDesc &Desc = MII.get(Opc);
3489 
3490   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3491     return true;
3492 
3493   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3494   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3495   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3496 
3497   assert(VDataIdx != -1);
3498 
3499   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3500     return true;
3501 
3502   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3503   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3504   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3505   if (DMask == 0)
3506     DMask = 1;
3507 
3508   unsigned DataSize =
3509     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3510   if (hasPackedD16()) {
3511     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3512     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3513       DataSize = (DataSize + 1) / 2;
3514   }
3515 
3516   return (VDataSize / 4) == DataSize + TFESize;
3517 }
3518 
3519 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3520   const unsigned Opc = Inst.getOpcode();
3521   const MCInstrDesc &Desc = MII.get(Opc);
3522 
3523   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3524     return true;
3525 
3526   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3527 
3528   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3529       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3530   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3531   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3532   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3533   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3534 
3535   assert(VAddr0Idx != -1);
3536   assert(SrsrcIdx != -1);
3537   assert(SrsrcIdx > VAddr0Idx);
3538 
3539   if (DimIdx == -1)
3540     return true; // intersect_ray
3541 
3542   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3543   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3544   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3545   unsigned ActualAddrSize =
3546       IsNSA ? SrsrcIdx - VAddr0Idx
3547             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3548   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3549 
3550   unsigned ExpectedAddrSize =
3551       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3552 
3553   if (!IsNSA) {
3554     if (ExpectedAddrSize > 8)
3555       ExpectedAddrSize = 16;
3556 
3557     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3558     // This provides backward compatibility for assembly created
3559     // before 160b/192b/224b types were directly supported.
3560     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3561       return true;
3562   }
3563 
3564   return ActualAddrSize == ExpectedAddrSize;
3565 }
3566 
3567 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3568 
3569   const unsigned Opc = Inst.getOpcode();
3570   const MCInstrDesc &Desc = MII.get(Opc);
3571 
3572   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3573     return true;
3574   if (!Desc.mayLoad() || !Desc.mayStore())
3575     return true; // Not atomic
3576 
3577   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3578   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3579 
3580   // This is an incomplete check because image_atomic_cmpswap
3581   // may only use 0x3 and 0xf while other atomic operations
3582   // may use 0x1 and 0x3. However these limitations are
3583   // verified when we check that dmask matches dst size.
3584   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3585 }
3586 
3587 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3588 
3589   const unsigned Opc = Inst.getOpcode();
3590   const MCInstrDesc &Desc = MII.get(Opc);
3591 
3592   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3593     return true;
3594 
3595   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3596   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3597 
3598   // GATHER4 instructions use dmask in a different fashion compared to
3599   // other MIMG instructions. The only useful DMASK values are
3600   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3601   // (red,red,red,red) etc.) The ISA document doesn't mention
3602   // this.
3603   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3604 }
3605 
3606 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3607   const unsigned Opc = Inst.getOpcode();
3608   const MCInstrDesc &Desc = MII.get(Opc);
3609 
3610   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3611     return true;
3612 
3613   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3614   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3615       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3616 
3617   if (!BaseOpcode->MSAA)
3618     return true;
3619 
3620   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3621   assert(DimIdx != -1);
3622 
3623   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3624   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3625 
3626   return DimInfo->MSAA;
3627 }
3628 
3629 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3630 {
3631   switch (Opcode) {
3632   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3633   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3634   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3635     return true;
3636   default:
3637     return false;
3638   }
3639 }
3640 
3641 // movrels* opcodes should only allow VGPRs as src0.
3642 // This is specified in .td description for vop1/vop3,
3643 // but sdwa is handled differently. See isSDWAOperand.
3644 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3645                                       const OperandVector &Operands) {
3646 
3647   const unsigned Opc = Inst.getOpcode();
3648   const MCInstrDesc &Desc = MII.get(Opc);
3649 
3650   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3651     return true;
3652 
3653   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3654   assert(Src0Idx != -1);
3655 
3656   SMLoc ErrLoc;
3657   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3658   if (Src0.isReg()) {
3659     auto Reg = mc2PseudoReg(Src0.getReg());
3660     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3661     if (!isSGPR(Reg, TRI))
3662       return true;
3663     ErrLoc = getRegLoc(Reg, Operands);
3664   } else {
3665     ErrLoc = getConstLoc(Operands);
3666   }
3667 
3668   Error(ErrLoc, "source operand must be a VGPR");
3669   return false;
3670 }
3671 
3672 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3673                                           const OperandVector &Operands) {
3674 
3675   const unsigned Opc = Inst.getOpcode();
3676 
3677   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3678     return true;
3679 
3680   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3681   assert(Src0Idx != -1);
3682 
3683   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3684   if (!Src0.isReg())
3685     return true;
3686 
3687   auto Reg = mc2PseudoReg(Src0.getReg());
3688   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3689   if (isSGPR(Reg, TRI)) {
3690     Error(getRegLoc(Reg, Operands),
3691           "source operand must be either a VGPR or an inline constant");
3692     return false;
3693   }
3694 
3695   return true;
3696 }
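
// Illustrative note: for MFMA instructions whose dst tuple is wider than
// 128 bits, src2 (the accumulator) may fully alias dst but must not partially
// overlap it; e.g. with dst a[0:15], src2 a[0:15] is accepted while a[2:17]
// is rejected.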
3697 
3698 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3699                                    const OperandVector &Operands) {
3700   const unsigned Opc = Inst.getOpcode();
3701   const MCInstrDesc &Desc = MII.get(Opc);
3702 
3703   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3704     return true;
3705 
3706   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3707   if (Src2Idx == -1)
3708     return true;
3709 
3710   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3711   if (!Src2.isReg())
3712     return true;
3713 
3714   MCRegister Src2Reg = Src2.getReg();
3715   MCRegister DstReg = Inst.getOperand(0).getReg();
3716   if (Src2Reg == DstReg)
3717     return true;
3718 
3719   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3720   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3721     return true;
3722 
3723   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3724     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3725           "source 2 operand must not partially overlap with dst");
3726     return false;
3727   }
3728 
3729   return true;
3730 }
3731 
3732 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3733   switch (Inst.getOpcode()) {
3734   default:
3735     return true;
3736   case V_DIV_SCALE_F32_gfx6_gfx7:
3737   case V_DIV_SCALE_F32_vi:
3738   case V_DIV_SCALE_F32_gfx10:
3739   case V_DIV_SCALE_F64_gfx6_gfx7:
3740   case V_DIV_SCALE_F64_vi:
3741   case V_DIV_SCALE_F64_gfx10:
3742     break;
3743   }
3744 
3745   // TODO: Check that src0 = src1 or src2.
3746 
3747   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3748                     AMDGPU::OpName::src1_modifiers,
3749                     AMDGPU::OpName::src2_modifiers}) {
3750     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3751             .getImm() &
3752         SISrcMods::ABS) {
3753       return false;
3754     }
3755   }
3756 
3757   return true;
3758 }
3759 
3760 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3761 
3762   const unsigned Opc = Inst.getOpcode();
3763   const MCInstrDesc &Desc = MII.get(Opc);
3764 
3765   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3766     return true;
3767 
3768   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3769   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3770     if (isCI() || isSI())
3771       return false;
3772   }
3773 
3774   return true;
3775 }
3776 
3777 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3778   const unsigned Opc = Inst.getOpcode();
3779   const MCInstrDesc &Desc = MII.get(Opc);
3780 
3781   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3782     return true;
3783 
3784   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3785   if (DimIdx < 0)
3786     return true;
3787 
3788   long Imm = Inst.getOperand(DimIdx).getImm();
3789   if (Imm < 0 || Imm >= 8)
3790     return false;
3791 
3792   return true;
3793 }
3794 
3795 static bool IsRevOpcode(const unsigned Opcode)
3796 {
3797   switch (Opcode) {
3798   case AMDGPU::V_SUBREV_F32_e32:
3799   case AMDGPU::V_SUBREV_F32_e64:
3800   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3801   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3802   case AMDGPU::V_SUBREV_F32_e32_vi:
3803   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3804   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3805   case AMDGPU::V_SUBREV_F32_e64_vi:
3806 
3807   case AMDGPU::V_SUBREV_CO_U32_e32:
3808   case AMDGPU::V_SUBREV_CO_U32_e64:
3809   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3810   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3811 
3812   case AMDGPU::V_SUBBREV_U32_e32:
3813   case AMDGPU::V_SUBBREV_U32_e64:
3814   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3815   case AMDGPU::V_SUBBREV_U32_e32_vi:
3816   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3817   case AMDGPU::V_SUBBREV_U32_e64_vi:
3818 
3819   case AMDGPU::V_SUBREV_U32_e32:
3820   case AMDGPU::V_SUBREV_U32_e64:
3821   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3822   case AMDGPU::V_SUBREV_U32_e32_vi:
3823   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3824   case AMDGPU::V_SUBREV_U32_e64_vi:
3825 
3826   case AMDGPU::V_SUBREV_F16_e32:
3827   case AMDGPU::V_SUBREV_F16_e64:
3828   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3829   case AMDGPU::V_SUBREV_F16_e32_vi:
3830   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3831   case AMDGPU::V_SUBREV_F16_e64_vi:
3832 
3833   case AMDGPU::V_SUBREV_U16_e32:
3834   case AMDGPU::V_SUBREV_U16_e64:
3835   case AMDGPU::V_SUBREV_U16_e32_vi:
3836   case AMDGPU::V_SUBREV_U16_e64_vi:
3837 
3838   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3839   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3840   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3841 
3842   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3843   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3844 
3845   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3846   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3847 
3848   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3849   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3850 
3851   case AMDGPU::V_LSHRREV_B32_e32:
3852   case AMDGPU::V_LSHRREV_B32_e64:
3853   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3854   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3855   case AMDGPU::V_LSHRREV_B32_e32_vi:
3856   case AMDGPU::V_LSHRREV_B32_e64_vi:
3857   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3858   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3859 
3860   case AMDGPU::V_ASHRREV_I32_e32:
3861   case AMDGPU::V_ASHRREV_I32_e64:
3862   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3863   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3864   case AMDGPU::V_ASHRREV_I32_e32_vi:
3865   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3866   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3867   case AMDGPU::V_ASHRREV_I32_e64_vi:
3868 
3869   case AMDGPU::V_LSHLREV_B32_e32:
3870   case AMDGPU::V_LSHLREV_B32_e64:
3871   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3872   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3873   case AMDGPU::V_LSHLREV_B32_e32_vi:
3874   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3875   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3876   case AMDGPU::V_LSHLREV_B32_e64_vi:
3877 
3878   case AMDGPU::V_LSHLREV_B16_e32:
3879   case AMDGPU::V_LSHLREV_B16_e64:
3880   case AMDGPU::V_LSHLREV_B16_e32_vi:
3881   case AMDGPU::V_LSHLREV_B16_e64_vi:
3882   case AMDGPU::V_LSHLREV_B16_gfx10:
3883 
3884   case AMDGPU::V_LSHRREV_B16_e32:
3885   case AMDGPU::V_LSHRREV_B16_e64:
3886   case AMDGPU::V_LSHRREV_B16_e32_vi:
3887   case AMDGPU::V_LSHRREV_B16_e64_vi:
3888   case AMDGPU::V_LSHRREV_B16_gfx10:
3889 
3890   case AMDGPU::V_ASHRREV_I16_e32:
3891   case AMDGPU::V_ASHRREV_I16_e64:
3892   case AMDGPU::V_ASHRREV_I16_e32_vi:
3893   case AMDGPU::V_ASHRREV_I16_e64_vi:
3894   case AMDGPU::V_ASHRREV_I16_gfx10:
3895 
3896   case AMDGPU::V_LSHLREV_B64_e64:
3897   case AMDGPU::V_LSHLREV_B64_gfx10:
3898   case AMDGPU::V_LSHLREV_B64_vi:
3899 
3900   case AMDGPU::V_LSHRREV_B64_e64:
3901   case AMDGPU::V_LSHRREV_B64_gfx10:
3902   case AMDGPU::V_LSHRREV_B64_vi:
3903 
3904   case AMDGPU::V_ASHRREV_I64_e64:
3905   case AMDGPU::V_ASHRREV_I64_gfx10:
3906   case AMDGPU::V_ASHRREV_I64_vi:
3907 
3908   case AMDGPU::V_PK_LSHLREV_B16:
3909   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3910   case AMDGPU::V_PK_LSHLREV_B16_vi:
3911 
3912   case AMDGPU::V_PK_LSHRREV_B16:
3913   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3914   case AMDGPU::V_PK_LSHRREV_B16_vi:
3915   case AMDGPU::V_PK_ASHRREV_I16:
3916   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3917   case AMDGPU::V_PK_ASHRREV_I16_vi:
3918     return true;
3919   default:
3920     return false;
3921   }
3922 }
3923 
3924 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3925 
3926   using namespace SIInstrFlags;
3927   const unsigned Opcode = Inst.getOpcode();
3928   const MCInstrDesc &Desc = MII.get(Opcode);
3929 
3930   // lds_direct register is defined so that it can be used
3931   // with 9-bit operands only. Ignore encodings which do not accept these.
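  // Illustrative examples: "v_mov_b32 v0, lds_direct" (lds_direct as src0 of a
  // VOP1 encoding) is accepted, whereas lds_direct used as src1/src2, with
  // *rev opcodes, with SDWA, or on GFX90A is diagnosed below.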
3932   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3933   if ((Desc.TSFlags & Enc) == 0)
3934     return None;
3935 
3936   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3937     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3938     if (SrcIdx == -1)
3939       break;
3940     const auto &Src = Inst.getOperand(SrcIdx);
3941     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3942 
3943       if (isGFX90A())
3944         return StringRef("lds_direct is not supported on this GPU");
3945 
3946       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3947         return StringRef("lds_direct cannot be used with this instruction");
3948 
3949       if (SrcName != OpName::src0)
3950         return StringRef("lds_direct may be used as src0 only");
3951     }
3952   }
3953 
3954   return None;
3955 }
3956 
3957 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3958   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3959     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3960     if (Op.isFlatOffset())
3961       return Op.getStartLoc();
3962   }
3963   return getLoc();
3964 }
3965 
3966 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3967                                          const OperandVector &Operands) {
3968   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3969   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3970     return true;
3971 
3972   auto Opcode = Inst.getOpcode();
3973   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3974   assert(OpNum != -1);
3975 
3976   const auto &Op = Inst.getOperand(OpNum);
3977   if (!hasFlatOffsets() && Op.getImm() != 0) {
3978     Error(getFlatOffsetLoc(Operands),
3979           "flat offset modifier is not supported on this GPU");
3980     return false;
3981   }
3982 
3983   // For FLAT segment the offset must be positive;
3984   // MSB is ignored and forced to zero.
3985   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3986     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3987     if (!isIntN(OffsetSize, Op.getImm())) {
3988       Error(getFlatOffsetLoc(Operands),
3989             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3990       return false;
3991     }
3992   } else {
3993     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3994     if (!isUIntN(OffsetSize, Op.getImm())) {
3995       Error(getFlatOffsetLoc(Operands),
3996             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3997       return false;
3998     }
3999   }
4000 
4001   return true;
4002 }
4003 
4004 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4005   // Start with second operand because SMEM Offset cannot be dst or src0.
4006   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4007     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4008     if (Op.isSMEMOffset())
4009       return Op.getStartLoc();
4010   }
4011   return getLoc();
4012 }
4013 
4014 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4015                                          const OperandVector &Operands) {
4016   if (isCI() || isSI())
4017     return true;
4018 
4019   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4020   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4021     return true;
4022 
4023   auto Opcode = Inst.getOpcode();
4024   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4025   if (OpNum == -1)
4026     return true;
4027 
4028   const auto &Op = Inst.getOperand(OpNum);
4029   if (!Op.isImm())
4030     return true;
4031 
4032   uint64_t Offset = Op.getImm();
4033   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4034   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4035       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4036     return true;
4037 
4038   Error(getSMEMOffsetLoc(Operands),
4039         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4040                                "expected a 21-bit signed offset");
4041 
4042   return false;
4043 }
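
// Illustrative note: SOP2/SOPC instructions can encode at most one 32-bit
// literal, so "s_add_u32 s0, 0x12345678, 0x87654321" (two distinct literal
// values) is rejected, while reusing the same literal value in both sources
// is allowed.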
4044 
4045 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4046   unsigned Opcode = Inst.getOpcode();
4047   const MCInstrDesc &Desc = MII.get(Opcode);
4048   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4049     return true;
4050 
4051   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4052   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4053 
4054   const int OpIndices[] = { Src0Idx, Src1Idx };
4055 
4056   unsigned NumExprs = 0;
4057   unsigned NumLiterals = 0;
4058   uint32_t LiteralValue;
4059 
4060   for (int OpIdx : OpIndices) {
4061     if (OpIdx == -1) break;
4062 
4063     const MCOperand &MO = Inst.getOperand(OpIdx);
4064     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4065     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4066       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4067         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4068         if (NumLiterals == 0 || LiteralValue != Value) {
4069           LiteralValue = Value;
4070           ++NumLiterals;
4071         }
4072       } else if (MO.isExpr()) {
4073         ++NumExprs;
4074       }
4075     }
4076   }
4077 
4078   return NumLiterals + NumExprs <= 1;
4079 }
4080 
4081 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4082   const unsigned Opc = Inst.getOpcode();
4083   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4084       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4085     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4086     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4087 
4088     if (OpSel & ~3)
4089       return false;
4090   }
4091 
4092   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4093     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4094     if (OpSelIdx != -1) {
4095       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4096         return false;
4097     }
4098     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4099     if (OpSelHiIdx != -1) {
4100       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4101         return false;
4102     }
4103   }
4104 
4105   return true;
4106 }
4107 
4108 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4109                                   const OperandVector &Operands) {
4110   const unsigned Opc = Inst.getOpcode();
4111   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4112   if (DppCtrlIdx < 0)
4113     return true;
4114   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4115 
4116   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4117     // DPP64 is supported for row_newbcast only.
4118     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4119     if (Src0Idx >= 0 &&
4120         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4121       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4122       Error(S, "64 bit dpp only supports row_newbcast");
4123       return false;
4124     }
4125   }
4126 
4127   return true;
4128 }
4129 
4130 // Check if VCC register matches wavefront size
4131 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4132   auto FB = getFeatureBits();
4133   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4134     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4135 }
4136 
4137 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
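// E.g. "v_add_f32_e64 v0, 0x3e800000, v1" is accepted only on targets with
// FeatureVOP3Literal (GFX10+); on older targets the literal must use the e32
// encoding. (Illustrative example.)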
4138 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4139                                          const OperandVector &Operands) {
4140   unsigned Opcode = Inst.getOpcode();
4141   const MCInstrDesc &Desc = MII.get(Opcode);
4142   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4143   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4144       ImmIdx == -1)
4145     return true;
4146 
4147   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4148   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4149   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4150 
4151   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4152 
4153   unsigned NumExprs = 0;
4154   unsigned NumLiterals = 0;
4155   uint32_t LiteralValue;
4156 
4157   for (int OpIdx : OpIndices) {
4158     if (OpIdx == -1)
4159       continue;
4160 
4161     const MCOperand &MO = Inst.getOperand(OpIdx);
4162     if (!MO.isImm() && !MO.isExpr())
4163       continue;
4164     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4165       continue;
4166 
4167     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4168         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4169       Error(getConstLoc(Operands),
4170             "inline constants are not allowed for this operand");
4171       return false;
4172     }
4173 
4174     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4175       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4176       if (NumLiterals == 0 || LiteralValue != Value) {
4177         LiteralValue = Value;
4178         ++NumLiterals;
4179       }
4180     } else if (MO.isExpr()) {
4181       ++NumExprs;
4182     }
4183   }
4184   NumLiterals += NumExprs;
4185 
4186   if (!NumLiterals)
4187     return true;
4188 
4189   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4190     Error(getLitLoc(Operands), "literal operands are not supported");
4191     return false;
4192   }
4193 
4194   if (NumLiterals > 1) {
4195     Error(getLitLoc(Operands), "only one literal operand is allowed");
4196     return false;
4197   }
4198 
4199   return true;
4200 }
4201 
4202 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4203 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4204                          const MCRegisterInfo *MRI) {
4205   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4206   if (OpIdx < 0)
4207     return -1;
4208 
4209   const MCOperand &Op = Inst.getOperand(OpIdx);
4210   if (!Op.isReg())
4211     return -1;
4212 
4213   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4214   auto Reg = Sub ? Sub : Op.getReg();
4215   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4216   return AGPR32.contains(Reg) ? 1 : 0;
4217 }
4218 
4219 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4220   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4221   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4222                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4223                   SIInstrFlags::DS)) == 0)
4224     return true;
4225 
4226   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4227                                                       : AMDGPU::OpName::vdata;
4228 
4229   const MCRegisterInfo *MRI = getMRI();
4230   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4231   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4232 
4233   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4234     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4235     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4236       return false;
4237   }
4238 
4239   auto FB = getFeatureBits();
4240   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4241     if (DataAreg < 0 || DstAreg < 0)
4242       return true;
4243     return DstAreg == DataAreg;
4244   }
4245 
4246   return DstAreg < 1 && DataAreg < 1;
4247 }
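
// Illustrative note: gfx90a requires VGPR and AGPR tuples to start at an even
// register, so e.g. "global_load_dwordx2 v[1:2], v[2:3], off" is rejected
// ("vgpr tuples must be 64 bit aligned"), while an even-aligned destination
// such as v[2:3] would be fine.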
4248 
4249 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4250   auto FB = getFeatureBits();
4251   if (!FB[AMDGPU::FeatureGFX90AInsts])
4252     return true;
4253 
4254   const MCRegisterInfo *MRI = getMRI();
4255   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4256   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4257   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4258     const MCOperand &Op = Inst.getOperand(I);
4259     if (!Op.isReg())
4260       continue;
4261 
4262     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4263     if (!Sub)
4264       continue;
4265 
4266     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4267       return false;
4268     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4269       return false;
4270   }
4271 
4272   return true;
4273 }
4274 
4275 // gfx90a has an undocumented limitation:
4276 // DS_GWS opcodes must use even aligned registers.
4277 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4278                                   const OperandVector &Operands) {
4279   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4280     return true;
4281 
4282   int Opc = Inst.getOpcode();
4283   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4284       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4285     return true;
4286 
4287   const MCRegisterInfo *MRI = getMRI();
4288   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4289   int Data0Pos =
4290       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4291   assert(Data0Pos != -1);
4292   auto Reg = Inst.getOperand(Data0Pos).getReg();
4293   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4294   if (RegIdx & 1) {
4295     SMLoc RegLoc = getRegLoc(Reg, Operands);
4296     Error(RegLoc, "vgpr must be even aligned");
4297     return false;
4298   }
4299 
4300   return true;
4301 }
4302 
4303 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4304                                             const OperandVector &Operands,
4305                                             const SMLoc &IDLoc) {
4306   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4307                                            AMDGPU::OpName::cpol);
4308   if (CPolPos == -1)
4309     return true;
4310 
4311   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4312 
4313   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4314   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4315       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4316     Error(IDLoc, "invalid cache policy for SMRD instruction");
4317     return false;
4318   }
4319 
4320   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4321     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4322     StringRef CStr(S.getPointer());
4323     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4324     Error(S, "scc is not supported on this GPU");
4325     return false;
4326   }
4327 
4328   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4329     return true;
4330 
4331   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4332     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4333       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4334                               : "instruction must use glc");
4335       return false;
4336     }
4337   } else {
4338     if (CPol & CPol::GLC) {
4339       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4340       StringRef CStr(S.getPointer());
4341       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4342       Error(S, isGFX940() ? "instruction must not use sc0"
4343                           : "instruction must not use glc");
4344       return false;
4345     }
4346   }
4347 
4348   return true;
4349 }
4350 
4351 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4352                                           const SMLoc &IDLoc,
4353                                           const OperandVector &Operands) {
4354   if (auto ErrMsg = validateLdsDirect(Inst)) {
4355     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4356     return false;
4357   }
4358   if (!validateSOPLiteral(Inst)) {
4359     Error(getLitLoc(Operands),
4360       "only one literal operand is allowed");
4361     return false;
4362   }
4363   if (!validateVOPLiteral(Inst, Operands)) {
4364     return false;
4365   }
4366   if (!validateConstantBusLimitations(Inst, Operands)) {
4367     return false;
4368   }
4369   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4370     return false;
4371   }
4372   if (!validateIntClampSupported(Inst)) {
4373     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4374       "integer clamping is not supported on this GPU");
4375     return false;
4376   }
4377   if (!validateOpSel(Inst)) {
4378     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4379       "invalid op_sel operand");
4380     return false;
4381   }
4382   if (!validateDPP(Inst, Operands)) {
4383     return false;
4384   }
4385   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4386   if (!validateMIMGD16(Inst)) {
4387     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4388       "d16 modifier is not supported on this GPU");
4389     return false;
4390   }
4391   if (!validateMIMGDim(Inst)) {
4392     Error(IDLoc, "dim modifier is required on this GPU");
4393     return false;
4394   }
4395   if (!validateMIMGMSAA(Inst)) {
4396     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4397           "invalid dim; must be MSAA type");
4398     return false;
4399   }
4400   if (!validateMIMGDataSize(Inst)) {
4401     Error(IDLoc,
4402       "image data size does not match dmask and tfe");
4403     return false;
4404   }
4405   if (!validateMIMGAddrSize(Inst)) {
4406     Error(IDLoc,
4407       "image address size does not match dim and a16");
4408     return false;
4409   }
4410   if (!validateMIMGAtomicDMask(Inst)) {
4411     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4412       "invalid atomic image dmask");
4413     return false;
4414   }
4415   if (!validateMIMGGatherDMask(Inst)) {
4416     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4417       "invalid image_gather dmask: only one bit must be set");
4418     return false;
4419   }
4420   if (!validateMovrels(Inst, Operands)) {
4421     return false;
4422   }
4423   if (!validateFlatOffset(Inst, Operands)) {
4424     return false;
4425   }
4426   if (!validateSMEMOffset(Inst, Operands)) {
4427     return false;
4428   }
4429   if (!validateMAIAccWrite(Inst, Operands)) {
4430     return false;
4431   }
4432   if (!validateMFMA(Inst, Operands)) {
4433     return false;
4434   }
4435   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4436     return false;
4437   }
4438 
4439   if (!validateAGPRLdSt(Inst)) {
4440     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4441     ? "invalid register class: data and dst should be all VGPR or AGPR"
4442     : "invalid register class: agpr loads and stores not supported on this GPU"
4443     );
4444     return false;
4445   }
4446   if (!validateVGPRAlign(Inst)) {
4447     Error(IDLoc,
4448       "invalid register class: vgpr tuples must be 64 bit aligned");
4449     return false;
4450   }
4451   if (!validateGWS(Inst, Operands)) {
4452     return false;
4453   }
4454 
4455   if (!validateDivScale(Inst)) {
4456     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4457     return false;
4458   }
4462 
4463   return true;
4464 }
4465 
4466 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4467                                             const FeatureBitset &FBS,
4468                                             unsigned VariantID = 0);
4469 
4470 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4471                                 const FeatureBitset &AvailableFeatures,
4472                                 unsigned VariantID);
4473 
4474 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4475                                        const FeatureBitset &FBS) {
4476   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4477 }
4478 
4479 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4480                                        const FeatureBitset &FBS,
4481                                        ArrayRef<unsigned> Variants) {
4482   for (auto Variant : Variants) {
4483     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4484       return true;
4485   }
4486 
4487   return false;
4488 }
4489 
4490 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4491                                                   const SMLoc &IDLoc) {
4492   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4493 
4494   // Check if requested instruction variant is supported.
4495   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4496     return false;
4497 
4498   // This instruction is not supported.
4499   // Clear any other pending errors because they are no longer relevant.
4500   getParser().clearPendingErrors();
4501 
4502   // Requested instruction variant is not supported.
4503   // Check if any other variants are supported.
4504   StringRef VariantName = getMatchedVariantName();
4505   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4506     return Error(IDLoc,
4507                  Twine(VariantName,
4508                        " variant of this instruction is not supported"));
4509   }
4510 
4511   // Finally check if this instruction is supported on any other GPU.
4512   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4513     return Error(IDLoc, "instruction not supported on this GPU");
4514   }
4515 
4516   // Instruction not supported on any GPU. Probably a typo.
4517   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4518   return Error(IDLoc, "invalid instruction" + Suggestion);
4519 }
4520 
4521 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4522                                               OperandVector &Operands,
4523                                               MCStreamer &Out,
4524                                               uint64_t &ErrorInfo,
4525                                               bool MatchingInlineAsm) {
4526   MCInst Inst;
4527   unsigned Result = Match_Success;
4528   for (auto Variant : getMatchedVariants()) {
4529     uint64_t EI;
4530     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4531                                   Variant);
4532     // We order match statuses from least to most specific and keep the most
4533     // specific status as the result:
4534     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4535     if ((R == Match_Success) ||
4536         (R == Match_PreferE32) ||
4537         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4538         (R == Match_InvalidOperand && Result != Match_MissingFeature
4539                                    && Result != Match_PreferE32) ||
4540         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4541                                    && Result != Match_MissingFeature
4542                                    && Result != Match_PreferE32)) {
4543       Result = R;
4544       ErrorInfo = EI;
4545     }
4546     if (R == Match_Success)
4547       break;
4548   }
4549 
4550   if (Result == Match_Success) {
4551     if (!validateInstruction(Inst, IDLoc, Operands)) {
4552       return true;
4553     }
4554     Inst.setLoc(IDLoc);
4555     Out.emitInstruction(Inst, getSTI());
4556     return false;
4557   }
4558 
4559   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4560   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4561     return true;
4562   }
4563 
4564   switch (Result) {
4565   default: break;
4566   case Match_MissingFeature:
4567     // It has been verified that the specified instruction
4568     // mnemonic is valid. A match was found but it requires
4569     // features which are not supported on this GPU.
4570     return Error(IDLoc, "operands are not valid for this GPU or mode");
4571 
4572   case Match_InvalidOperand: {
4573     SMLoc ErrorLoc = IDLoc;
4574     if (ErrorInfo != ~0ULL) {
4575       if (ErrorInfo >= Operands.size()) {
4576         return Error(IDLoc, "too few operands for instruction");
4577       }
4578       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4579       if (ErrorLoc == SMLoc())
4580         ErrorLoc = IDLoc;
4581     }
4582     return Error(ErrorLoc, "invalid operand for instruction");
4583   }
4584 
4585   case Match_PreferE32:
4586     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4587                         "should be encoded as e32");
4588   case Match_MnemonicFail:
4589     llvm_unreachable("Invalid instructions should have been handled already");
4590   }
4591   llvm_unreachable("Implement any new match types added!");
4592 }
4593 
4594 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4595   int64_t Tmp = -1;
4596   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4597     return true;
4598   }
4599   if (getParser().parseAbsoluteExpression(Tmp)) {
4600     return true;
4601   }
4602   Ret = static_cast<uint32_t>(Tmp);
4603   return false;
4604 }
4605 
4606 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4607                                                uint32_t &Minor) {
4608   if (ParseAsAbsoluteExpression(Major))
4609     return TokError("invalid major version");
4610 
4611   if (!trySkipToken(AsmToken::Comma))
4612     return TokError("minor version number required, comma expected");
4613 
4614   if (ParseAsAbsoluteExpression(Minor))
4615     return TokError("invalid minor version");
4616 
4617   return false;
4618 }
4619 
4620 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4621   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4622     return TokError("directive only supported for amdgcn architecture");
4623 
4624   std::string TargetIDDirective;
4625   SMLoc TargetStart = getTok().getLoc();
4626   if (getParser().parseEscapedString(TargetIDDirective))
4627     return true;
4628 
4629   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4630   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4631     return getParser().Error(TargetRange.Start,
4632         (Twine(".amdgcn_target directive's target id ") +
4633          Twine(TargetIDDirective) +
4634          Twine(" does not match the specified target id ") +
4635          Twine(getTargetStreamer().getTargetID()->toString())).str());
4636 
4637   return false;
4638 }
4639 
4640 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4641   return Error(Range.Start, "value out of range", Range);
4642 }
4643 
4644 bool AMDGPUAsmParser::calculateGPRBlocks(
4645     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4646     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4647     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4648     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4649   // TODO(scott.linder): These calculations are duplicated from
4650   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4651   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4652 
4653   unsigned NumVGPRs = NextFreeVGPR;
4654   unsigned NumSGPRs = NextFreeSGPR;
4655 
4656   if (Version.Major >= 10)
4657     NumSGPRs = 0;
4658   else {
4659     unsigned MaxAddressableNumSGPRs =
4660         IsaInfo::getAddressableNumSGPRs(&getSTI());
4661 
4662     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4663         NumSGPRs > MaxAddressableNumSGPRs)
4664       return OutOfRangeError(SGPRRange);
4665 
4666     NumSGPRs +=
4667         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4668 
4669     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4670         NumSGPRs > MaxAddressableNumSGPRs)
4671       return OutOfRangeError(SGPRRange);
4672 
4673     if (Features.test(FeatureSGPRInitBug))
4674       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4675   }
4676 
4677   VGPRBlocks =
4678       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4679   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4680 
4681   return false;
4682 }
4683 
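// Parses a ".amdhsa_kernel <name>" block terminated by ".end_amdhsa_kernel".
// Each .amdhsa_* sub-directive may appear at most once and takes an absolute
// expression that is range-checked against the corresponding descriptor field
// via PARSE_BITS_ENTRY. A minimal, illustrative block (the kernel name is
// hypothetical; gfx90a additionally requires .amdhsa_accum_offset):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel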
4684 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4685   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4686     return TokError("directive only supported for amdgcn architecture");
4687 
4688   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4689     return TokError("directive only supported for amdhsa OS");
4690 
4691   StringRef KernelName;
4692   if (getParser().parseIdentifier(KernelName))
4693     return true;
4694 
4695   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4696 
4697   StringSet<> Seen;
4698 
4699   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4700 
4701   SMRange VGPRRange;
4702   uint64_t NextFreeVGPR = 0;
4703   uint64_t AccumOffset = 0;
4704   uint64_t SharedVGPRCount = 0;
4705   SMRange SGPRRange;
4706   uint64_t NextFreeSGPR = 0;
4707 
4708   // Count the number of user SGPRs implied from the enabled feature bits.
4709   unsigned ImpliedUserSGPRCount = 0;
4710 
4711   // Track if the asm explicitly contains the directive for the user SGPR
4712   // count.
4713   Optional<unsigned> ExplicitUserSGPRCount;
4714   bool ReserveVCC = true;
4715   bool ReserveFlatScr = true;
4716   Optional<bool> EnableWavefrontSize32;
4717 
4718   while (true) {
4719     while (trySkipToken(AsmToken::EndOfStatement));
4720 
4721     StringRef ID;
4722     SMRange IDRange = getTok().getLocRange();
4723     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4724       return true;
4725 
4726     if (ID == ".end_amdhsa_kernel")
4727       break;
4728 
4729     if (Seen.find(ID) != Seen.end())
4730       return TokError(".amdhsa_ directives cannot be repeated");
4731     Seen.insert(ID);
4732 
4733     SMLoc ValStart = getLoc();
4734     int64_t IVal;
4735     if (getParser().parseAbsoluteExpression(IVal))
4736       return true;
4737     SMLoc ValEnd = getLoc();
4738     SMRange ValRange = SMRange(ValStart, ValEnd);
4739 
4740     if (IVal < 0)
4741       return OutOfRangeError(ValRange);
4742 
4743     uint64_t Val = IVal;
4744 
4745 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4746   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4747     return OutOfRangeError(RANGE);                                             \
4748   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4749 
4750     if (ID == ".amdhsa_group_segment_fixed_size") {
4751       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4752         return OutOfRangeError(ValRange);
4753       KD.group_segment_fixed_size = Val;
4754     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4755       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4756         return OutOfRangeError(ValRange);
4757       KD.private_segment_fixed_size = Val;
4758     } else if (ID == ".amdhsa_kernarg_size") {
4759       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4760         return OutOfRangeError(ValRange);
4761       KD.kernarg_size = Val;
4762     } else if (ID == ".amdhsa_user_sgpr_count") {
4763       ExplicitUserSGPRCount = Val;
4764     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4765       if (hasArchitectedFlatScratch())
4766         return Error(IDRange.Start,
4767                      "directive is not supported with architected flat scratch",
4768                      IDRange);
4769       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4770                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4771                        Val, ValRange);
4772       if (Val)
4773         ImpliedUserSGPRCount += 4;
4774     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4775       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4776                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4777                        ValRange);
4778       if (Val)
4779         ImpliedUserSGPRCount += 2;
4780     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4781       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4782                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4783                        ValRange);
4784       if (Val)
4785         ImpliedUserSGPRCount += 2;
4786     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4787       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4788                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4789                        Val, ValRange);
4790       if (Val)
4791         ImpliedUserSGPRCount += 2;
4792     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4793       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4794                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4795                        ValRange);
4796       if (Val)
4797         ImpliedUserSGPRCount += 2;
4798     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4799       if (hasArchitectedFlatScratch())
4800         return Error(IDRange.Start,
4801                      "directive is not supported with architected flat scratch",
4802                      IDRange);
4803       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4804                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4805                        ValRange);
4806       if (Val)
4807         ImpliedUserSGPRCount += 2;
4808     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4809       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4810                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4811                        Val, ValRange);
4812       if (Val)
4813         ImpliedUserSGPRCount += 1;
4814     } else if (ID == ".amdhsa_wavefront_size32") {
4815       if (IVersion.Major < 10)
4816         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4817       EnableWavefrontSize32 = Val;
4818       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4819                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4820                        Val, ValRange);
4821     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4822       if (hasArchitectedFlatScratch())
4823         return Error(IDRange.Start,
4824                      "directive is not supported with architected flat scratch",
4825                      IDRange);
4826       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4827                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4828     } else if (ID == ".amdhsa_enable_private_segment") {
4829       if (!hasArchitectedFlatScratch())
4830         return Error(
4831             IDRange.Start,
4832             "directive is not supported without architected flat scratch",
4833             IDRange);
4834       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4835                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4836     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4837       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4838                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4839                        ValRange);
4840     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4841       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4842                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4843                        ValRange);
4844     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4845       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4846                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4847                        ValRange);
4848     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4849       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4850                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4851                        ValRange);
4852     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4853       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4854                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4855                        ValRange);
4856     } else if (ID == ".amdhsa_next_free_vgpr") {
4857       VGPRRange = ValRange;
4858       NextFreeVGPR = Val;
4859     } else if (ID == ".amdhsa_next_free_sgpr") {
4860       SGPRRange = ValRange;
4861       NextFreeSGPR = Val;
4862     } else if (ID == ".amdhsa_accum_offset") {
4863       if (!isGFX90A())
4864         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4865       AccumOffset = Val;
4866     } else if (ID == ".amdhsa_reserve_vcc") {
4867       if (!isUInt<1>(Val))
4868         return OutOfRangeError(ValRange);
4869       ReserveVCC = Val;
4870     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4871       if (IVersion.Major < 7)
4872         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4873       if (hasArchitectedFlatScratch())
4874         return Error(IDRange.Start,
4875                      "directive is not supported with architected flat scratch",
4876                      IDRange);
4877       if (!isUInt<1>(Val))
4878         return OutOfRangeError(ValRange);
4879       ReserveFlatScr = Val;
4880     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4881       if (IVersion.Major < 8)
4882         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4883       if (!isUInt<1>(Val))
4884         return OutOfRangeError(ValRange);
4885       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4886         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4887                                  IDRange);
4888     } else if (ID == ".amdhsa_float_round_mode_32") {
4889       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4890                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4891     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4892       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4893                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4894     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4895       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4896                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4897     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4898       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4899                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4900                        ValRange);
4901     } else if (ID == ".amdhsa_dx10_clamp") {
4902       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4903                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4904     } else if (ID == ".amdhsa_ieee_mode") {
4905       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4906                        Val, ValRange);
4907     } else if (ID == ".amdhsa_fp16_overflow") {
4908       if (IVersion.Major < 9)
4909         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4910       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4911                        ValRange);
4912     } else if (ID == ".amdhsa_tg_split") {
4913       if (!isGFX90A())
4914         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4915       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4916                        ValRange);
4917     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4918       if (IVersion.Major < 10)
4919         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4920       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4921                        ValRange);
4922     } else if (ID == ".amdhsa_memory_ordered") {
4923       if (IVersion.Major < 10)
4924         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4925       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4926                        ValRange);
4927     } else if (ID == ".amdhsa_forward_progress") {
4928       if (IVersion.Major < 10)
4929         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4930       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4931                        ValRange);
4932     } else if (ID == ".amdhsa_shared_vgpr_count") {
4933       if (IVersion.Major < 10)
4934         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4935       SharedVGPRCount = Val;
4936       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
4937                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
4938                        ValRange);
4939     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4940       PARSE_BITS_ENTRY(
4941           KD.compute_pgm_rsrc2,
4942           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4943           ValRange);
4944     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4945       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4946                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4947                        Val, ValRange);
4948     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4949       PARSE_BITS_ENTRY(
4950           KD.compute_pgm_rsrc2,
4951           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4952           ValRange);
4953     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4954       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4955                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4956                        Val, ValRange);
4957     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4958       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4959                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4960                        Val, ValRange);
4961     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4962       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4963                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4964                        Val, ValRange);
4965     } else if (ID == ".amdhsa_exception_int_div_zero") {
4966       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4967                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4968                        Val, ValRange);
4969     } else {
4970       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4971     }
4972 
4973 #undef PARSE_BITS_ENTRY
4974   }
4975 
4976   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4977     return TokError(".amdhsa_next_free_vgpr directive is required");
4978 
4979   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4980     return TokError(".amdhsa_next_free_sgpr directive is required");
4981 
4982   unsigned VGPRBlocks;
4983   unsigned SGPRBlocks;
4984   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4985                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4986                          EnableWavefrontSize32, NextFreeVGPR,
4987                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4988                          SGPRBlocks))
4989     return true;
4990 
4991   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4992           VGPRBlocks))
4993     return OutOfRangeError(VGPRRange);
4994   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4995                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4996 
4997   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4998           SGPRBlocks))
4999     return OutOfRangeError(SGPRRange);
5000   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5001                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5002                   SGPRBlocks);
5003 
5004   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5005     return TokError("amdhsa_user_sgpr_count smaller than implied by "
5006                     "enabled user SGPRs");
5007 
5008   unsigned UserSGPRCount =
5009       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5010 
5011   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5012     return TokError("too many user SGPRs enabled");
5013   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5014                   UserSGPRCount);
5015 
5016   if (isGFX90A()) {
5017     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5018       return TokError(".amdhsa_accum_offset directive is required");
5019     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5020       return TokError("accum_offset should be in range [4..256] in "
5021                       "increments of 4");
5022     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5023       return TokError("accum_offset exceeds total VGPR allocation");
5024     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5025                     (AccumOffset / 4 - 1));
5026   }
5027 
5028   if (IVersion.Major == 10) {
5029     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5030     if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5031       return TokError("shared_vgpr_count directive not valid on "
5032                       "wavefront size 32");
5033     }
5034     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5035       return TokError("shared_vgpr_count*2 + "
5036                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5037                       "exceed 63");
5038     }
5039   }
5040 
5041   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5042       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5043       ReserveFlatScr);
5044   return false;
5045 }
5046 
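// Parses ".hsa_code_object_version <major>, <minor>" and forwards the pair to
// the target streamer, e.g. (illustrative values) ".hsa_code_object_version 2,1".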
5047 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5048   uint32_t Major;
5049   uint32_t Minor;
5050 
5051   if (ParseDirectiveMajorMinor(Major, Minor))
5052     return true;
5053 
5054   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5055   return false;
5056 }
5057 
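// Parses ".hsa_code_object_isa", either with no arguments (the ISA version of
// the targeted GPU and the "AMD"/"AMDGPU" defaults are used) or in the full
// form, e.g. (illustrative values):
//   .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"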
5058 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5059   uint32_t Major;
5060   uint32_t Minor;
5061   uint32_t Stepping;
5062   StringRef VendorName;
5063   StringRef ArchName;
5064 
5065   // If this directive has no arguments, then use the ISA version for the
5066   // targeted GPU.
5067   if (isToken(AsmToken::EndOfStatement)) {
5068     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5069     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5070                                                         ISA.Stepping,
5071                                                         "AMD", "AMDGPU");
5072     return false;
5073   }
5074 
5075   if (ParseDirectiveMajorMinor(Major, Minor))
5076     return true;
5077 
5078   if (!trySkipToken(AsmToken::Comma))
5079     return TokError("stepping version number required, comma expected");
5080 
5081   if (ParseAsAbsoluteExpression(Stepping))
5082     return TokError("invalid stepping version");
5083 
5084   if (!trySkipToken(AsmToken::Comma))
5085     return TokError("vendor name required, comma expected");
5086 
5087   if (!parseString(VendorName, "invalid vendor name"))
5088     return true;
5089 
5090   if (!trySkipToken(AsmToken::Comma))
5091     return TokError("arch name required, comma expected");
5092 
5093   if (!parseString(ArchName, "invalid arch name"))
5094     return true;
5095 
5096   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5097                                                       VendorName, ArchName);
5098   return false;
5099 }
5100 
5101 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5102                                                amd_kernel_code_t &Header) {
5103   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5104   // assembly for backwards compatibility.
5105   if (ID == "max_scratch_backing_memory_byte_size") {
5106     Parser.eatToEndOfStatement();
5107     return false;
5108   }
5109 
5110   SmallString<40> ErrStr;
5111   raw_svector_ostream Err(ErrStr);
5112   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5113     return TokError(Err.str());
5114   }
5115   Lex();
5116 
5117   if (ID == "enable_wavefront_size32") {
5118     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5119       if (!isGFX10Plus())
5120         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5121       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5122         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5123     } else {
5124       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5125         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5126     }
5127   }
5128 
5129   if (ID == "wavefront_size") {
5130     if (Header.wavefront_size == 5) {
5131       if (!isGFX10Plus())
5132         return TokError("wavefront_size=5 is only allowed on GFX10+");
5133       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5134         return TokError("wavefront_size=5 requires +WavefrontSize32");
5135     } else if (Header.wavefront_size == 6) {
5136       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5137         return TokError("wavefront_size=6 requires +WavefrontSize64");
5138     }
5139   }
5140 
5141   if (ID == "enable_wgp_mode") {
5142     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5143         !isGFX10Plus())
5144       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5145   }
5146 
5147   if (ID == "enable_mem_ordered") {
5148     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5149         !isGFX10Plus())
5150       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5151   }
5152 
5153   if (ID == "enable_fwd_progress") {
5154     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5155         !isGFX10Plus())
5156       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5157   }
5158 
5159   return false;
5160 }
5161 
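// Parses an ".amd_kernel_code_t" block of "<field> = <value>" entries
// terminated by ".end_amd_kernel_code_t". An illustrative sketch (field names
// as accepted by parseAmdKernelCodeField):
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_wgp_mode = 0
//   .end_amd_kernel_code_t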
5162 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5163   amd_kernel_code_t Header;
5164   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5165 
5166   while (true) {
5167     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5168     // will set the current token to EndOfStatement.
5169     while (trySkipToken(AsmToken::EndOfStatement));
5170 
5171     StringRef ID;
5172     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5173       return true;
5174 
5175     if (ID == ".end_amd_kernel_code_t")
5176       break;
5177 
5178     if (ParseAMDKernelCodeTValue(ID, Header))
5179       return true;
5180   }
5181 
5182   getTargetStreamer().EmitAMDKernelCodeT(Header);
5183 
5184   return false;
5185 }
5186 
5187 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5188   StringRef KernelName;
5189   if (!parseId(KernelName, "expected symbol name"))
5190     return true;
5191 
5192   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5193                                            ELF::STT_AMDGPU_HSA_KERNEL);
5194 
5195   KernelScope.initialize(getContext());
5196   return false;
5197 }
5198 
5199 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5200   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5201     return Error(getLoc(),
5202                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5203                  "architectures");
5204   }
5205 
5206   auto TargetIDDirective = getLexer().getTok().getStringContents();
5207   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5208     return Error(getParser().getTok().getLoc(), "target id must match options");
5209 
5210   getTargetStreamer().EmitISAVersion();
5211   Lex();
5212 
5213   return false;
5214 }
5215 
5216 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5217   const char *AssemblerDirectiveBegin;
5218   const char *AssemblerDirectiveEnd;
5219   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5220       isHsaAbiVersion3AndAbove(&getSTI())
5221           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5222                             HSAMD::V3::AssemblerDirectiveEnd)
5223           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5224                             HSAMD::AssemblerDirectiveEnd);
5225 
5226   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5227     return Error(getLoc(),
5228                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5229                  "not available on non-amdhsa OSes")).str());
5230   }
5231 
5232   std::string HSAMetadataString;
5233   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5234                           HSAMetadataString))
5235     return true;
5236 
5237   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5238     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5239       return Error(getLoc(), "invalid HSA metadata");
5240   } else {
5241     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5242       return Error(getLoc(), "invalid HSA metadata");
5243   }
5244 
5245   return false;
5246 }
5247 
5248 /// Common code to parse out a block of text (typically YAML) between start and
5249 /// end directives.
5250 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5251                                           const char *AssemblerDirectiveEnd,
5252                                           std::string &CollectString) {
5253 
5254   raw_string_ostream CollectStream(CollectString);
5255 
5256   getLexer().setSkipSpace(false);
5257 
5258   bool FoundEnd = false;
5259   while (!isToken(AsmToken::Eof)) {
5260     while (isToken(AsmToken::Space)) {
5261       CollectStream << getTokenStr();
5262       Lex();
5263     }
5264 
5265     if (trySkipId(AssemblerDirectiveEnd)) {
5266       FoundEnd = true;
5267       break;
5268     }
5269 
5270     CollectStream << Parser.parseStringToEndOfStatement()
5271                   << getContext().getAsmInfo()->getSeparatorString();
5272 
5273     Parser.eatToEndOfStatement();
5274   }
5275 
5276   getLexer().setSkipSpace(true);
5277 
5278   if (isToken(AsmToken::Eof) && !FoundEnd) {
5279     return TokError(Twine("expected directive ") +
5280                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5281   }
5282 
5283   CollectStream.flush();
5284   return false;
5285 }
5286 
5287 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5288 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5289   std::string String;
5290   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5291                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5292     return true;
5293 
5294   auto PALMetadata = getTargetStreamer().getPALMetadata();
5295   if (!PALMetadata->setFromString(String))
5296     return Error(getLoc(), "invalid PAL metadata");
5297   return false;
5298 }
5299 
5300 /// Parse the assembler directive for old linear-format PAL metadata.
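/// The legacy form is a comma-separated list of register/value pairs, each of
/// which is forwarded to PALMetadata::setRegister, e.g. (illustrative values)
/// "0x2c0a, 0x0, 0x2c0b, 0x1".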
5301 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5302   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5303     return Error(getLoc(),
5304                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5305                  "not available on non-amdpal OSes")).str());
5306   }
5307 
5308   auto PALMetadata = getTargetStreamer().getPALMetadata();
5309   PALMetadata->setLegacy();
5310   for (;;) {
5311     uint32_t Key, Value;
5312     if (ParseAsAbsoluteExpression(Key)) {
5313       return TokError(Twine("invalid value in ") +
5314                       Twine(PALMD::AssemblerDirective));
5315     }
5316     if (!trySkipToken(AsmToken::Comma)) {
5317       return TokError(Twine("expected an even number of values in ") +
5318                       Twine(PALMD::AssemblerDirective));
5319     }
5320     if (ParseAsAbsoluteExpression(Value)) {
5321       return TokError(Twine("invalid value in ") +
5322                       Twine(PALMD::AssemblerDirective));
5323     }
5324     PALMetadata->setRegister(Key, Value);
5325     if (!trySkipToken(AsmToken::Comma))
5326       break;
5327   }
5328   return false;
5329 }
5330 
5331 /// ParseDirectiveAMDGPULDS
5332 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
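///  e.g. (illustrative) ".amdgpu_lds my_lds_var, 4096, 16" declares a 4 KiB
///  LDS symbol with 16-byte alignment; the alignment defaults to 4 when the
///  third operand is omitted.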
5333 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5334   if (getParser().checkForValidSection())
5335     return true;
5336 
5337   StringRef Name;
5338   SMLoc NameLoc = getLoc();
5339   if (getParser().parseIdentifier(Name))
5340     return TokError("expected identifier in directive");
5341 
5342   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5343   if (parseToken(AsmToken::Comma, "expected ','"))
5344     return true;
5345 
5346   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5347 
5348   int64_t Size;
5349   SMLoc SizeLoc = getLoc();
5350   if (getParser().parseAbsoluteExpression(Size))
5351     return true;
5352   if (Size < 0)
5353     return Error(SizeLoc, "size must be non-negative");
5354   if (Size > LocalMemorySize)
5355     return Error(SizeLoc, "size is too large");
5356 
5357   int64_t Alignment = 4;
5358   if (trySkipToken(AsmToken::Comma)) {
5359     SMLoc AlignLoc = getLoc();
5360     if (getParser().parseAbsoluteExpression(Alignment))
5361       return true;
5362     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5363       return Error(AlignLoc, "alignment must be a power of two");
5364 
5365     // Alignment larger than the size of LDS is possible in theory, as long
5366     // as the linker manages to place the symbol at address 0, but we do want
5367     // to make sure the alignment fits nicely into a 32-bit integer.
5368     if (Alignment >= 1u << 31)
5369       return Error(AlignLoc, "alignment is too large");
5370   }
5371 
5372   if (parseToken(AsmToken::EndOfStatement,
5373                  "unexpected token in '.amdgpu_lds' directive"))
5374     return true;
5375 
5376   Symbol->redefineIfPossible();
5377   if (!Symbol->isUndefined())
5378     return Error(NameLoc, "invalid symbol redefinition");
5379 
5380   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5381   return false;
5382 }
5383 
5384 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5385   StringRef IDVal = DirectiveID.getString();
5386 
5387   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5388     if (IDVal == ".amdhsa_kernel")
5389      return ParseDirectiveAMDHSAKernel();
5390 
5391     // TODO: Restructure/combine with PAL metadata directive.
5392     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5393       return ParseDirectiveHSAMetadata();
5394   } else {
5395     if (IDVal == ".hsa_code_object_version")
5396       return ParseDirectiveHSACodeObjectVersion();
5397 
5398     if (IDVal == ".hsa_code_object_isa")
5399       return ParseDirectiveHSACodeObjectISA();
5400 
5401     if (IDVal == ".amd_kernel_code_t")
5402       return ParseDirectiveAMDKernelCodeT();
5403 
5404     if (IDVal == ".amdgpu_hsa_kernel")
5405       return ParseDirectiveAMDGPUHsaKernel();
5406 
5407     if (IDVal == ".amd_amdgpu_isa")
5408       return ParseDirectiveISAVersion();
5409 
5410     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5411       return ParseDirectiveHSAMetadata();
5412   }
5413 
5414   if (IDVal == ".amdgcn_target")
5415     return ParseDirectiveAMDGCNTarget();
5416 
5417   if (IDVal == ".amdgpu_lds")
5418     return ParseDirectiveAMDGPULDS();
5419 
5420   if (IDVal == PALMD::AssemblerDirectiveBegin)
5421     return ParseDirectivePALMetadataBegin();
5422 
5423   if (IDVal == PALMD::AssemblerDirective)
5424     return ParseDirectivePALMetadata();
5425 
5426   return true;
5427 }
5428 
5429 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5430                                            unsigned RegNo) {
5431 
5432   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5433     return isGFX9Plus();
5434 
5435   // GFX10 has 2 more SGPRs: 104 and 105.
5436   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5437     return hasSGPR104_SGPR105();
5438 
5439   switch (RegNo) {
5440   case AMDGPU::SRC_SHARED_BASE:
5441   case AMDGPU::SRC_SHARED_LIMIT:
5442   case AMDGPU::SRC_PRIVATE_BASE:
5443   case AMDGPU::SRC_PRIVATE_LIMIT:
5444   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5445     return isGFX9Plus();
5446   case AMDGPU::TBA:
5447   case AMDGPU::TBA_LO:
5448   case AMDGPU::TBA_HI:
5449   case AMDGPU::TMA:
5450   case AMDGPU::TMA_LO:
5451   case AMDGPU::TMA_HI:
5452     return !isGFX9Plus();
5453   case AMDGPU::XNACK_MASK:
5454   case AMDGPU::XNACK_MASK_LO:
5455   case AMDGPU::XNACK_MASK_HI:
5456     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5457   case AMDGPU::SGPR_NULL:
5458     return isGFX10Plus();
5459   default:
5460     break;
5461   }
5462 
5463   if (isCI())
5464     return true;
5465 
5466   if (isSI() || isGFX10Plus()) {
5467     // No flat_scr on SI.
5468     // On GFX10 flat scratch is not a valid register operand and can only be
5469     // accessed with s_setreg/s_getreg.
5470     switch (RegNo) {
5471     case AMDGPU::FLAT_SCR:
5472     case AMDGPU::FLAT_SCR_LO:
5473     case AMDGPU::FLAT_SCR_HI:
5474       return false;
5475     default:
5476       return true;
5477     }
5478   }
5479 
5480   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5481   // SI/CI have.
5482   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5483     return hasSGPR102_SGPR103();
5484 
5485   return true;
5486 }
5487 
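// Parses a single instruction operand. In NSA (non-sequential address) mode,
// used for gfx10+ MIMG instructions, a bracketed register list such as
// (illustrative) "[v4, v9, v2]" is also accepted; when it names more than one
// register it is wrapped in "[" and "]" token operands.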
5488 OperandMatchResultTy
5489 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5490                               OperandMode Mode) {
5491   // Try to parse with a custom parser
5492   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5493 
5494   // If we successfully parsed the operand or if there was an error parsing,
5495   // we are done.
5496   //
5497   // If we are parsing after we reach EndOfStatement then this means we
5498   // are appending default values to the Operands list.  This is only done
5499   // by custom parsers, so we shouldn't continue on to the generic parsing.
5500   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5501       isToken(AsmToken::EndOfStatement))
5502     return ResTy;
5503 
5504   SMLoc RBraceLoc;
5505   SMLoc LBraceLoc = getLoc();
5506   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5507     unsigned Prefix = Operands.size();
5508 
5509     for (;;) {
5510       auto Loc = getLoc();
5511       ResTy = parseReg(Operands);
5512       if (ResTy == MatchOperand_NoMatch)
5513         Error(Loc, "expected a register");
5514       if (ResTy != MatchOperand_Success)
5515         return MatchOperand_ParseFail;
5516 
5517       RBraceLoc = getLoc();
5518       if (trySkipToken(AsmToken::RBrac))
5519         break;
5520 
5521       if (!skipToken(AsmToken::Comma,
5522                      "expected a comma or a closing square bracket")) {
5523         return MatchOperand_ParseFail;
5524       }
5525     }
5526 
5527     if (Operands.size() - Prefix > 1) {
5528       Operands.insert(Operands.begin() + Prefix,
5529                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5530       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5531     }
5532 
5533     return MatchOperand_Success;
5534   }
5535 
5536   return parseRegOrImm(Operands);
5537 }
5538 
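// Strips a forced-encoding suffix from the mnemonic and records it, so that,
// illustratively, "v_add_f32_e64" is matched as "v_add_f32" with a forced
// 64-bit encoding; "_e32", "_dpp" and "_sdwa" behave analogously.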
5539 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5540   // Clear any forced encodings from the previous instruction.
5541   setForcedEncodingSize(0);
5542   setForcedDPP(false);
5543   setForcedSDWA(false);
5544 
5545   if (Name.endswith("_e64")) {
5546     setForcedEncodingSize(64);
5547     return Name.substr(0, Name.size() - 4);
5548   } else if (Name.endswith("_e32")) {
5549     setForcedEncodingSize(32);
5550     return Name.substr(0, Name.size() - 4);
5551   } else if (Name.endswith("_dpp")) {
5552     setForcedDPP(true);
5553     return Name.substr(0, Name.size() - 4);
5554   } else if (Name.endswith("_sdwa")) {
5555     setForcedSDWA(true);
5556     return Name.substr(0, Name.size() - 5);
5557   }
5558   return Name;
5559 }
5560 
5561 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5562                                        StringRef Name,
5563                                        SMLoc NameLoc, OperandVector &Operands) {
5564   // Add the instruction mnemonic
5565   Name = parseMnemonicSuffix(Name);
5566   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5567 
5568   bool IsMIMG = Name.startswith("image_");
5569 
5570   while (!trySkipToken(AsmToken::EndOfStatement)) {
5571     OperandMode Mode = OperandMode_Default;
5572     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5573       Mode = OperandMode_NSA;
5574     CPolSeen = 0;
5575     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5576 
5577     if (Res != MatchOperand_Success) {
5578       checkUnsupportedInstruction(Name, NameLoc);
5579       if (!Parser.hasPendingError()) {
5580         // FIXME: use real operand location rather than the current location.
5581         StringRef Msg =
5582           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5583                                             "not a valid operand.";
5584         Error(getLoc(), Msg);
5585       }
5586       while (!trySkipToken(AsmToken::EndOfStatement)) {
5587         lex();
5588       }
5589       return true;
5590     }
5591 
5592     // Eat the comma between operands, if there is one.
5593     trySkipToken(AsmToken::Comma);
5594   }
5595 
5596   return false;
5597 }
5598 
5599 //===----------------------------------------------------------------------===//
5600 // Utility functions
5601 //===----------------------------------------------------------------------===//
5602 
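// Parses a "<prefix>:<expr>" operand, e.g. (illustrative) "offset:16".
// Returns NoMatch if the prefix is absent and ParseFail if the expression
// after the colon cannot be evaluated.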
5603 OperandMatchResultTy
5604 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5605 
5606   if (!trySkipId(Prefix, AsmToken::Colon))
5607     return MatchOperand_NoMatch;
5608 
5609   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5610 }
5611 
5612 OperandMatchResultTy
5613 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5614                                     AMDGPUOperand::ImmTy ImmTy,
5615                                     bool (*ConvertResult)(int64_t&)) {
5616   SMLoc S = getLoc();
5617   int64_t Value = 0;
5618 
5619   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5620   if (Res != MatchOperand_Success)
5621     return Res;
5622 
5623   if (ConvertResult && !ConvertResult(Value)) {
5624     Error(S, "invalid " + StringRef(Prefix) + " value.");
5625   }
5626 
5627   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5628   return MatchOperand_Success;
5629 }
5630 
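// Parses a "<prefix>:[b,b,...]" operand of up to four elements, each of which
// must be 0 or 1; the bits are packed LSB-first into a single immediate.
// Illustrative: "op_sel:[0,1]" yields the value 2.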
5631 OperandMatchResultTy
5632 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5633                                              OperandVector &Operands,
5634                                              AMDGPUOperand::ImmTy ImmTy,
5635                                              bool (*ConvertResult)(int64_t&)) {
5636   SMLoc S = getLoc();
5637   if (!trySkipId(Prefix, AsmToken::Colon))
5638     return MatchOperand_NoMatch;
5639 
5640   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5641     return MatchOperand_ParseFail;
5642 
5643   unsigned Val = 0;
5644   const unsigned MaxSize = 4;
5645 
5646   // FIXME: How to verify the number of elements matches the number of src
5647   // operands?
5648   for (int I = 0; ; ++I) {
5649     int64_t Op;
5650     SMLoc Loc = getLoc();
5651     if (!parseExpr(Op))
5652       return MatchOperand_ParseFail;
5653 
5654     if (Op != 0 && Op != 1) {
5655       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5656       return MatchOperand_ParseFail;
5657     }
5658 
5659     Val |= (Op << I);
5660 
5661     if (trySkipToken(AsmToken::RBrac))
5662       break;
5663 
5664     if (I + 1 == MaxSize) {
5665       Error(getLoc(), "expected a closing square bracket");
5666       return MatchOperand_ParseFail;
5667     }
5668 
5669     if (!skipToken(AsmToken::Comma, "expected a comma"))
5670       return MatchOperand_ParseFail;
5671   }
5672 
5673   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5674   return MatchOperand_Success;
5675 }
5676 
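// Parses a boolean modifier spelled either as "<name>" (bit set) or
// "no<name>" (bit cleared), e.g. (illustrative) "gds" vs. "nogds".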
5677 OperandMatchResultTy
5678 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5679                                AMDGPUOperand::ImmTy ImmTy) {
5680   int64_t Bit;
5681   SMLoc S = getLoc();
5682 
5683   if (trySkipId(Name)) {
5684     Bit = 1;
5685   } else if (trySkipId("no", Name)) {
5686     Bit = 0;
5687   } else {
5688     return MatchOperand_NoMatch;
5689   }
5690 
5691   if (Name == "r128" && !hasMIMG_R128()) {
5692     Error(S, "r128 modifier is not supported on this GPU");
5693     return MatchOperand_ParseFail;
5694   }
5695   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5696     Error(S, "a16 modifier is not supported on this GPU");
5697     return MatchOperand_ParseFail;
5698   }
5699 
5700   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5701     ImmTy = AMDGPUOperand::ImmTyR128A16;
5702 
5703   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5704   return MatchOperand_Success;
5705 }
5706 
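// Parses cache policy modifiers. Most targets use "glc", "slc", "dlc" and
// "scc" (or their "no"-prefixed forms such as "noglc"); gfx940 non-"s_"
// instructions use "sc0", "sc1" and "nt" instead. Modifiers may be combined,
// e.g. (illustrative) "glc slc", but each may appear only once per
// instruction, which is tracked in CPolSeen.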
5707 OperandMatchResultTy
5708 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5709   unsigned CPolOn = 0;
5710   unsigned CPolOff = 0;
5711   SMLoc S = getLoc();
5712 
5713   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5714   if (isGFX940() && !Mnemo.startswith("s_")) {
5715     if (trySkipId("sc0"))
5716       CPolOn = AMDGPU::CPol::SC0;
5717     else if (trySkipId("nosc0"))
5718       CPolOff = AMDGPU::CPol::SC0;
5719     else if (trySkipId("nt"))
5720       CPolOn = AMDGPU::CPol::NT;
5721     else if (trySkipId("nont"))
5722       CPolOff = AMDGPU::CPol::NT;
5723     else if (trySkipId("sc1"))
5724       CPolOn = AMDGPU::CPol::SC1;
5725     else if (trySkipId("nosc1"))
5726       CPolOff = AMDGPU::CPol::SC1;
5727     else
5728       return MatchOperand_NoMatch;
5729   }
5730   else if (trySkipId("glc"))
5731     CPolOn = AMDGPU::CPol::GLC;
5732   else if (trySkipId("noglc"))
5733     CPolOff = AMDGPU::CPol::GLC;
5734   else if (trySkipId("slc"))
5735     CPolOn = AMDGPU::CPol::SLC;
5736   else if (trySkipId("noslc"))
5737     CPolOff = AMDGPU::CPol::SLC;
5738   else if (trySkipId("dlc"))
5739     CPolOn = AMDGPU::CPol::DLC;
5740   else if (trySkipId("nodlc"))
5741     CPolOff = AMDGPU::CPol::DLC;
5742   else if (trySkipId("scc"))
5743     CPolOn = AMDGPU::CPol::SCC;
5744   else if (trySkipId("noscc"))
5745     CPolOff = AMDGPU::CPol::SCC;
5746   else
5747     return MatchOperand_NoMatch;
5748 
5749   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5750     Error(S, "dlc modifier is not supported on this GPU");
5751     return MatchOperand_ParseFail;
5752   }
5753 
5754   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5755     Error(S, "scc modifier is not supported on this GPU");
5756     return MatchOperand_ParseFail;
5757   }
5758 
5759   if (CPolSeen & (CPolOn | CPolOff)) {
5760     Error(S, "duplicate cache policy modifier");
5761     return MatchOperand_ParseFail;
5762   }
5763 
5764   CPolSeen |= (CPolOn | CPolOff);
5765 
5766   for (unsigned I = 1; I != Operands.size(); ++I) {
5767     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5768     if (Op.isCPol()) {
5769       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5770       return MatchOperand_Success;
5771     }
5772   }
5773 
5774   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5775                                               AMDGPUOperand::ImmTyCPol));
5776 
5777   return MatchOperand_Success;
5778 }
5779 
5780 static void addOptionalImmOperand(
5781   MCInst& Inst, const OperandVector& Operands,
5782   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5783   AMDGPUOperand::ImmTy ImmT,
5784   int64_t Default = 0) {
5785   auto i = OptionalIdx.find(ImmT);
5786   if (i != OptionalIdx.end()) {
5787     unsigned Idx = i->second;
5788     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5789   } else {
5790     Inst.addOperand(MCOperand::createImm(Default));
5791   }
5792 }
5793 
5794 OperandMatchResultTy
5795 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5796                                        StringRef &Value,
5797                                        SMLoc &StringLoc) {
5798   if (!trySkipId(Prefix, AsmToken::Colon))
5799     return MatchOperand_NoMatch;
5800 
5801   StringLoc = getLoc();
5802   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5803                                                   : MatchOperand_ParseFail;
5804 }
5805 
5806 //===----------------------------------------------------------------------===//
5807 // MTBUF format
5808 //===----------------------------------------------------------------------===//
5809 
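// Parses "<pref>:<value>" and range-checks the value against MaxVal. Returns
// false on a parse error or an out-of-range value; a missing prefix leaves
// Fmt untouched and returns true.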
5810 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5811                                   int64_t MaxVal,
5812                                   int64_t &Fmt) {
5813   int64_t Val;
5814   SMLoc Loc = getLoc();
5815 
5816   auto Res = parseIntWithPrefix(Pref, Val);
5817   if (Res == MatchOperand_ParseFail)
5818     return false;
5819   if (Res == MatchOperand_NoMatch)
5820     return true;
5821 
5822   if (Val < 0 || Val > MaxVal) {
5823     Error(Loc, Twine("out of range ", StringRef(Pref)));
5824     return false;
5825   }
5826 
5827   Fmt = Val;
5828   return true;
5829 }
5830 
5831 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5832 // values to live in a joint format operand in the MCInst encoding.
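// Illustrative forms accepted here: "dfmt:4", "nfmt:2", or both in either
// order ("dfmt:4, nfmt:2"); an omitted component falls back to
// DFMT_DEFAULT/NFMT_DEFAULT.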
5833 OperandMatchResultTy
5834 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5835   using namespace llvm::AMDGPU::MTBUFFormat;
5836 
5837   int64_t Dfmt = DFMT_UNDEF;
5838   int64_t Nfmt = NFMT_UNDEF;
5839 
5840   // dfmt and nfmt can appear in either order, and each is optional.
5841   for (int I = 0; I < 2; ++I) {
5842     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5843       return MatchOperand_ParseFail;
5844 
5845     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5846       return MatchOperand_ParseFail;
5847     }
5848     // Skip optional comma between dfmt/nfmt
5849     // but guard against 2 commas following each other.
5850     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5851         !peekToken().is(AsmToken::Comma)) {
5852       trySkipToken(AsmToken::Comma);
5853     }
5854   }
5855 
5856   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5857     return MatchOperand_NoMatch;
5858 
5859   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5860   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5861 
5862   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5863   return MatchOperand_Success;
5864 }
5865 
5866 OperandMatchResultTy
5867 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5868   using namespace llvm::AMDGPU::MTBUFFormat;
5869 
5870   int64_t Fmt = UFMT_UNDEF;
5871 
5872   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5873     return MatchOperand_ParseFail;
5874 
5875   if (Fmt == UFMT_UNDEF)
5876     return MatchOperand_NoMatch;
5877 
5878   Format = Fmt;
5879   return MatchOperand_Success;
5880 }
5881 
5882 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5883                                     int64_t &Nfmt,
5884                                     StringRef FormatStr,
5885                                     SMLoc Loc) {
5886   using namespace llvm::AMDGPU::MTBUFFormat;
5887   int64_t Format;
5888 
5889   Format = getDfmt(FormatStr);
5890   if (Format != DFMT_UNDEF) {
5891     Dfmt = Format;
5892     return true;
5893   }
5894 
5895   Format = getNfmt(FormatStr, getSTI());
5896   if (Format != NFMT_UNDEF) {
5897     Nfmt = Format;
5898     return true;
5899   }
5900 
5901   Error(Loc, "unsupported format");
5902   return false;
5903 }
5904 
5905 OperandMatchResultTy
5906 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5907                                           SMLoc FormatLoc,
5908                                           int64_t &Format) {
5909   using namespace llvm::AMDGPU::MTBUFFormat;
5910 
5911   int64_t Dfmt = DFMT_UNDEF;
5912   int64_t Nfmt = NFMT_UNDEF;
5913   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5914     return MatchOperand_ParseFail;
5915 
5916   if (trySkipToken(AsmToken::Comma)) {
5917     StringRef Str;
5918     SMLoc Loc = getLoc();
5919     if (!parseId(Str, "expected a format string") ||
5920         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5921       return MatchOperand_ParseFail;
5922     }
5923     if (Dfmt == DFMT_UNDEF) {
5924       Error(Loc, "duplicate numeric format");
5925       return MatchOperand_ParseFail;
5926     } else if (Nfmt == NFMT_UNDEF) {
5927       Error(Loc, "duplicate data format");
5928       return MatchOperand_ParseFail;
5929     }
5930   }
5931 
5932   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5933   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5934 
5935   if (isGFX10Plus()) {
5936     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5937     if (Ufmt == UFMT_UNDEF) {
5938       Error(FormatLoc, "unsupported format");
5939       return MatchOperand_ParseFail;
5940     }
5941     Format = Ufmt;
5942   } else {
5943     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5944   }
5945 
5946   return MatchOperand_Success;
5947 }
5948 
5949 OperandMatchResultTy
5950 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5951                                             SMLoc Loc,
5952                                             int64_t &Format) {
5953   using namespace llvm::AMDGPU::MTBUFFormat;
5954 
5955   auto Id = getUnifiedFormat(FormatStr);
5956   if (Id == UFMT_UNDEF)
5957     return MatchOperand_NoMatch;
5958 
5959   if (!isGFX10Plus()) {
5960     Error(Loc, "unified format is not supported on this GPU");
5961     return MatchOperand_ParseFail;
5962   }
5963 
5964   Format = Id;
5965   return MatchOperand_Success;
5966 }
5967 
5968 OperandMatchResultTy
5969 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5970   using namespace llvm::AMDGPU::MTBUFFormat;
5971   SMLoc Loc = getLoc();
5972 
5973   if (!parseExpr(Format))
5974     return MatchOperand_ParseFail;
5975   if (!isValidFormatEncoding(Format, getSTI())) {
5976     Error(Loc, "out of range format");
5977     return MatchOperand_ParseFail;
5978   }
5979 
5980   return MatchOperand_Success;
5981 }
5982 
5983 OperandMatchResultTy
5984 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5985   using namespace llvm::AMDGPU::MTBUFFormat;
5986 
5987   if (!trySkipId("format", AsmToken::Colon))
5988     return MatchOperand_NoMatch;
5989 
5990   if (trySkipToken(AsmToken::LBrac)) {
5991     StringRef FormatStr;
5992     SMLoc Loc = getLoc();
5993     if (!parseId(FormatStr, "expected a format string"))
5994       return MatchOperand_ParseFail;
5995 
5996     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5997     if (Res == MatchOperand_NoMatch)
5998       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5999     if (Res != MatchOperand_Success)
6000       return Res;
6001 
6002     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6003       return MatchOperand_ParseFail;
6004 
6005     return MatchOperand_Success;
6006   }
6007 
6008   return parseNumericFormat(Format);
6009 }
6010 
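// Parses the MTBUF format operand together with the soffset operand that
// follows it. The format may be given before soffset (dfmt/nfmt or the
// "format:" syntax) or after it ("format:" syntax only); when omitted, the
// target's default format encoding is used.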
6011 OperandMatchResultTy
6012 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6013   using namespace llvm::AMDGPU::MTBUFFormat;
6014 
6015   int64_t Format = getDefaultFormatEncoding(getSTI());
6016   OperandMatchResultTy Res;
6017   SMLoc Loc = getLoc();
6018 
6019   // Parse legacy format syntax.
6020   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6021   if (Res == MatchOperand_ParseFail)
6022     return Res;
6023 
6024   bool FormatFound = (Res == MatchOperand_Success);
6025 
6026   Operands.push_back(
6027     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6028 
6029   if (FormatFound)
6030     trySkipToken(AsmToken::Comma);
6031 
6032   if (isToken(AsmToken::EndOfStatement)) {
6033     // We are expecting an soffset operand,
6034     // but let matcher handle the error.
6035     return MatchOperand_Success;
6036   }
6037 
6038   // Parse soffset.
6039   Res = parseRegOrImm(Operands);
6040   if (Res != MatchOperand_Success)
6041     return Res;
6042 
6043   trySkipToken(AsmToken::Comma);
6044 
6045   if (!FormatFound) {
6046     Res = parseSymbolicOrNumericFormat(Format);
6047     if (Res == MatchOperand_ParseFail)
6048       return Res;
6049     if (Res == MatchOperand_Success) {
6050       auto Size = Operands.size();
6051       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6052       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6053       Op.setImm(Format);
6054     }
6055     return MatchOperand_Success;
6056   }
6057 
6058   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6059     Error(getLoc(), "duplicate format");
6060     return MatchOperand_ParseFail;
6061   }
6062   return MatchOperand_Success;
6063 }
6064 
6065 //===----------------------------------------------------------------------===//
6066 // ds
6067 //===----------------------------------------------------------------------===//
6068 
6069 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6070                                     const OperandVector &Operands) {
6071   OptionalImmIndexMap OptionalIdx;
6072 
6073   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6074     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6075 
6076     // Add the register arguments
6077     if (Op.isReg()) {
6078       Op.addRegOperands(Inst, 1);
6079       continue;
6080     }
6081 
6082     // Handle optional arguments
6083     OptionalIdx[Op.getImmTy()] = i;
6084   }
6085 
6086   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6087   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6088   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6089 
6090   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6091 }
6092 
6093 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6094                                 bool IsGdsHardcoded) {
6095   OptionalImmIndexMap OptionalIdx;
6096 
6097   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6098     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6099 
6100     // Add the register arguments
6101     if (Op.isReg()) {
6102       Op.addRegOperands(Inst, 1);
6103       continue;
6104     }
6105 
6106     if (Op.isToken() && Op.getToken() == "gds") {
6107       IsGdsHardcoded = true;
6108       continue;
6109     }
6110 
6111     // Handle optional arguments
6112     OptionalIdx[Op.getImmTy()] = i;
6113   }
6114 
6115   AMDGPUOperand::ImmTy OffsetType =
6116     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6117      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6118      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6119                                                       AMDGPUOperand::ImmTyOffset;
6120 
6121   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6122 
6123   if (!IsGdsHardcoded) {
6124     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6125   }
6126   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6127 }
6128 
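// Converts a parsed export instruction into an MCInst. Register and "off"
// source operands are added in order, and the trailing immediate is the "en"
// enable mask: one bit per enabled source normally, two bits per source when
// the "compr" modifier is present.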
6129 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6130   OptionalImmIndexMap OptionalIdx;
6131 
6132   unsigned OperandIdx[4];
6133   unsigned EnMask = 0;
6134   int SrcIdx = 0;
6135 
6136   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6137     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6138 
6139     // Add the register arguments
6140     if (Op.isReg()) {
6141       assert(SrcIdx < 4);
6142       OperandIdx[SrcIdx] = Inst.size();
6143       Op.addRegOperands(Inst, 1);
6144       ++SrcIdx;
6145       continue;
6146     }
6147 
6148     if (Op.isOff()) {
6149       assert(SrcIdx < 4);
6150       OperandIdx[SrcIdx] = Inst.size();
6151       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6152       ++SrcIdx;
6153       continue;
6154     }
6155 
6156     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6157       Op.addImmOperands(Inst, 1);
6158       continue;
6159     }
6160 
6161     if (Op.isToken() && Op.getToken() == "done")
6162       continue;
6163 
6164     // Handle optional arguments
6165     OptionalIdx[Op.getImmTy()] = i;
6166   }
6167 
6168   assert(SrcIdx == 4);
6169 
6170   bool Compr = false;
6171   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6172     Compr = true;
6173     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6174     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6175     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6176   }
6177 
6178   for (auto i = 0; i < SrcIdx; ++i) {
6179     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6180       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6181     }
6182   }
6183 
6184   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6185   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6186 
6187   Inst.addOperand(MCOperand::createImm(EnMask));
6188 }
6189 
6190 //===----------------------------------------------------------------------===//
6191 // s_waitcnt
6192 //===----------------------------------------------------------------------===//
6193 
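     // Encode a single counter value into the s_waitcnt bitfield IntVal using
     // the given encode/decode callbacks. If the value does not fit into its
     // field, either clamp it to the maximum (when Saturate is set) or fail.
     // Returns true on failure.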
6194 static bool
6195 encodeCnt(
6196   const AMDGPU::IsaVersion ISA,
6197   int64_t &IntVal,
6198   int64_t CntVal,
6199   bool Saturate,
6200   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6201   unsigned (*decode)(const IsaVersion &Version, unsigned))
6202 {
6203   bool Failed = false;
6204 
6205   IntVal = encode(ISA, IntVal, CntVal);
6206   if (CntVal != decode(ISA, IntVal)) {
6207     if (Saturate) {
6208       IntVal = encode(ISA, IntVal, -1);
6209     } else {
6210       Failed = true;
6211     }
6212   }
6213   return Failed;
6214 }
6215 
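     // Parse one counter specification of the form <name>(<value>), e.g.
     // "vmcnt(0)" or "lgkmcnt_sat(7)", and merge the encoded value into IntVal.
     // A trailing '&' or ',' is accepted if another counter follows.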
6216 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6217 
6218   SMLoc CntLoc = getLoc();
6219   StringRef CntName = getTokenStr();
6220 
6221   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6222       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6223     return false;
6224 
6225   int64_t CntVal;
6226   SMLoc ValLoc = getLoc();
6227   if (!parseExpr(CntVal))
6228     return false;
6229 
6230   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6231 
6232   bool Failed = true;
6233   bool Sat = CntName.endswith("_sat");
6234 
6235   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6236     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6237   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6238     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6239   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6240     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6241   } else {
6242     Error(CntLoc, "invalid counter name " + CntName);
6243     return false;
6244   }
6245 
6246   if (Failed) {
6247     Error(ValLoc, "too large value for " + CntName);
6248     return false;
6249   }
6250 
6251   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6252     return false;
6253 
6254   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6255     if (isToken(AsmToken::EndOfStatement)) {
6256       Error(getLoc(), "expected a counter name");
6257       return false;
6258     }
6259   }
6260 
6261   return true;
6262 }
6263 
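     // Parse an s_waitcnt operand, either as named counters like
     // "vmcnt(0) lgkmcnt(1)" or as a single absolute expression. Counters that
     // are not mentioned keep their maximum (no-wait) values.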
6264 OperandMatchResultTy
6265 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6266   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6267   int64_t Waitcnt = getWaitcntBitMask(ISA);
6268   SMLoc S = getLoc();
6269 
6270   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6271     while (!isToken(AsmToken::EndOfStatement)) {
6272       if (!parseCnt(Waitcnt))
6273         return MatchOperand_ParseFail;
6274     }
6275   } else {
6276     if (!parseExpr(Waitcnt))
6277       return MatchOperand_ParseFail;
6278   }
6279 
6280   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6281   return MatchOperand_Success;
6282 }
6283 
6284 bool
6285 AMDGPUOperand::isSWaitCnt() const {
6286   return isImm();
6287 }
6288 
6289 //===----------------------------------------------------------------------===//
6290 // hwreg
6291 //===----------------------------------------------------------------------===//
6292 
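     // Parse the body of a hwreg(...) operand:
     //   hwreg(<register name or code> [, <bit offset>, <bit width>])
     // This also consumes the closing parenthesis.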
6293 bool
6294 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6295                                 OperandInfoTy &Offset,
6296                                 OperandInfoTy &Width) {
6297   using namespace llvm::AMDGPU::Hwreg;
6298 
6299   // The register may be specified by name or using a numeric code
6300   HwReg.Loc = getLoc();
6301   if (isToken(AsmToken::Identifier) &&
6302       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6303     HwReg.IsSymbolic = true;
6304     lex(); // skip register name
6305   } else if (!parseExpr(HwReg.Id, "a register name")) {
6306     return false;
6307   }
6308 
6309   if (trySkipToken(AsmToken::RParen))
6310     return true;
6311 
6312   // parse optional params
6313   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6314     return false;
6315 
6316   Offset.Loc = getLoc();
6317   if (!parseExpr(Offset.Id))
6318     return false;
6319 
6320   if (!skipToken(AsmToken::Comma, "expected a comma"))
6321     return false;
6322 
6323   Width.Loc = getLoc();
6324   return parseExpr(Width.Id) &&
6325          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6326 }
6327 
6328 bool
6329 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6330                                const OperandInfoTy &Offset,
6331                                const OperandInfoTy &Width) {
6332 
6333   using namespace llvm::AMDGPU::Hwreg;
6334 
6335   if (HwReg.IsSymbolic) {
6336     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6337       Error(HwReg.Loc,
6338             "specified hardware register is not supported on this GPU");
6339       return false;
6340     }
6341   } else {
6342     if (!isValidHwreg(HwReg.Id)) {
6343       Error(HwReg.Loc,
6344             "invalid code of hardware register: only 6-bit values are legal");
6345       return false;
6346     }
6347   }
6348   if (!isValidHwregOffset(Offset.Id)) {
6349     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6350     return false;
6351   }
6352   if (!isValidHwregWidth(Width.Id)) {
6353     Error(Width.Loc,
6354           "invalid bitfield width: only values from 1 to 32 are legal");
6355     return false;
6356   }
6357   return true;
6358 }
6359 
6360 OperandMatchResultTy
6361 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6362   using namespace llvm::AMDGPU::Hwreg;
6363 
6364   int64_t ImmVal = 0;
6365   SMLoc Loc = getLoc();
6366 
6367   if (trySkipId("hwreg", AsmToken::LParen)) {
6368     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6369     OperandInfoTy Offset(OFFSET_DEFAULT_);
6370     OperandInfoTy Width(WIDTH_DEFAULT_);
6371     if (parseHwregBody(HwReg, Offset, Width) &&
6372         validateHwreg(HwReg, Offset, Width)) {
6373       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6374     } else {
6375       return MatchOperand_ParseFail;
6376     }
6377   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6378     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6379       Error(Loc, "invalid immediate: only 16-bit values are legal");
6380       return MatchOperand_ParseFail;
6381     }
6382   } else {
6383     return MatchOperand_ParseFail;
6384   }
6385 
6386   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6387   return MatchOperand_Success;
6388 }
6389 
6390 bool AMDGPUOperand::isHwreg() const {
6391   return isImmTy(ImmTyHwreg);
6392 }
6393 
6394 //===----------------------------------------------------------------------===//
6395 // sendmsg
6396 //===----------------------------------------------------------------------===//
6397 
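     // Parse the body of a sendmsg(...) operand:
     //   sendmsg(<message name or id> [, <operation> [, <stream id>]])
     // This also consumes the closing parenthesis.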
6398 bool
6399 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6400                                   OperandInfoTy &Op,
6401                                   OperandInfoTy &Stream) {
6402   using namespace llvm::AMDGPU::SendMsg;
6403 
6404   Msg.Loc = getLoc();
6405   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6406     Msg.IsSymbolic = true;
6407     lex(); // skip message name
6408   } else if (!parseExpr(Msg.Id, "a message name")) {
6409     return false;
6410   }
6411 
6412   if (trySkipToken(AsmToken::Comma)) {
6413     Op.IsDefined = true;
6414     Op.Loc = getLoc();
6415     if (isToken(AsmToken::Identifier) &&
6416         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6417       lex(); // skip operation name
6418     } else if (!parseExpr(Op.Id, "an operation name")) {
6419       return false;
6420     }
6421 
6422     if (trySkipToken(AsmToken::Comma)) {
6423       Stream.IsDefined = true;
6424       Stream.Loc = getLoc();
6425       if (!parseExpr(Stream.Id))
6426         return false;
6427     }
6428   }
6429 
6430   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6431 }
6432 
6433 bool
6434 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6435                                  const OperandInfoTy &Op,
6436                                  const OperandInfoTy &Stream) {
6437   using namespace llvm::AMDGPU::SendMsg;
6438 
6439   // Validation strictness depends on whether the message is specified
6440   // in a symbolic or in a numeric form. In the latter case, only the
6441   // possibility of encoding the message is checked.
6442   bool Strict = Msg.IsSymbolic;
6443 
6444   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6445     Error(Msg.Loc, "invalid message id");
6446     return false;
6447   }
6448   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6449     if (Op.IsDefined) {
6450       Error(Op.Loc, "message does not support operations");
6451     } else {
6452       Error(Msg.Loc, "missing message operation");
6453     }
6454     return false;
6455   }
6456   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6457     Error(Op.Loc, "invalid operation id");
6458     return false;
6459   }
6460   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6461     Error(Stream.Loc, "message operation does not support streams");
6462     return false;
6463   }
6464   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6465     Error(Stream.Loc, "invalid message stream id");
6466     return false;
6467   }
6468   return true;
6469 }
6470 
6471 OperandMatchResultTy
6472 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6473   using namespace llvm::AMDGPU::SendMsg;
6474 
6475   int64_t ImmVal = 0;
6476   SMLoc Loc = getLoc();
6477 
6478   if (trySkipId("sendmsg", AsmToken::LParen)) {
6479     OperandInfoTy Msg(ID_UNKNOWN_);
6480     OperandInfoTy Op(OP_NONE_);
6481     OperandInfoTy Stream(STREAM_ID_NONE_);
6482     if (parseSendMsgBody(Msg, Op, Stream) &&
6483         validateSendMsg(Msg, Op, Stream)) {
6484       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6485     } else {
6486       return MatchOperand_ParseFail;
6487     }
6488   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6489     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6490       Error(Loc, "invalid immediate: only 16-bit values are legal");
6491       return MatchOperand_ParseFail;
6492     }
6493   } else {
6494     return MatchOperand_ParseFail;
6495   }
6496 
6497   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6498   return MatchOperand_Success;
6499 }
6500 
6501 bool AMDGPUOperand::isSendMsg() const {
6502   return isImmTy(ImmTySendMsg);
6503 }
6504 
6505 //===----------------------------------------------------------------------===//
6506 // v_interp
6507 //===----------------------------------------------------------------------===//
6508 
6509 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6510   StringRef Str;
6511   SMLoc S = getLoc();
6512 
6513   if (!parseId(Str))
6514     return MatchOperand_NoMatch;
6515 
6516   int Slot = StringSwitch<int>(Str)
6517     .Case("p10", 0)
6518     .Case("p20", 1)
6519     .Case("p0", 2)
6520     .Default(-1);
6521 
6522   if (Slot == -1) {
6523     Error(S, "invalid interpolation slot");
6524     return MatchOperand_ParseFail;
6525   }
6526 
6527   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6528                                               AMDGPUOperand::ImmTyInterpSlot));
6529   return MatchOperand_Success;
6530 }
6531 
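     // Parse an interpolation attribute of the form attr<N>.<chan>, e.g.
     // "attr31.x". This produces two immediates: the attribute number and the
     // channel (x, y, z or w).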
6532 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6533   StringRef Str;
6534   SMLoc S = getLoc();
6535 
6536   if (!parseId(Str))
6537     return MatchOperand_NoMatch;
6538 
6539   if (!Str.startswith("attr")) {
6540     Error(S, "invalid interpolation attribute");
6541     return MatchOperand_ParseFail;
6542   }
6543 
6544   StringRef Chan = Str.take_back(2);
6545   int AttrChan = StringSwitch<int>(Chan)
6546     .Case(".x", 0)
6547     .Case(".y", 1)
6548     .Case(".z", 2)
6549     .Case(".w", 3)
6550     .Default(-1);
6551   if (AttrChan == -1) {
6552     Error(S, "invalid or missing interpolation attribute channel");
6553     return MatchOperand_ParseFail;
6554   }
6555 
6556   Str = Str.drop_back(2).drop_front(4);
6557 
6558   uint8_t Attr;
6559   if (Str.getAsInteger(10, Attr)) {
6560     Error(S, "invalid or missing interpolation attribute number");
6561     return MatchOperand_ParseFail;
6562   }
6563 
6564   if (Attr > 63) {
6565     Error(S, "out of bounds interpolation attribute number");
6566     return MatchOperand_ParseFail;
6567   }
6568 
6569   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6570 
6571   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6572                                               AMDGPUOperand::ImmTyInterpAttr));
6573   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6574                                               AMDGPUOperand::ImmTyAttrChan));
6575   return MatchOperand_Success;
6576 }
6577 
6578 //===----------------------------------------------------------------------===//
6579 // exp
6580 //===----------------------------------------------------------------------===//
6581 
6582 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6583   using namespace llvm::AMDGPU::Exp;
6584 
6585   StringRef Str;
6586   SMLoc S = getLoc();
6587 
6588   if (!parseId(Str))
6589     return MatchOperand_NoMatch;
6590 
6591   unsigned Id = getTgtId(Str);
6592   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6593     Error(S, (Id == ET_INVALID) ?
6594                 "invalid exp target" :
6595                 "exp target is not supported on this GPU");
6596     return MatchOperand_ParseFail;
6597   }
6598 
6599   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6600                                               AMDGPUOperand::ImmTyExpTgt));
6601   return MatchOperand_Success;
6602 }
6603 
6604 //===----------------------------------------------------------------------===//
6605 // parser helpers
6606 //===----------------------------------------------------------------------===//
6607 
6608 bool
6609 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6610   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6611 }
6612 
6613 bool
6614 AMDGPUAsmParser::isId(const StringRef Id) const {
6615   return isId(getToken(), Id);
6616 }
6617 
6618 bool
6619 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6620   return getTokenKind() == Kind;
6621 }
6622 
6623 bool
6624 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6625   if (isId(Id)) {
6626     lex();
6627     return true;
6628   }
6629   return false;
6630 }
6631 
6632 bool
6633 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6634   if (isToken(AsmToken::Identifier)) {
6635     StringRef Tok = getTokenStr();
6636     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6637       lex();
6638       return true;
6639     }
6640   }
6641   return false;
6642 }
6643 
6644 bool
6645 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6646   if (isId(Id) && peekToken().is(Kind)) {
6647     lex();
6648     lex();
6649     return true;
6650   }
6651   return false;
6652 }
6653 
6654 bool
6655 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6656   if (isToken(Kind)) {
6657     lex();
6658     return true;
6659   }
6660   return false;
6661 }
6662 
6663 bool
6664 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6665                            const StringRef ErrMsg) {
6666   if (!trySkipToken(Kind)) {
6667     Error(getLoc(), ErrMsg);
6668     return false;
6669   }
6670   return true;
6671 }
6672 
6673 bool
6674 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6675   SMLoc S = getLoc();
6676 
6677   const MCExpr *Expr;
6678   if (Parser.parseExpression(Expr))
6679     return false;
6680 
6681   if (Expr->evaluateAsAbsolute(Imm))
6682     return true;
6683 
6684   if (Expected.empty()) {
6685     Error(S, "expected absolute expression");
6686   } else {
6687     Error(S, Twine("expected ", Expected) +
6688              Twine(" or an absolute expression"));
6689   }
6690   return false;
6691 }
6692 
6693 bool
6694 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6695   SMLoc S = getLoc();
6696 
6697   const MCExpr *Expr;
6698   if (Parser.parseExpression(Expr))
6699     return false;
6700 
6701   int64_t IntVal;
6702   if (Expr->evaluateAsAbsolute(IntVal)) {
6703     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6704   } else {
6705     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6706   }
6707   return true;
6708 }
6709 
6710 bool
6711 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6712   if (isToken(AsmToken::String)) {
6713     Val = getToken().getStringContents();
6714     lex();
6715     return true;
6716   } else {
6717     Error(getLoc(), ErrMsg);
6718     return false;
6719   }
6720 }
6721 
6722 bool
6723 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6724   if (isToken(AsmToken::Identifier)) {
6725     Val = getTokenStr();
6726     lex();
6727     return true;
6728   } else {
6729     if (!ErrMsg.empty())
6730       Error(getLoc(), ErrMsg);
6731     return false;
6732   }
6733 }
6734 
6735 AsmToken
6736 AMDGPUAsmParser::getToken() const {
6737   return Parser.getTok();
6738 }
6739 
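     // Return the next token without consuming it. At the end of a statement,
     // return the EndOfStatement token itself rather than peeking past the
     // statement boundary.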
6740 AsmToken
6741 AMDGPUAsmParser::peekToken() {
6742   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6743 }
6744 
6745 void
6746 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6747   auto TokCount = getLexer().peekTokens(Tokens);
6748 
6749   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6750     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6751 }
6752 
6753 AsmToken::TokenKind
6754 AMDGPUAsmParser::getTokenKind() const {
6755   return getLexer().getKind();
6756 }
6757 
6758 SMLoc
6759 AMDGPUAsmParser::getLoc() const {
6760   return getToken().getLoc();
6761 }
6762 
6763 StringRef
6764 AMDGPUAsmParser::getTokenStr() const {
6765   return getToken().getString();
6766 }
6767 
6768 void
6769 AMDGPUAsmParser::lex() {
6770   Parser.Lex();
6771 }
6772 
6773 SMLoc
6774 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6775                                const OperandVector &Operands) const {
6776   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6777     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6778     if (Test(Op))
6779       return Op.getStartLoc();
6780   }
6781   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6782 }
6783 
6784 SMLoc
6785 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6786                            const OperandVector &Operands) const {
6787   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6788   return getOperandLoc(Test, Operands);
6789 }
6790 
6791 SMLoc
6792 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6793                            const OperandVector &Operands) const {
6794   auto Test = [=](const AMDGPUOperand& Op) {
6795     return Op.isRegKind() && Op.getReg() == Reg;
6796   };
6797   return getOperandLoc(Test, Operands);
6798 }
6799 
6800 SMLoc
6801 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6802   auto Test = [](const AMDGPUOperand& Op) {
6803     return Op.IsImmKindLiteral() || Op.isExpr();
6804   };
6805   return getOperandLoc(Test, Operands);
6806 }
6807 
6808 SMLoc
6809 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6810   auto Test = [](const AMDGPUOperand& Op) {
6811     return Op.isImmKindConst();
6812   };
6813   return getOperandLoc(Test, Operands);
6814 }
6815 
6816 //===----------------------------------------------------------------------===//
6817 // swizzle
6818 //===----------------------------------------------------------------------===//
6819 
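     // Pack the AND/OR/XOR masks into the swizzle BITMASK_PERM encoding, which
     // effectively maps a lane id to ((id & AndMask) | OrMask) ^ XorMask.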
6820 LLVM_READNONE
6821 static unsigned
6822 encodeBitmaskPerm(const unsigned AndMask,
6823                   const unsigned OrMask,
6824                   const unsigned XorMask) {
6825   using namespace llvm::AMDGPU::Swizzle;
6826 
6827   return BITMASK_PERM_ENC |
6828          (AndMask << BITMASK_AND_SHIFT) |
6829          (OrMask  << BITMASK_OR_SHIFT)  |
6830          (XorMask << BITMASK_XOR_SHIFT);
6831 }
6832 
6833 bool
6834 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6835                                      const unsigned MinVal,
6836                                      const unsigned MaxVal,
6837                                      const StringRef ErrMsg,
6838                                      SMLoc &Loc) {
6839   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6840     return false;
6841   }
6842   Loc = getLoc();
6843   if (!parseExpr(Op)) {
6844     return false;
6845   }
6846   if (Op < MinVal || Op > MaxVal) {
6847     Error(Loc, ErrMsg);
6848     return false;
6849   }
6850 
6851   return true;
6852 }
6853 
6854 bool
6855 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6856                                       const unsigned MinVal,
6857                                       const unsigned MaxVal,
6858                                       const StringRef ErrMsg) {
6859   SMLoc Loc;
6860   for (unsigned i = 0; i < OpNum; ++i) {
6861     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6862       return false;
6863   }
6864 
6865   return true;
6866 }
6867 
6868 bool
6869 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6870   using namespace llvm::AMDGPU::Swizzle;
6871 
6872   int64_t Lane[LANE_NUM];
6873   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6874                            "expected a 2-bit lane id")) {
6875     Imm = QUAD_PERM_ENC;
6876     for (unsigned I = 0; I < LANE_NUM; ++I) {
6877       Imm |= Lane[I] << (LANE_SHIFT * I);
6878     }
6879     return true;
6880   }
6881   return false;
6882 }
6883 
6884 bool
6885 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6886   using namespace llvm::AMDGPU::Swizzle;
6887 
6888   SMLoc Loc;
6889   int64_t GroupSize;
6890   int64_t LaneIdx;
6891 
6892   if (!parseSwizzleOperand(GroupSize,
6893                            2, 32,
6894                            "group size must be in the interval [2,32]",
6895                            Loc)) {
6896     return false;
6897   }
6898   if (!isPowerOf2_64(GroupSize)) {
6899     Error(Loc, "group size must be a power of two");
6900     return false;
6901   }
6902   if (parseSwizzleOperand(LaneIdx,
6903                           0, GroupSize - 1,
6904                           "lane id must be in the interval [0,group size - 1]",
6905                           Loc)) {
6906     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6907     return true;
6908   }
6909   return false;
6910 }
6911 
6912 bool
6913 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6914   using namespace llvm::AMDGPU::Swizzle;
6915 
6916   SMLoc Loc;
6917   int64_t GroupSize;
6918 
6919   if (!parseSwizzleOperand(GroupSize,
6920                            2, 32,
6921                            "group size must be in the interval [2,32]",
6922                            Loc)) {
6923     return false;
6924   }
6925   if (!isPowerOf2_64(GroupSize)) {
6926     Error(Loc, "group size must be a power of two");
6927     return false;
6928   }
6929 
6930   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6931   return true;
6932 }
6933 
6934 bool
6935 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6936   using namespace llvm::AMDGPU::Swizzle;
6937 
6938   SMLoc Loc;
6939   int64_t GroupSize;
6940 
6941   if (!parseSwizzleOperand(GroupSize,
6942                            1, 16,
6943                            "group size must be in the interval [1,16]",
6944                            Loc)) {
6945     return false;
6946   }
6947   if (!isPowerOf2_64(GroupSize)) {
6948     Error(Loc, "group size must be a power of two");
6949     return false;
6950   }
6951 
6952   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6953   return true;
6954 }
6955 
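     // Parse swizzle(BITMASK_PERM, "<mask>") where <mask> is a 5-character
     // string describing each lane-id bit, MSB first: '0' clears the bit,
     // '1' sets it, 'p' preserves it and 'i' inverts it.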
6956 bool
6957 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6958   using namespace llvm::AMDGPU::Swizzle;
6959 
6960   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6961     return false;
6962   }
6963 
6964   StringRef Ctl;
6965   SMLoc StrLoc = getLoc();
6966   if (!parseString(Ctl)) {
6967     return false;
6968   }
6969   if (Ctl.size() != BITMASK_WIDTH) {
6970     Error(StrLoc, "expected a 5-character mask");
6971     return false;
6972   }
6973 
6974   unsigned AndMask = 0;
6975   unsigned OrMask = 0;
6976   unsigned XorMask = 0;
6977 
6978   for (size_t i = 0; i < Ctl.size(); ++i) {
6979     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6980     switch(Ctl[i]) {
6981     default:
6982       Error(StrLoc, "invalid mask");
6983       return false;
6984     case '0':
6985       break;
6986     case '1':
6987       OrMask |= Mask;
6988       break;
6989     case 'p':
6990       AndMask |= Mask;
6991       break;
6992     case 'i':
6993       AndMask |= Mask;
6994       XorMask |= Mask;
6995       break;
6996     }
6997   }
6998 
6999   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7000   return true;
7001 }
7002 
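     // Parse a raw swizzle offset given as an absolute expression; it must fit
     // into 16 bits.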
7003 bool
7004 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7005 
7006   SMLoc OffsetLoc = getLoc();
7007 
7008   if (!parseExpr(Imm, "a swizzle macro")) {
7009     return false;
7010   }
7011   if (!isUInt<16>(Imm)) {
7012     Error(OffsetLoc, "expected a 16-bit offset");
7013     return false;
7014   }
7015   return true;
7016 }
7017 
7018 bool
7019 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7020   using namespace llvm::AMDGPU::Swizzle;
7021 
7022   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7023 
7024     SMLoc ModeLoc = getLoc();
7025     bool Ok = false;
7026 
7027     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7028       Ok = parseSwizzleQuadPerm(Imm);
7029     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7030       Ok = parseSwizzleBitmaskPerm(Imm);
7031     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7032       Ok = parseSwizzleBroadcast(Imm);
7033     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7034       Ok = parseSwizzleSwap(Imm);
7035     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7036       Ok = parseSwizzleReverse(Imm);
7037     } else {
7038       Error(ModeLoc, "expected a swizzle mode");
7039     }
7040 
7041     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7042   }
7043 
7044   return false;
7045 }
7046 
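     // Parse the swizzle "offset:" operand, either as a swizzle macro, e.g.
     // offset:swizzle(QUAD_PERM, 0, 1, 2, 3), or as a plain 16-bit value such
     // as offset:0xffff. If "offset" is absent, fall back to parsing other
     // optional operands.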
7047 OperandMatchResultTy
7048 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7049   SMLoc S = getLoc();
7050   int64_t Imm = 0;
7051 
7052   if (trySkipId("offset")) {
7053 
7054     bool Ok = false;
7055     if (skipToken(AsmToken::Colon, "expected a colon")) {
7056       if (trySkipId("swizzle")) {
7057         Ok = parseSwizzleMacro(Imm);
7058       } else {
7059         Ok = parseSwizzleOffset(Imm);
7060       }
7061     }
7062 
7063     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7064 
7065     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7066   } else {
7067     // Swizzle "offset" operand is optional.
7068     // If it is omitted, try parsing other optional operands.
7069     return parseOptionalOpr(Operands);
7070   }
7071 }
7072 
7073 bool
7074 AMDGPUOperand::isSwizzle() const {
7075   return isImmTy(ImmTySwizzle);
7076 }
7077 
7078 //===----------------------------------------------------------------------===//
7079 // VGPR Index Mode
7080 //===----------------------------------------------------------------------===//
7081 
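     // Parse the body of a gpr_idx(...) operand: a comma-separated list of
     // VGPR index modes combined into a bitmask, or an immediately closing
     // parenthesis meaning "off". Returns UNDEF on a parse error.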
7082 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7083 
7084   using namespace llvm::AMDGPU::VGPRIndexMode;
7085 
7086   if (trySkipToken(AsmToken::RParen)) {
7087     return OFF;
7088   }
7089 
7090   int64_t Imm = 0;
7091 
7092   while (true) {
7093     unsigned Mode = 0;
7094     SMLoc S = getLoc();
7095 
7096     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7097       if (trySkipId(IdSymbolic[ModeId])) {
7098         Mode = 1 << ModeId;
7099         break;
7100       }
7101     }
7102 
7103     if (Mode == 0) {
7104       Error(S, (Imm == 0)?
7105                "expected a VGPR index mode or a closing parenthesis" :
7106                "expected a VGPR index mode");
7107       return UNDEF;
7108     }
7109 
7110     if (Imm & Mode) {
7111       Error(S, "duplicate VGPR index mode");
7112       return UNDEF;
7113     }
7114     Imm |= Mode;
7115 
7116     if (trySkipToken(AsmToken::RParen))
7117       break;
7118     if (!skipToken(AsmToken::Comma,
7119                    "expected a comma or a closing parenthesis"))
7120       return UNDEF;
7121   }
7122 
7123   return Imm;
7124 }
7125 
7126 OperandMatchResultTy
7127 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7128 
7129   using namespace llvm::AMDGPU::VGPRIndexMode;
7130 
7131   int64_t Imm = 0;
7132   SMLoc S = getLoc();
7133 
7134   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7135     Imm = parseGPRIdxMacro();
7136     if (Imm == UNDEF)
7137       return MatchOperand_ParseFail;
7138   } else {
7139     if (getParser().parseAbsoluteExpression(Imm))
7140       return MatchOperand_ParseFail;
7141     if (Imm < 0 || !isUInt<4>(Imm)) {
7142       Error(S, "invalid immediate: only 4-bit values are legal");
7143       return MatchOperand_ParseFail;
7144     }
7145   }
7146 
7147   Operands.push_back(
7148       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7149   return MatchOperand_Success;
7150 }
7151 
7152 bool AMDGPUOperand::isGPRIdxMode() const {
7153   return isImmTy(ImmTyGprIdxMode);
7154 }
7155 
7156 //===----------------------------------------------------------------------===//
7157 // sopp branch targets
7158 //===----------------------------------------------------------------------===//
7159 
7160 OperandMatchResultTy
7161 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7162 
7163   // Make sure we are not parsing something
7164   // that looks like a label or an expression but is not.
7165   // This will improve error messages.
7166   if (isRegister() || isModifier())
7167     return MatchOperand_NoMatch;
7168 
7169   if (!parseExpr(Operands))
7170     return MatchOperand_ParseFail;
7171 
7172   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7173   assert(Opr.isImm() || Opr.isExpr());
7174   SMLoc Loc = Opr.getStartLoc();
7175 
7176   // Currently we do not support arbitrary expressions as branch targets.
7177   // Only labels and absolute expressions are accepted.
7178   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7179     Error(Loc, "expected an absolute expression or a label");
7180   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7181     Error(Loc, "expected a 16-bit signed jump offset");
7182   }
7183 
7184   return MatchOperand_Success;
7185 }
7186 
7187 //===----------------------------------------------------------------------===//
7188 // Boolean holding registers
7189 //===----------------------------------------------------------------------===//
7190 
7191 OperandMatchResultTy
7192 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7193   return parseReg(Operands);
7194 }
7195 
7196 //===----------------------------------------------------------------------===//
7197 // mubuf
7198 //===----------------------------------------------------------------------===//
7199 
7200 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7201   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7202 }
7203 
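     // Convert parsed MUBUF operands to MCInst operands. For atomics, switch
     // to the no-return opcode variant unless glc was specified; returning
     // variants get a tied source operand for the destination.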
7204 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7205                                    const OperandVector &Operands,
7206                                    bool IsAtomic,
7207                                    bool IsLds) {
7208   bool IsLdsOpcode = IsLds;
7209   bool HasLdsModifier = false;
7210   OptionalImmIndexMap OptionalIdx;
7211   unsigned FirstOperandIdx = 1;
7212   bool IsAtomicReturn = false;
7213 
7214   if (IsAtomic) {
7215     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7216       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7217       if (!Op.isCPol())
7218         continue;
7219       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7220       break;
7221     }
7222 
7223     if (!IsAtomicReturn) {
7224       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7225       if (NewOpc != -1)
7226         Inst.setOpcode(NewOpc);
7227     }
7228 
7229     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7230                       SIInstrFlags::IsAtomicRet;
7231   }
7232 
7233   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7234     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7235 
7236     // Add the register arguments
7237     if (Op.isReg()) {
7238       Op.addRegOperands(Inst, 1);
7239       // Insert a tied src operand for the atomic return dst. This cannot
7240       // be postponed, as subsequent calls to addImmOperands rely on the
7241       // correct number of MC operands.
7242       if (IsAtomicReturn && i == FirstOperandIdx)
7243         Op.addRegOperands(Inst, 1);
7244       continue;
7245     }
7246 
7247     // Handle the case where soffset is an immediate
7248     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7249       Op.addImmOperands(Inst, 1);
7250       continue;
7251     }
7252 
7253     HasLdsModifier |= Op.isLDS();
7254 
7255     // Handle tokens like 'offen' which are sometimes hard-coded into the
7256     // asm string.  There are no MCInst operands for these.
7257     if (Op.isToken()) {
7258       continue;
7259     }
7260     assert(Op.isImm());
7261 
7262     // Handle optional arguments
7263     OptionalIdx[Op.getImmTy()] = i;
7264   }
7265 
7266   // This is a workaround for an llvm quirk which may result in incorrect
7267   // instruction selection. The lds and non-lds versions of MUBUF
7268   // instructions are identical except that lds versions have a mandatory
7269   // 'lds' modifier. However, this modifier follows the optional modifiers,
7270   // and the llvm asm matcher regards it as optional as well. As a result,
7271   // the lds version of an opcode may be selected even if the instruction
7272   // has no 'lds' modifier.
7273   if (IsLdsOpcode && !HasLdsModifier) {
7274     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7275     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7276       Inst.setOpcode(NoLdsOpcode);
7277       IsLdsOpcode = false;
7278     }
7279   }
7280 
7281   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7282   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7283 
7284   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7285     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7286   }
7287   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7288 }
7289 
7290 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7291   OptionalImmIndexMap OptionalIdx;
7292 
7293   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7294     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7295 
7296     // Add the register arguments
7297     if (Op.isReg()) {
7298       Op.addRegOperands(Inst, 1);
7299       continue;
7300     }
7301 
7302     // Handle the case where soffset is an immediate
7303     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7304       Op.addImmOperands(Inst, 1);
7305       continue;
7306     }
7307 
7308     // Handle tokens like 'offen' which are sometimes hard-coded into the
7309     // asm string.  There are no MCInst operands for these.
7310     if (Op.isToken()) {
7311       continue;
7312     }
7313     assert(Op.isImm());
7314 
7315     // Handle optional arguments
7316     OptionalIdx[Op.getImmTy()] = i;
7317   }
7318 
7319   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7320                         AMDGPUOperand::ImmTyOffset);
7321   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7322   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7323   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7324   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7325 }
7326 
7327 //===----------------------------------------------------------------------===//
7328 // mimg
7329 //===----------------------------------------------------------------------===//
7330 
7331 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7332                               bool IsAtomic) {
7333   unsigned I = 1;
7334   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7335   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7336     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7337   }
7338 
7339   if (IsAtomic) {
7340     // Add src, same as dst
7341     assert(Desc.getNumDefs() == 1);
7342     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7343   }
7344 
7345   OptionalImmIndexMap OptionalIdx;
7346 
7347   for (unsigned E = Operands.size(); I != E; ++I) {
7348     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7349 
7350     // Add the register arguments
7351     if (Op.isReg()) {
7352       Op.addRegOperands(Inst, 1);
7353     } else if (Op.isImmModifier()) {
7354       OptionalIdx[Op.getImmTy()] = I;
7355     } else if (!Op.isToken()) {
7356       llvm_unreachable("unexpected operand type");
7357     }
7358   }
7359 
7360   bool IsGFX10Plus = isGFX10Plus();
7361 
7362   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7363   if (IsGFX10Plus)
7364     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7365   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7366   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7367   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7368   if (IsGFX10Plus)
7369     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7370   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7371     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7372   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7373   if (!IsGFX10Plus)
7374     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7375   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7376 }
7377 
7378 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7379   cvtMIMG(Inst, Operands, true);
7380 }
7381 
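     // Convert parsed SMEM atomic operands. As with MUBUF atomics, switch to
     // the no-return opcode variant unless glc was specified; returning
     // variants get a tied source operand for the destination.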
7382 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7383   OptionalImmIndexMap OptionalIdx;
7384   bool IsAtomicReturn = false;
7385 
7386   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7387     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7388     if (!Op.isCPol())
7389       continue;
7390     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7391     break;
7392   }
7393 
7394   if (!IsAtomicReturn) {
7395     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7396     if (NewOpc != -1)
7397       Inst.setOpcode(NewOpc);
7398   }
7399 
7400   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7401                     SIInstrFlags::IsAtomicRet;
7402 
7403   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7404     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7405 
7406     // Add the register arguments
7407     if (Op.isReg()) {
7408       Op.addRegOperands(Inst, 1);
7409       if (IsAtomicReturn && i == 1)
7410         Op.addRegOperands(Inst, 1);
7411       continue;
7412     }
7413 
7414     // Handle the case where soffset is an immediate
7415     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7416       Op.addImmOperands(Inst, 1);
7417       continue;
7418     }
7419 
7420     // Handle tokens like 'offen' which are sometimes hard-coded into the
7421     // asm string.  There are no MCInst operands for these.
7422     if (Op.isToken()) {
7423       continue;
7424     }
7425     assert(Op.isImm());
7426 
7427     // Handle optional arguments
7428     OptionalIdx[Op.getImmTy()] = i;
7429   }
7430 
7431   if ((int)Inst.getNumOperands() <=
7432       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7433     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7434   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7435 }
7436 
7437 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7438                                       const OperandVector &Operands) {
7439   for (unsigned I = 1; I < Operands.size(); ++I) {
7440     auto &Operand = (AMDGPUOperand &)*Operands[I];
7441     if (Operand.isReg())
7442       Operand.addRegOperands(Inst, 1);
7443   }
7444 
7445   Inst.addOperand(MCOperand::createImm(1)); // a16
7446 }
7447 
7448 //===----------------------------------------------------------------------===//
7449 // smrd
7450 //===----------------------------------------------------------------------===//
7451 
7452 bool AMDGPUOperand::isSMRDOffset8() const {
7453   return isImm() && isUInt<8>(getImm());
7454 }
7455 
7456 bool AMDGPUOperand::isSMEMOffset() const {
7457   return isImm(); // Offset range is checked later by validator.
7458 }
7459 
7460 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7461   // 32-bit literals are only supported on CI, and we only want to use them
7462   // when the offset does not fit in 8 bits.
7463   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7464 }
7465 
7466 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7467   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7468 }
7469 
7470 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7471   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7472 }
7473 
7474 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7475   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7476 }
7477 
7478 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7479   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7480 }
7481 
7482 //===----------------------------------------------------------------------===//
7483 // vop3
7484 //===----------------------------------------------------------------------===//
7485 
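     // omod (output modifier) is encoded as a 2-bit field: 0 = none, 1 = *2,
     // 2 = *4, 3 = /2. The helpers below map the mul:N / div:N syntax onto
     // that encoding.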
7486 static bool ConvertOmodMul(int64_t &Mul) {
7487   if (Mul != 1 && Mul != 2 && Mul != 4)
7488     return false;
7489 
7490   Mul >>= 1;
7491   return true;
7492 }
7493 
7494 static bool ConvertOmodDiv(int64_t &Div) {
7495   if (Div == 1) {
7496     Div = 0;
7497     return true;
7498   }
7499 
7500   if (Div == 2) {
7501     Div = 3;
7502     return true;
7503   }
7504 
7505   return false;
7506 }
7507 
7508 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7509 // This is intentional and ensures compatibility with sp3.
7510 // See bug 35397 for details.
7511 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7512   if (BoundCtrl == 0 || BoundCtrl == 1) {
7513     BoundCtrl = 1;
7514     return true;
7515   }
7516   return false;
7517 }
7518 
7519 // Note: the order in this table matches the order of operands in AsmString.
7520 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7521   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7522   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7523   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7524   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7525   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7526   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7527   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7528   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7529   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7530   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7531   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7532   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7533   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7534   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7535   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7536   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7537   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7538   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7539   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7540   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7541   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7542   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7543   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7544   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7545   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7546   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7547   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7548   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7549   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7550   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7551   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7552   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7553   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7554   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7555   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7556   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7557   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7558   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7559   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7560   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7561   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7562 };
7563 
7564 void AMDGPUAsmParser::onBeginOfFile() {
7565   if (!getParser().getStreamer().getTargetStreamer() ||
7566       getSTI().getTargetTriple().getArch() == Triple::r600)
7567     return;
7568 
7569   if (!getTargetStreamer().getTargetID())
7570     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7571 
7572   if (isHsaAbiVersion3AndAbove(&getSTI()))
7573     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7574 }
7575 
7576 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7577 
7578   OperandMatchResultTy res = parseOptionalOpr(Operands);
7579 
7580   // This is a hack to enable hardcoded mandatory operands which follow
7581   // optional operands.
7582   //
7583   // The current design assumes that all operands after the first optional
7584   // operand are also optional. However, some instructions violate this rule
7585   // (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7586   //
7587   // To alleviate this problem, we have to (implicitly) parse extra operands
7588   // to make sure the autogenerated parser of custom operands never hits a
7589   // hardcoded mandatory operand.
7590 
7591   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7592     if (res != MatchOperand_Success ||
7593         isToken(AsmToken::EndOfStatement))
7594       break;
7595 
7596     trySkipToken(AsmToken::Comma);
7597     res = parseOptionalOpr(Operands);
7598   }
7599 
7600   return res;
7601 }
7602 
7603 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7604   OperandMatchResultTy res;
7605   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7606     // try to parse any optional operand here
7607     if (Op.IsBit) {
7608       res = parseNamedBit(Op.Name, Operands, Op.Type);
7609     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7610       res = parseOModOperand(Operands);
7611     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7612                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7613                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7614       res = parseSDWASel(Operands, Op.Name, Op.Type);
7615     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7616       res = parseSDWADstUnused(Operands);
7617     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7618                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7619                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7620                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7621       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7622                                         Op.ConvertResult);
7623     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7624       res = parseDim(Operands);
7625     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7626       res = parseCPol(Operands);
7627     } else {
7628       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7629     }
7630     if (res != MatchOperand_NoMatch) {
7631       return res;
7632     }
7633   }
7634   return MatchOperand_NoMatch;
7635 }
7636 
7637 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7638   StringRef Name = getTokenStr();
7639   if (Name == "mul") {
7640     return parseIntWithPrefix("mul", Operands,
7641                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7642   }
7643 
7644   if (Name == "div") {
7645     return parseIntWithPrefix("div", Operands,
7646                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7647   }
7648 
7649   return MatchOperand_NoMatch;
7650 }
7651 
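     // For opcodes with op_sel, the bit following the source-select bits
     // selects the high half of the destination; propagate it into
     // src0_modifiers as DST_OP_SEL, since that is where it is encoded.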
7652 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7653   cvtVOP3P(Inst, Operands);
7654 
7655   int Opc = Inst.getOpcode();
7656 
7657   int SrcNum;
7658   const int Ops[] = { AMDGPU::OpName::src0,
7659                       AMDGPU::OpName::src1,
7660                       AMDGPU::OpName::src2 };
7661   for (SrcNum = 0;
7662        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7663        ++SrcNum);
7664   assert(SrcNum > 0);
7665 
7666   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7667   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7668 
7669   if ((OpSel & (1 << SrcNum)) != 0) {
7670     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7671     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7672     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7673   }
7674 }
7675 
7676 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7677       // 1. This operand is an input modifier,
7678   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7679       // 2. and it is not the last operand,
7680       && Desc.NumOperands > (OpNum + 1)
7681       // 3. and the next operand is a register class,
7682       && Desc.OpInfo[OpNum + 1].RegClass != -1
7683       // 4. and that register is not tied to any other operand.
7684       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7685 }
7686 
7687 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7688 {
7689   OptionalImmIndexMap OptionalIdx;
7690   unsigned Opc = Inst.getOpcode();
7691 
7692   unsigned I = 1;
7693   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7694   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7695     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7696   }
7697 
7698   for (unsigned E = Operands.size(); I != E; ++I) {
7699     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7700     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7701       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7702     } else if (Op.isInterpSlot() ||
7703                Op.isInterpAttr() ||
7704                Op.isAttrChan()) {
7705       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7706     } else if (Op.isImmModifier()) {
7707       OptionalIdx[Op.getImmTy()] = I;
7708     } else {
7709       llvm_unreachable("unhandled operand type");
7710     }
7711   }
7712 
7713   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7714     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7715   }
7716 
7717   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7718     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7719   }
7720 
7721   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7722     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7723   }
7724 }
7725 
7726 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7727                               OptionalImmIndexMap &OptionalIdx) {
7728   unsigned Opc = Inst.getOpcode();
7729 
7730   unsigned I = 1;
7731   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7732   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7733     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7734   }
7735 
7736   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7737     // This instruction has src modifiers
7738     for (unsigned E = Operands.size(); I != E; ++I) {
7739       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7740       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7741         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7742       } else if (Op.isImmModifier()) {
7743         OptionalIdx[Op.getImmTy()] = I;
7744       } else if (Op.isRegOrImm()) {
7745         Op.addRegOrImmOperands(Inst, 1);
7746       } else {
7747         llvm_unreachable("unhandled operand type");
7748       }
7749     }
7750   } else {
7751     // No src modifiers
7752     for (unsigned E = Operands.size(); I != E; ++I) {
7753       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7754       if (Op.isMod()) {
7755         OptionalIdx[Op.getImmTy()] = I;
7756       } else {
7757         Op.addRegOrImmOperands(Inst, 1);
7758       }
7759     }
7760   }
7761 
7762   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7763     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7764   }
7765 
7766   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7767     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7768   }
7769 
7770   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7771   // these have a src2 register operand that is tied to the dst operand.
7772   // We don't allow modifiers for this operand in the assembler, so
7773   // src2_modifiers should be 0.
7774   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7775       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7776       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7777       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7778       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7779       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7780       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7781       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7782       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7783       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7784       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7785     auto it = Inst.begin();
7786     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7787     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7788     ++it;
7789     // Copy the operand to ensure it's not invalidated when Inst grows.
7790     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7791   }
7792 }
7793 
7794 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7795   OptionalImmIndexMap OptionalIdx;
7796   cvtVOP3(Inst, Operands, OptionalIdx);
7797 }
7798 
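// Convert the packed-math (VOP3P) controls. The op_sel/op_sel_hi and
// neg_lo/neg_hi immediates are first appended as separate operands and then
// folded, bit by bit, into the per-source srcN_modifiers fields that the
// encoding actually uses. A tied vdst_in operand, if present, is duplicated
// from the destination.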
7799 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7800                                OptionalImmIndexMap &OptIdx) {
7801   const int Opc = Inst.getOpcode();
7802   const MCInstrDesc &Desc = MII.get(Opc);
7803 
7804   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7805 
7806   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7807     assert(!IsPacked);
7808     Inst.addOperand(Inst.getOperand(0));
7809   }
7810 
7811   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
7812   // instruction, and then figure out where to actually put the modifiers.
7813 
7814   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7815   if (OpSelIdx != -1) {
7816     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7817   }
7818 
7819   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7820   if (OpSelHiIdx != -1) {
7821     int DefaultVal = IsPacked ? -1 : 0;
7822     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7823                           DefaultVal);
7824   }
7825 
7826   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7827   if (NegLoIdx != -1) {
7828     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7829     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7830   }
7831 
7832   const int Ops[] = { AMDGPU::OpName::src0,
7833                       AMDGPU::OpName::src1,
7834                       AMDGPU::OpName::src2 };
7835   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7836                          AMDGPU::OpName::src1_modifiers,
7837                          AMDGPU::OpName::src2_modifiers };
7838 
7839   unsigned OpSel = 0;
7840   unsigned OpSelHi = 0;
7841   unsigned NegLo = 0;
7842   unsigned NegHi = 0;
7843 
7844   if (OpSelIdx != -1)
7845     OpSel = Inst.getOperand(OpSelIdx).getImm();
7846 
7847   if (OpSelHiIdx != -1)
7848     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7849 
7850   if (NegLoIdx != -1) {
7851     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7852     NegLo = Inst.getOperand(NegLoIdx).getImm();
7853     NegHi = Inst.getOperand(NegHiIdx).getImm();
7854   }
7855 
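  // Fold bit J of each control (op_sel, op_sel_hi, neg_lo, neg_hi) into the
  // corresponding srcJ_modifiers operand.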
7856   for (int J = 0; J < 3; ++J) {
7857     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7858     if (OpIdx == -1)
7859       break;
7860 
7861     uint32_t ModVal = 0;
7862 
7863     if ((OpSel & (1 << J)) != 0)
7864       ModVal |= SISrcMods::OP_SEL_0;
7865 
7866     if ((OpSelHi & (1 << J)) != 0)
7867       ModVal |= SISrcMods::OP_SEL_1;
7868 
7869     if ((NegLo & (1 << J)) != 0)
7870       ModVal |= SISrcMods::NEG;
7871 
7872     if ((NegHi & (1 << J)) != 0)
7873       ModVal |= SISrcMods::NEG_HI;
7874 
7875     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7876 
7877     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7878   }
7879 }
7880 
7881 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7882   OptionalImmIndexMap OptIdx;
7883   cvtVOP3(Inst, Operands, OptIdx);
7884   cvtVOP3P(Inst, Operands, OptIdx);
7885 }
7886 
7887 //===----------------------------------------------------------------------===//
7888 // dpp
7889 //===----------------------------------------------------------------------===//
7890 
7891 bool AMDGPUOperand::isDPP8() const {
7892   return isImmTy(ImmTyDPP8);
7893 }
7894 
7895 bool AMDGPUOperand::isDPPCtrl() const {
7896   using namespace AMDGPU::DPP;
7897 
7898   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7899   if (result) {
7900     int64_t Imm = getImm();
7901     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7902            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7903            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7904            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7905            (Imm == DppCtrl::WAVE_SHL1) ||
7906            (Imm == DppCtrl::WAVE_ROL1) ||
7907            (Imm == DppCtrl::WAVE_SHR1) ||
7908            (Imm == DppCtrl::WAVE_ROR1) ||
7909            (Imm == DppCtrl::ROW_MIRROR) ||
7910            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7911            (Imm == DppCtrl::BCAST15) ||
7912            (Imm == DppCtrl::BCAST31) ||
7913            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7914            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7915   }
7916   return false;
7917 }
7918 
7919 //===----------------------------------------------------------------------===//
7920 // mAI
7921 //===----------------------------------------------------------------------===//
7922 
7923 bool AMDGPUOperand::isBLGP() const {
7924   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7925 }
7926 
7927 bool AMDGPUOperand::isCBSZ() const {
7928   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7929 }
7930 
7931 bool AMDGPUOperand::isABID() const {
7932   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7933 }
7934 
7935 bool AMDGPUOperand::isS16Imm() const {
7936   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7937 }
7938 
7939 bool AMDGPUOperand::isU16Imm() const {
7940   return isImm() && isUInt<16>(getImm());
7941 }
7942 
7943 //===----------------------------------------------------------------------===//
7944 // dim
7945 //===----------------------------------------------------------------------===//
7946 
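// Parse the identifier that follows "dim:" and translate it to the hardware
// dim encoding. An "SQ_RSRC_IMG_" prefix is accepted and stripped, and a
// leading digit (as in "1D") that was lexed as a separate integer token is
// re-joined with the rest of the name.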
7947 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7948   // We want to allow "dim:1D" etc.,
7949   // but the initial 1 is tokenized as an integer.
7950   std::string Token;
7951   if (isToken(AsmToken::Integer)) {
7952     SMLoc Loc = getToken().getEndLoc();
7953     Token = std::string(getTokenStr());
7954     lex();
7955     if (getLoc() != Loc)
7956       return false;
7957   }
7958 
7959   StringRef Suffix;
7960   if (!parseId(Suffix))
7961     return false;
7962   Token += Suffix;
7963 
7964   StringRef DimId = Token;
7965   if (DimId.startswith("SQ_RSRC_IMG_"))
7966     DimId = DimId.drop_front(12);
7967 
7968   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7969   if (!DimInfo)
7970     return false;
7971 
7972   Encoding = DimInfo->Encoding;
7973   return true;
7974 }
7975 
7976 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7977   if (!isGFX10Plus())
7978     return MatchOperand_NoMatch;
7979 
7980   SMLoc S = getLoc();
7981 
7982   if (!trySkipId("dim", AsmToken::Colon))
7983     return MatchOperand_NoMatch;
7984 
7985   unsigned Encoding;
7986   SMLoc Loc = getLoc();
7987   if (!parseDimId(Encoding)) {
7988     Error(Loc, "invalid dim value");
7989     return MatchOperand_ParseFail;
7990   }
7991 
7992   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7993                                               AMDGPUOperand::ImmTyDim));
7994   return MatchOperand_Success;
7995 }
7996 
7997 //===----------------------------------------------------------------------===//
7998 // dpp
7999 //===----------------------------------------------------------------------===//
8000 
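// Parse a dpp8 lane-select list, e.g. dpp8:[0,1,2,3,4,5,6,7]. Each of the
// eight entries must fit in 3 bits; entry i is packed into bits [3*i+2 : 3*i]
// of the resulting immediate.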
8001 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8002   SMLoc S = getLoc();
8003 
8004   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8005     return MatchOperand_NoMatch;
8006 
8007   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8008 
8009   int64_t Sels[8];
8010 
8011   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8012     return MatchOperand_ParseFail;
8013 
8014   for (size_t i = 0; i < 8; ++i) {
8015     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8016       return MatchOperand_ParseFail;
8017 
8018     SMLoc Loc = getLoc();
8019     if (getParser().parseAbsoluteExpression(Sels[i]))
8020       return MatchOperand_ParseFail;
8021     if (0 > Sels[i] || 7 < Sels[i]) {
8022       Error(Loc, "expected a 3-bit value");
8023       return MatchOperand_ParseFail;
8024     }
8025   }
8026 
8027   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8028     return MatchOperand_ParseFail;
8029 
8030   unsigned DPP8 = 0;
8031   for (size_t i = 0; i < 8; ++i)
8032     DPP8 |= (Sels[i] << (i * 3));
8033 
8034   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8035   return MatchOperand_Success;
8036 }
8037 
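// Check whether the named dpp control is available on the current subtarget:
// row_newbcast is gfx90a only, row_share and row_xmask require gfx10+, and the
// wave_* / row_bcast controls are limited to VI and gfx9.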
8038 bool
8039 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8040                                     const OperandVector &Operands) {
8041   if (Ctrl == "row_newbcast")
8042     return isGFX90A();
8043 
8044   if (Ctrl == "row_share" ||
8045       Ctrl == "row_xmask")
8046     return isGFX10Plus();
8047 
8048   if (Ctrl == "wave_shl" ||
8049       Ctrl == "wave_shr" ||
8050       Ctrl == "wave_rol" ||
8051       Ctrl == "wave_ror" ||
8052       Ctrl == "row_bcast")
8053     return isVI() || isGFX9();
8054 
8055   return Ctrl == "row_mirror" ||
8056          Ctrl == "row_half_mirror" ||
8057          Ctrl == "quad_perm" ||
8058          Ctrl == "row_shl" ||
8059          Ctrl == "row_shr" ||
8060          Ctrl == "row_ror";
8061 }
8062 
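// Parse the bracketed argument of quad_perm, e.g. quad_perm:[0,1,2,3]. Each of
// the four entries is a 2-bit lane select packed into the low 8 bits of the
// returned value. Returns -1 and emits a diagnostic on error.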
8063 int64_t
8064 AMDGPUAsmParser::parseDPPCtrlPerm() {
8065   // quad_perm:[%d,%d,%d,%d]
8066 
8067   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8068     return -1;
8069 
8070   int64_t Val = 0;
8071   for (int i = 0; i < 4; ++i) {
8072     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8073       return -1;
8074 
8075     int64_t Temp;
8076     SMLoc Loc = getLoc();
8077     if (getParser().parseAbsoluteExpression(Temp))
8078       return -1;
8079     if (Temp < 0 || Temp > 3) {
8080       Error(Loc, "expected a 2-bit value");
8081       return -1;
8082     }
8083 
8084     Val += (Temp << i * 2);
8085   }
8086 
8087   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8088     return -1;
8089 
8090   return Val;
8091 }
8092 
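// Parse the numeric argument of the remaining dpp controls, e.g. row_shl:1 or
// row_bcast:15. The legal range depends on the control (see the table below);
// row_bcast accepts only 15 or 31. Returns -1 and emits a diagnostic on error.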
8093 int64_t
8094 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8095   using namespace AMDGPU::DPP;
8096 
8097   // sel:%d
8098 
8099   int64_t Val;
8100   SMLoc Loc = getLoc();
8101 
8102   if (getParser().parseAbsoluteExpression(Val))
8103     return -1;
8104 
8105   struct DppCtrlCheck {
8106     int64_t Ctrl;
8107     int Lo;
8108     int Hi;
8109   };
8110 
8111   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8112     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8113     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8114     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8115     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8116     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8117     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8118     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8119     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8120     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8121     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8122     .Default({-1, 0, 0});
8123 
8124   bool Valid;
8125   if (Check.Ctrl == -1) {
8126     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8127     Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8128   } else {
8129     Valid = Check.Lo <= Val && Val <= Check.Hi;
8130     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8131   }
8132 
8133   if (!Valid) {
8134     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8135     return -1;
8136   }
8137 
8138   return Val;
8139 }
8140 
8141 OperandMatchResultTy
8142 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8143   using namespace AMDGPU::DPP;
8144 
8145   if (!isToken(AsmToken::Identifier) ||
8146       !isSupportedDPPCtrl(getTokenStr(), Operands))
8147     return MatchOperand_NoMatch;
8148 
8149   SMLoc S = getLoc();
8150   int64_t Val = -1;
8151   StringRef Ctrl;
8152 
8153   parseId(Ctrl);
8154 
8155   if (Ctrl == "row_mirror") {
8156     Val = DppCtrl::ROW_MIRROR;
8157   } else if (Ctrl == "row_half_mirror") {
8158     Val = DppCtrl::ROW_HALF_MIRROR;
8159   } else {
8160     if (skipToken(AsmToken::Colon, "expected a colon")) {
8161       if (Ctrl == "quad_perm") {
8162         Val = parseDPPCtrlPerm();
8163       } else {
8164         Val = parseDPPCtrlSel(Ctrl);
8165       }
8166     }
8167   }
8168 
8169   if (Val == -1)
8170     return MatchOperand_ParseFail;
8171 
8172   Operands.push_back(
8173     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8174   return MatchOperand_Success;
8175 }
8176 
8177 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8178   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8179 }
8180 
8181 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8182   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8183 }
8184 
8185 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8186   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8187 }
8188 
8189 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8190   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8191 }
8192 
8193 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8194   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8195 }
8196 
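// Convert parsed DPP / DPP8 operands into an MCInst. Operands tied to an
// earlier one (e.g. the old/src2 value of MAC opcodes) are duplicated from the
// operands already emitted, a stray "vcc" register of VOP2b forms is skipped,
// and source registers may carry FP input modifiers when the opcode allows
// them. For DPP8 the fi flag is appended at the end; otherwise the optional
// row_mask/bank_mask (default 0xf), bound_ctrl and fi immediates are added
// from OptionalIdx.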
8197 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8198   OptionalImmIndexMap OptionalIdx;
8199 
8200   unsigned Opc = Inst.getOpcode();
8201   bool HasModifiers =
8202       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8203   unsigned I = 1;
8204   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8205   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8206     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8207   }
8208 
8209   int Fi = 0;
8210   for (unsigned E = Operands.size(); I != E; ++I) {
8211     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8212                                             MCOI::TIED_TO);
8213     if (TiedTo != -1) {
8214       assert((unsigned)TiedTo < Inst.getNumOperands());
8215       // Handle the tied old or src2 operand of MAC instructions.
8216       Inst.addOperand(Inst.getOperand(TiedTo));
8217     }
8218     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8219     // Add the register arguments
8220     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8221       // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
8222       // Skip it.
8223       continue;
8224     }
8225 
8226     if (IsDPP8) {
8227       if (Op.isDPP8()) {
8228         Op.addImmOperands(Inst, 1);
8229       } else if (HasModifiers &&
8230                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8231         Op.addRegWithFPInputModsOperands(Inst, 2);
8232       } else if (Op.isFI()) {
8233         Fi = Op.getImm();
8234       } else if (Op.isReg()) {
8235         Op.addRegOperands(Inst, 1);
8236       } else {
8237         llvm_unreachable("Invalid operand type");
8238       }
8239     } else {
8240       if (HasModifiers &&
8241           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8242         Op.addRegWithFPInputModsOperands(Inst, 2);
8243       } else if (Op.isReg()) {
8244         Op.addRegOperands(Inst, 1);
8245       } else if (Op.isDPPCtrl()) {
8246         Op.addImmOperands(Inst, 1);
8247       } else if (Op.isImm()) {
8248         // Handle optional arguments
8249         OptionalIdx[Op.getImmTy()] = I;
8250       } else {
8251         llvm_unreachable("Invalid operand type");
8252       }
8253     }
8254   }
8255 
8256   if (IsDPP8) {
8257     using namespace llvm::AMDGPU::DPP;
8258     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8259   } else {
8260     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8261     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8262     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8263     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8264       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8265     }
8266   }
8267 }
8268 
8269 //===----------------------------------------------------------------------===//
8270 // sdwa
8271 //===----------------------------------------------------------------------===//
8272 
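// Parse an SDWA select value following the given prefix (e.g. a token such as
// BYTE_0, WORD_1 or DWORD; the prefix string itself, such as "dst_sel" or
// "src0_sel", is supplied by the caller).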
8273 OperandMatchResultTy
8274 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8275                               AMDGPUOperand::ImmTy Type) {
8276   using namespace llvm::AMDGPU::SDWA;
8277 
8278   SMLoc S = getLoc();
8279   StringRef Value;
8280   OperandMatchResultTy res;
8281 
8282   SMLoc StringLoc;
8283   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8284   if (res != MatchOperand_Success) {
8285     return res;
8286   }
8287 
8288   int64_t Int;
8289   Int = StringSwitch<int64_t>(Value)
8290         .Case("BYTE_0", SdwaSel::BYTE_0)
8291         .Case("BYTE_1", SdwaSel::BYTE_1)
8292         .Case("BYTE_2", SdwaSel::BYTE_2)
8293         .Case("BYTE_3", SdwaSel::BYTE_3)
8294         .Case("WORD_0", SdwaSel::WORD_0)
8295         .Case("WORD_1", SdwaSel::WORD_1)
8296         .Case("DWORD", SdwaSel::DWORD)
8297         .Default(0xffffffff);
8298 
8299   if (Int == 0xffffffff) {
8300     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8301     return MatchOperand_ParseFail;
8302   }
8303 
8304   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8305   return MatchOperand_Success;
8306 }
8307 
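// Parse the dst_unused selector, e.g. dst_unused:UNUSED_PAD,
// dst_unused:UNUSED_SEXT or dst_unused:UNUSED_PRESERVE.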
8308 OperandMatchResultTy
8309 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8310   using namespace llvm::AMDGPU::SDWA;
8311 
8312   SMLoc S = getLoc();
8313   StringRef Value;
8314   OperandMatchResultTy res;
8315 
8316   SMLoc StringLoc;
8317   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8318   if (res != MatchOperand_Success) {
8319     return res;
8320   }
8321 
8322   int64_t Int;
8323   Int = StringSwitch<int64_t>(Value)
8324         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8325         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8326         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8327         .Default(0xffffffff);
8328 
8329   if (Int == 0xffffffff) {
8330     Error(StringLoc, "invalid dst_unused value");
8331     return MatchOperand_ParseFail;
8332   }
8333 
8334   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8335   return MatchOperand_Success;
8336 }
8337 
8338 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8339   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8340 }
8341 
8342 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8343   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8344 }
8345 
8346 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8347   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8348 }
8349 
8350 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8351   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8352 }
8353 
8354 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8355   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8356 }
8357 
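// Common conversion for all SDWA forms. A "vcc" written explicitly for VOP2b
// or VOPC encodings is skipped where requested, sources are added with input
// modifiers when the opcode allows them, and the optional clamp/omod and
// dst_sel/dst_unused/srcN_sel selectors are then appended with their default
// values. v_mac_{f16, f32} additionally gets its tied src2 (= dst) inserted.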
8358 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8359                               uint64_t BasicInstType,
8360                               bool SkipDstVcc,
8361                               bool SkipSrcVcc) {
8362   using namespace llvm::AMDGPU::SDWA;
8363 
8364   OptionalImmIndexMap OptionalIdx;
8365   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8366   bool SkippedVcc = false;
8367 
8368   unsigned I = 1;
8369   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8370   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8371     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8372   }
8373 
8374   for (unsigned E = Operands.size(); I != E; ++I) {
8375     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8376     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8377         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8378       // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
8379       // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8380       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8381       // Skip VCC only if we didn't skip it on previous iteration.
8382       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8383       if (BasicInstType == SIInstrFlags::VOP2 &&
8384           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8385            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8386         SkippedVcc = true;
8387         continue;
8388       } else if (BasicInstType == SIInstrFlags::VOPC &&
8389                  Inst.getNumOperands() == 0) {
8390         SkippedVcc = true;
8391         continue;
8392       }
8393     }
8394     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8395       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8396     } else if (Op.isImm()) {
8397       // Handle optional arguments
8398       OptionalIdx[Op.getImmTy()] = I;
8399     } else {
8400       llvm_unreachable("Invalid operand type");
8401     }
8402     SkippedVcc = false;
8403   }
8404 
8405   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8406       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8407       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8408     // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
8409     switch (BasicInstType) {
8410     case SIInstrFlags::VOP1:
8411       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8412       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8413         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8414       }
8415       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8416       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8417       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8418       break;
8419 
8420     case SIInstrFlags::VOP2:
8421       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8422       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8423         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8424       }
8425       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8426       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8427       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8428       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8429       break;
8430 
8431     case SIInstrFlags::VOPC:
8432       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8433         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8434       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8435       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8436       break;
8437 
8438     default:
8439       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8440     }
8441   }
8442 
8443   // Special case for v_mac_{f16, f32}:
8444   // these opcodes have a src2 register operand that is tied to the dst operand.
8445   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8446       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8447     auto it = Inst.begin();
8448     std::advance(
8449       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8450     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8451   }
8452 }
8453 
8454 //===----------------------------------------------------------------------===//
8455 // mAI
8456 //===----------------------------------------------------------------------===//
8457 
8458 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8459   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8460 }
8461 
8462 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8463   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8464 }
8465 
8466 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8467   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8468 }
8469 
8470 /// Force static initialization.
8471 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8472   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8473   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8474 }
8475 
8476 #define GET_REGISTER_MATCHER
8477 #define GET_MATCHER_IMPLEMENTATION
8478 #define GET_MNEMONIC_SPELL_CHECKER
8479 #define GET_MNEMONIC_CHECKER
8480 #include "AMDGPUGenAsmMatcher.inc"
8481 
8482 // This function should be defined after the auto-generated include so that the
8483 // MatchClassKind enum is defined.
8484 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8485                                                      unsigned Kind) {
8486   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8487   // But MatchInstructionImpl() expects a token and fails to validate the
8488   // operand. This method checks whether we were given an immediate operand but
8489   // expected the corresponding token.
8490   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8491   switch (Kind) {
8492   case MCK_addr64:
8493     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8494   case MCK_gds:
8495     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8496   case MCK_lds:
8497     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8498   case MCK_idxen:
8499     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8500   case MCK_offen:
8501     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8502   case MCK_SSrcB32:
8503     // When operands have expression values, they will return true for isToken,
8504     // because it is not possible to distinguish between a token and an
8505     // expression at parse time. MatchInstructionImpl() will always try to
8506     // match an operand as a token, when isToken returns true, and when the
8507     // name of the expression is not a valid token, the match will fail,
8508     // so we need to handle it here.
8509     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8510   case MCK_SSrcF32:
8511     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8512   case MCK_SoppBrTarget:
8513     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8514   case MCK_VReg32OrOff:
8515     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8516   case MCK_InterpSlot:
8517     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8518   case MCK_Attr:
8519     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8520   case MCK_AttrChan:
8521     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8522   case MCK_ImmSMEMOffset:
8523     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8524   case MCK_SReg_64:
8525   case MCK_SReg_64_XEXEC:
8526     // Null is defined as a 32-bit register but
8527     // it should also be enabled with 64-bit operands.
8528     // The following code enables it for SReg_64 operands
8529     // used as source and destination. Remaining source
8530     // operands are handled in isInlinableImm.
8531     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8532   default:
8533     return Match_InvalidOperand;
8534   }
8535 }
8536 
8537 //===----------------------------------------------------------------------===//
8538 // endpgm
8539 //===----------------------------------------------------------------------===//
8540 
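// Parse the optional immediate of an s_endpgm-style instruction. The operand
// may be omitted, in which case it defaults to 0; otherwise it must fit in
// 16 bits.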
8541 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8542   SMLoc S = getLoc();
8543   int64_t Imm = 0;
8544 
8545   if (!parseExpr(Imm)) {
8546     // The operand is optional; if not present, default to 0.
8547     Imm = 0;
8548   }
8549 
8550   if (!isUInt<16>(Imm)) {
8551     Error(S, "expected a 16-bit value");
8552     return MatchOperand_ParseFail;
8553   }
8554 
8555   Operands.push_back(
8556       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8557   return MatchOperand_Success;
8558 }
8559 
8560 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8561