1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/TargetParser.h"
39 
40 using namespace llvm;
41 using namespace llvm::AMDGPU;
42 using namespace llvm::amdhsa;
43 
44 namespace {
45 
46 class AMDGPUAsmParser;
47 
// Broad classification of a parsed register operand: vector, scalar,
// accumulator, trap-temporary, or a special register.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
49 
50 //===----------------------------------------------------------------------===//
51 // Operand
52 //===----------------------------------------------------------------------===//
53 
54 class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator for the anonymous union of operand payloads below.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  // Source range of the operand text, used for diagnostics.
  SMLoc StartLoc, EndLoc;
  // Owning parser; operands consult it for subtarget-dependent checks.
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;
70 
71   struct Modifiers {
72     bool Abs = false;
73     bool Neg = false;
74     bool Sext = false;
75 
76     bool hasFPModifiers() const { return Abs || Neg; }
77     bool hasIntModifiers() const { return Sext; }
78     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
79 
80     int64_t getFPModifiersOperand() const {
81       int64_t Operand = 0;
82       Operand |= Abs ? SISrcMods::ABS : 0u;
83       Operand |= Neg ? SISrcMods::NEG : 0u;
84       return Operand;
85     }
86 
87     int64_t getIntModifiersOperand() const {
88       int64_t Operand = 0;
89       Operand |= Sext ? SISrcMods::SEXT : 0u;
90       return Operand;
91     }
92 
93     int64_t getModifiersOperand() const {
94       assert(!(hasFPModifiers() && hasIntModifiers())
95            && "fp and int modifiers should not be used simultaneously");
96       if (hasFPModifiers()) {
97         return getFPModifiersOperand();
98       } else if (hasIntModifiers()) {
99         return getIntModifiersOperand();
100       } else {
101         return 0;
102       }
103     }
104 
105     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
106   };
107 
  // Fine-grained classification of Immediate operands: which named
  // instruction modifier/field (if any) this immediate represents.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    // DPP-related fields.
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    // SDWA-related fields.
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  // How an immediate will be encoded: not yet decided, as a literal
  // constant, or as an inline constant.
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };
170 
private:
  // Payload for Token operands: a non-owning view of the token text.
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Payload for Immediate operands.
  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    // Mutable so the encoding kind can be recorded on const operands.
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  // Payload for Register operands.
  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Active member is selected by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };
196 
public:
  // An operand counts as a token if it is an explicit Token, or an
  // Expression wrapping a bare symbol reference (see comment below).
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret is a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  // The setters below are const because Imm.Kind is mutable: the encoding
  // kind is recorded while matching, on otherwise-const operands.
  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  // NOTE(review): capitalized 'Is' is inconsistent with isImmKindConst()
  // below; kept as-is since external callers reference this exact name.
  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  // Defined out of line (not visible in this chunk).
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;
242 
  bool isRegKind() const {
    return Kind == Register;
  }

  // A plain register operand: register kind and no abs/neg/sext modifiers.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  // Register of class RCID, or an immediate that can be encoded inline
  // for the given value type.
  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  // As above, but also accepting a literal immediate of the given type.
  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // Any VGPR register class, of any supported width.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  // Defined out of line (not visible in this chunk).
  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
314 
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  // Any immediate carrying a named modifier type (anything but ImmTyNone).
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // One trivial isImmTy wrapper per named immediate kind; the Offset*
  // and FORMAT variants additionally range-check the value.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }
383 
  // Scalar-source predicates. isSCSrc* = SGPR or inline constant (no
  // modifiers); isSSrc* additionally accepts literal immediates. The V2*
  // vector variants are never matched directly (llvm_unreachable).
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
478 
  // VALU-source predicates. isVCSrc* = VGPR/SGPR or inline constant (no
  // modifiers); isVSrc* additionally accepts literal immediates.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }
558 
  // isVISrc* predicates: VGPR-only register classes (no SGPRs) or inline
  // constants, without modifiers. Suffix encodes register width and
  // element type; V2* vector variants reuse the scalar element checks.
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }
678 
  // isAISrc* predicates: accumulator (AGPR) register classes or inline
  // constants, without modifiers; mirrors the isVISrc* family above.
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  // NOTE(review): these two VISrc_128 predicates sit in the middle of the
  // AISrc group; consider regrouping with the other isVISrc_128* above.
  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }
798 
  // KImm operands: literal constants carried in the instruction.
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  // AMDGPU assembly has no memory-operand syntax handled through this class.
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  // Branch target of an SOPP instruction: a symbol/expression or an
  // immediate offset.
  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  // Defined out of line (not visible in this chunk).
  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
835 
  // Return the symbol name of a symbol-reference expression, treating it
  // as token text (see isToken()). Asserts if Expr is not a symbol ref.
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  // Token text; for Expression operands this is the referenced symbol name.
  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers live on registers and on plain (ImmTyNone) immediates only.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }
907 
  // Defined out of line (not visible in this chunk).
  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the encoded modifiers as an immediate operand first, then the
  // register/immediate itself (modifiers are not re-applied to the value).
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  // Register-only variant: modifiers immediate, then the register.
  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }
985 
  // Pretty-print an ImmTy for debug output (used by print() below). All
  // enumerators are covered, so -Wswitch guards against new entries.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }
1044 
  // Debug dump of the operand, dispatching on the active union member.
  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }
1065 
1066   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1067                                       int64_t Val, SMLoc Loc,
1068                                       ImmTy Type = ImmTyNone,
1069                                       bool IsFPImm = false) {
1070     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1071     Op->Imm.Val = Val;
1072     Op->Imm.IsFPImm = IsFPImm;
1073     Op->Imm.Kind = ImmKindTyNone;
1074     Op->Imm.Type = Type;
1075     Op->Imm.Mods = Modifiers();
1076     Op->StartLoc = Loc;
1077     Op->EndLoc = Loc;
1078     return Op;
1079   }
1080 
1081   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1082                                         StringRef Str, SMLoc Loc,
1083                                         bool HasExplicitEncodingSize = true) {
1084     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1085     Res->Tok.Data = Str.data();
1086     Res->Tok.Length = Str.size();
1087     Res->StartLoc = Loc;
1088     Res->EndLoc = Loc;
1089     return Res;
1090   }
1091 
1092   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1093                                       unsigned RegNo, SMLoc S,
1094                                       SMLoc E) {
1095     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1096     Op->Reg.RegNo = RegNo;
1097     Op->Reg.Mods = Modifiers();
1098     Op->StartLoc = S;
1099     Op->EndLoc = E;
1100     return Op;
1101   }
1102 
1103   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1104                                        const class MCExpr *Expr, SMLoc S) {
1105     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1106     Op->Expr = Expr;
1107     Op->StartLoc = S;
1108     Op->EndLoc = S;
1109     return Op;
1110   }
1111 };
1112 
1113 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1114   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1115   return OS;
1116 }
1117 
1118 //===----------------------------------------------------------------------===//
1119 // AsmParser
1120 //===----------------------------------------------------------------------===//
1121 
1122 // Holds info related to the current kernel, e.g. count of SGPRs used.
1123 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1124 // .amdgpu_hsa_kernel or at EOF.
1125 class KernelScopeInfo {
1126   int SgprIndexUnusedMin = -1;
1127   int VgprIndexUnusedMin = -1;
1128   int AgprIndexUnusedMin = -1;
1129   MCContext *Ctx = nullptr;
1130   MCSubtargetInfo const *MSTI = nullptr;
1131 
1132   void usesSgprAt(int i) {
1133     if (i >= SgprIndexUnusedMin) {
1134       SgprIndexUnusedMin = ++i;
1135       if (Ctx) {
1136         MCSymbol* const Sym =
1137           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1138         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1139       }
1140     }
1141   }
1142 
1143   void usesVgprAt(int i) {
1144     if (i >= VgprIndexUnusedMin) {
1145       VgprIndexUnusedMin = ++i;
1146       if (Ctx) {
1147         MCSymbol* const Sym =
1148           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1149         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1150                                          VgprIndexUnusedMin);
1151         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1152       }
1153     }
1154   }
1155 
1156   void usesAgprAt(int i) {
1157     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1158     if (!hasMAIInsts(*MSTI))
1159       return;
1160 
1161     if (i >= AgprIndexUnusedMin) {
1162       AgprIndexUnusedMin = ++i;
1163       if (Ctx) {
1164         MCSymbol* const Sym =
1165           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1166         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1167 
1168         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1169         MCSymbol* const vSym =
1170           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1171         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1172                                          VgprIndexUnusedMin);
1173         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1174       }
1175     }
1176   }
1177 
1178 public:
1179   KernelScopeInfo() = default;
1180 
1181   void initialize(MCContext &Context) {
1182     Ctx = &Context;
1183     MSTI = Ctx->getSubtargetInfo();
1184 
1185     usesSgprAt(SgprIndexUnusedMin = -1);
1186     usesVgprAt(VgprIndexUnusedMin = -1);
1187     if (hasMAIInsts(*MSTI)) {
1188       usesAgprAt(AgprIndexUnusedMin = -1);
1189     }
1190   }
1191 
1192   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1193     switch (RegKind) {
1194       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1195       case IS_AGPR: usesAgprAt(DwordRegIndex + RegWidth - 1); break;
1196       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1197       default: break;
1198     }
1199   }
1200 };
1201 
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  // Encoding variant forced by the mnemonic; 0 means no size was forced
  // (isForcedVOP3() below treats 64 as a request for the VOP3 encoding).
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  // Cache-policy (CPol) modifier bits seen while parsing.
  // NOTE(review): not initialized here -- presumably reset before each
  // instruction is parsed; confirm at the use sites.
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  // Assembler-directive handlers.
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  // Register-parsing helpers.
  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is none suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  // Subtarget feature/generation queries, forwarded to AMDGPUBaseInfo.
  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  // Forced-encoding state set from mnemonic suffixes, queried during match.
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  // One parsed component of a composite operand (used by the hwreg and
  // sendmsg parsing/validation helpers below).
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  // Helpers to recover source locations for diagnostics.
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  // Post-match semantic checks for the instruction just matched.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  // Lexer helper utilities.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};
1731 
// Describes one optional instruction operand: its textual name, the
// immediate type it maps to, whether it is a single-bit flag, and an
// optional callback used to post-process the parsed value.
struct OptionalOperand {
  const char *Name;                // Operand keyword as written in assembly.
  AMDGPUOperand::ImmTy Type;       // Immediate type the operand produces.
  bool IsBit;                      // True if the operand is a bare flag.
  bool (*ConvertResult)(int64_t&); // Optional value converter; may be null.
};
1738 
1739 } // end anonymous namespace
1740 
1741 // May be called with integer type with equivalent bitwidth.
1742 static const fltSemantics *getFltSemantics(unsigned Size) {
1743   switch (Size) {
1744   case 4:
1745     return &APFloat::IEEEsingle();
1746   case 8:
1747     return &APFloat::IEEEdouble();
1748   case 2:
1749     return &APFloat::IEEEhalf();
1750   default:
1751     llvm_unreachable("unsupported fp type");
1752   }
1753 }
1754 
1755 static const fltSemantics *getFltSemantics(MVT VT) {
1756   return getFltSemantics(VT.getSizeInBits() / 8);
1757 }
1758 
// Map an MCOperandInfo operand-type code to the IEEE float semantics used
// when converting an fp literal for that operand.
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  // 32-bit operand types (including packed v2i32/v2f32 and 32-bit kimm).
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    return &APFloat::IEEEsingle();
  // 64-bit operand types.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  // 16-bit operand types (including packed v2i16/v2f16 and 16-bit kimm).
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_KIMM16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
1799 
1800 //===----------------------------------------------------------------------===//
1801 // Operand
1802 //===----------------------------------------------------------------------===//
1803 
// Returns true if FPLiteral can be converted to the semantics of VT without
// overflow or underflow. Note: FPLiteral is converted in place as a side
// effect, which callers rely on.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  // NOTE(review): the rejection requires Lost to be set in addition to the
  // overflow/underflow status bit — an over/underflow reported without Lost
  // would be accepted; confirm this is intentional.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
1821 
1822 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1823   return isUIntN(Size, Val) || isIntN(Size, Val);
1824 }
1825 
1826 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1827   if (VT.getScalarType() == MVT::i16) {
1828     // FP immediate values are broken.
1829     return isInlinableIntLiteral(Val);
1830   }
1831 
1832   // f16/v2f16 operands work correctly for all values.
1833   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1834 }
1835 
// Returns true if this operand can be encoded as an inline constant for an
// operand of machine value type `type`, distinguishing fp literal tokens
// from int literal tokens.
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Narrower operand: convert the double literal to the operand's
    // semantics first; reject on overflow/underflow.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  // The literal must fit the operand's width (signed or unsigned).
  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
1898 
// Returns true if this operand can be encoded as a (non-inline) literal
// constant for an operand of machine value type `type`.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    // 64-bit operands still encode a 32-bit literal, so only 32 bits of the
    // value need to fit.
    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 :
                     (type == MVT::v2f32)? MVT::f32 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}
1946 
1947 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1948   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1949 }
1950 
1951 bool AMDGPUOperand::isVRegWithInputMods() const {
1952   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1953          // GFX90A allows DPP on 64-bit operands.
1954          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1955           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1956 }
1957 
1958 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1959   if (AsmParser->isVI())
1960     return isVReg32();
1961   else if (AsmParser->isGFX9Plus())
1962     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1963   else
1964     return false;
1965 }
1966 
// SDWA operand holding a 16-bit floating-point value.
bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}
1970 
// SDWA operand holding a 32-bit floating-point value.
bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}
1974 
// SDWA operand holding a 16-bit integer value.
bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}
1978 
// SDWA operand holding a 32-bit integer value.
bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}
1982 
1983 bool AMDGPUOperand::isBoolReg() const {
1984   auto FB = AsmParser->getFeatureBits();
1985   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1986                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1987 }
1988 
1989 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1990 {
1991   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1992   assert(Size == 2 || Size == 4 || Size == 8);
1993 
1994   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1995 
1996   if (Imm.Mods.Abs) {
1997     Val &= ~FpSignMask;
1998   }
1999   if (Imm.Mods.Neg) {
2000     Val ^= FpSignMask;
2001   }
2002 
2003   return Val;
2004 }
2005 
2006 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2007   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2008                              Inst.getNumOperands())) {
2009     addLiteralImmOperand(Inst, Imm.Val,
2010                          ApplyModifiers &
2011                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2012   } else {
2013     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2014     Inst.addOperand(MCOperand::createImm(Imm.Val));
2015     setImmKindNone();
2016   }
2017 }
2018 
// Encode Val as the next operand of Inst, choosing between an inline
// constant and a literal based on the operand's declared type. Assumes the
// operand has already been validated by isInlinableImm()/isLiteralImm().
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // fp literal tokens are stored as doubles, so the sign bit position is
    // that of a double regardless of the operand's width.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      // 64-bit operand: prefer an inline constant when the bit pattern
      // qualifies.
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        setImmKindConst();
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high 32 bits of the double are encoded.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        setImmKindLiteral();
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32:
    case AMDGPU::OPERAND_KIMM32:
    case AMDGPU::OPERAND_KIMM16: {
      // Narrower operands: round the double literal to the operand's
      // semantics and emit the resulting bit pattern as a literal.
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      setImmKindLiteral();
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    // 32-bit operand: inline constant when possible, otherwise truncate.
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    // 64-bit operand: inline constant when possible, otherwise only the low
    // 32 bits are encoded as a literal.
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    // 16-bit operand: inline constant when possible, otherwise truncate.
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    // Packed inline-only operands: predicates guarantee inlinability here.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  case AMDGPU::OPERAND_KIMM32:
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
    setImmKindNone();
    return;
  case AMDGPU::OPERAND_KIMM16:
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
    setImmKindNone();
    return;
  default:
    llvm_unreachable("invalid operand size");
  }
}
2198 
// Append a Bitwidth-bit "kimm" fp operand: int tokens are truncated to the
// low Bitwidth bits, fp tokens are rounded to the Bitwidth-bit semantics.
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);
  setImmKindNone();

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  // Round the double literal to the target width and emit its bit pattern.
  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
2216 
2217 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2218   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2219 }
2220 
2221 static bool isInlineValue(unsigned Reg) {
2222   switch (Reg) {
2223   case AMDGPU::SRC_SHARED_BASE:
2224   case AMDGPU::SRC_SHARED_LIMIT:
2225   case AMDGPU::SRC_PRIVATE_BASE:
2226   case AMDGPU::SRC_PRIVATE_LIMIT:
2227   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2228     return true;
2229   case AMDGPU::SRC_VCCZ:
2230   case AMDGPU::SRC_EXECZ:
2231   case AMDGPU::SRC_SCC:
2232     return true;
2233   case AMDGPU::SGPR_NULL:
2234     return true;
2235   default:
2236     return false;
2237   }
2238 }
2239 
2240 bool AMDGPUOperand::isInlineValue() const {
2241   return isRegKind() && ::isInlineValue(getReg());
2242 }
2243 
2244 //===----------------------------------------------------------------------===//
2245 // AsmParser
2246 //===----------------------------------------------------------------------===//
2247 
2248 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2249   if (Is == IS_VGPR) {
2250     switch (RegWidth) {
2251       default: return -1;
2252       case 1: return AMDGPU::VGPR_32RegClassID;
2253       case 2: return AMDGPU::VReg_64RegClassID;
2254       case 3: return AMDGPU::VReg_96RegClassID;
2255       case 4: return AMDGPU::VReg_128RegClassID;
2256       case 5: return AMDGPU::VReg_160RegClassID;
2257       case 6: return AMDGPU::VReg_192RegClassID;
2258       case 7: return AMDGPU::VReg_224RegClassID;
2259       case 8: return AMDGPU::VReg_256RegClassID;
2260       case 16: return AMDGPU::VReg_512RegClassID;
2261       case 32: return AMDGPU::VReg_1024RegClassID;
2262     }
2263   } else if (Is == IS_TTMP) {
2264     switch (RegWidth) {
2265       default: return -1;
2266       case 1: return AMDGPU::TTMP_32RegClassID;
2267       case 2: return AMDGPU::TTMP_64RegClassID;
2268       case 4: return AMDGPU::TTMP_128RegClassID;
2269       case 8: return AMDGPU::TTMP_256RegClassID;
2270       case 16: return AMDGPU::TTMP_512RegClassID;
2271     }
2272   } else if (Is == IS_SGPR) {
2273     switch (RegWidth) {
2274       default: return -1;
2275       case 1: return AMDGPU::SGPR_32RegClassID;
2276       case 2: return AMDGPU::SGPR_64RegClassID;
2277       case 3: return AMDGPU::SGPR_96RegClassID;
2278       case 4: return AMDGPU::SGPR_128RegClassID;
2279       case 5: return AMDGPU::SGPR_160RegClassID;
2280       case 6: return AMDGPU::SGPR_192RegClassID;
2281       case 7: return AMDGPU::SGPR_224RegClassID;
2282       case 8: return AMDGPU::SGPR_256RegClassID;
2283       case 16: return AMDGPU::SGPR_512RegClassID;
2284     }
2285   } else if (Is == IS_AGPR) {
2286     switch (RegWidth) {
2287       default: return -1;
2288       case 1: return AMDGPU::AGPR_32RegClassID;
2289       case 2: return AMDGPU::AReg_64RegClassID;
2290       case 3: return AMDGPU::AReg_96RegClassID;
2291       case 4: return AMDGPU::AReg_128RegClassID;
2292       case 5: return AMDGPU::AReg_160RegClassID;
2293       case 6: return AMDGPU::AReg_192RegClassID;
2294       case 7: return AMDGPU::AReg_224RegClassID;
2295       case 8: return AMDGPU::AReg_256RegClassID;
2296       case 16: return AMDGPU::AReg_512RegClassID;
2297       case 32: return AMDGPU::AReg_1024RegClassID;
2298     }
2299   }
2300   return -1;
2301 }
2302 
// Map a special-register name (several have aliases with and without the
// "src_" prefix) to its register number, or AMDGPU::NoRegister.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("pc", AMDGPU::PC_REG)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(AMDGPU::NoRegister)
}
2346 
2347 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2348                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2349   auto R = parseRegister();
2350   if (!R) return true;
2351   assert(R->isReg());
2352   RegNo = R->getReg();
2353   StartLoc = R->getStartLoc();
2354   EndLoc = R->getEndLoc();
2355   return false;
2356 }
2357 
// Non-speculative variant: delegates without restoring state on failure.
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
2362 
2363 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2364                                                        SMLoc &StartLoc,
2365                                                        SMLoc &EndLoc) {
2366   bool Result =
2367       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2368   bool PendingErrors = getParser().hasPendingError();
2369   getParser().clearPendingErrors();
2370   if (PendingErrors)
2371     return MatchOperand_ParseFail;
2372   if (Result)
2373     return MatchOperand_NoMatch;
2374   return MatchOperand_Success;
2375 }
2376 
2377 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2378                                             RegisterKind RegKind, unsigned Reg1,
2379                                             SMLoc Loc) {
2380   switch (RegKind) {
2381   case IS_SPECIAL:
2382     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2383       Reg = AMDGPU::EXEC;
2384       RegWidth = 2;
2385       return true;
2386     }
2387     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2388       Reg = AMDGPU::FLAT_SCR;
2389       RegWidth = 2;
2390       return true;
2391     }
2392     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2393       Reg = AMDGPU::XNACK_MASK;
2394       RegWidth = 2;
2395       return true;
2396     }
2397     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2398       Reg = AMDGPU::VCC;
2399       RegWidth = 2;
2400       return true;
2401     }
2402     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2403       Reg = AMDGPU::TBA;
2404       RegWidth = 2;
2405       return true;
2406     }
2407     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2408       Reg = AMDGPU::TMA;
2409       RegWidth = 2;
2410       return true;
2411     }
2412     Error(Loc, "register does not fit in the list");
2413     return false;
2414   case IS_VGPR:
2415   case IS_SGPR:
2416   case IS_AGPR:
2417   case IS_TTMP:
2418     if (Reg1 != Reg + RegWidth) {
2419       Error(Loc, "registers in a list must have consecutive indices");
2420       return false;
2421     }
2422     RegWidth++;
2423     return true;
2424   default:
2425     llvm_unreachable("unexpected register kind");
2426   }
2427 }
2428 
// Name prefix and kind of a regular (indexed) register family.
struct RegInfo {
  StringLiteral Name;   // Assembly name prefix, e.g. "v", "s", "ttmp".
  RegisterKind Kind;    // Register kind the prefix denotes.
};
2433 
// Regular register families, matched by name prefix (see
// getRegularRegInfo). Order matters: "acc" must precede "a" so the longer
// prefix wins.
static constexpr RegInfo RegularRegisters[] = {
  {{"v"},    IS_VGPR},
  {{"s"},    IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"},  IS_AGPR},
  {{"a"},    IS_AGPR},
};
2441 
2442 static bool isRegularReg(RegisterKind Kind) {
2443   return Kind == IS_VGPR ||
2444          Kind == IS_SGPR ||
2445          Kind == IS_TTMP ||
2446          Kind == IS_AGPR;
2447 }
2448 
2449 static const RegInfo* getRegularRegInfo(StringRef Str) {
2450   for (const RegInfo &Reg : RegularRegisters)
2451     if (Str.startswith(Reg.Name))
2452       return &Reg;
2453   return nullptr;
2454 }
2455 
2456 static bool getRegNum(StringRef Str, unsigned& Num) {
2457   return !Str.getAsInteger(10, Num);
2458 }
2459 
2460 bool
2461 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2462                             const AsmToken &NextToken) const {
2463 
2464   // A list of consecutive registers: [s0,s1,s2,s3]
2465   if (Token.is(AsmToken::LBrac))
2466     return true;
2467 
2468   if (!Token.is(AsmToken::Identifier))
2469     return false;
2470 
2471   // A single register like s0 or a range of registers like s[0:1]
2472 
2473   StringRef Str = Token.getString();
2474   const RegInfo *Reg = getRegularRegInfo(Str);
2475   if (Reg) {
2476     StringRef RegName = Reg->Name;
2477     StringRef RegSuffix = Str.substr(RegName.size());
2478     if (!RegSuffix.empty()) {
2479       unsigned Num;
2480       // A single register with an index: rXX
2481       if (getRegNum(RegSuffix, Num))
2482         return true;
2483     } else {
2484       // A range of registers: r[XX:YY].
2485       if (NextToken.is(AsmToken::LBrac))
2486         return true;
2487     }
2488   }
2489 
2490   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2491 }
2492 
2493 bool
2494 AMDGPUAsmParser::isRegister()
2495 {
2496   return isRegister(getToken(), peekToken());
2497 }
2498 
2499 unsigned
2500 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2501                                unsigned RegNum,
2502                                unsigned RegWidth,
2503                                SMLoc Loc) {
2504 
2505   assert(isRegularReg(RegKind));
2506 
2507   unsigned AlignSize = 1;
2508   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2509     // SGPR and TTMP registers must be aligned.
2510     // Max required alignment is 4 dwords.
2511     AlignSize = std::min(RegWidth, 4u);
2512   }
2513 
2514   if (RegNum % AlignSize != 0) {
2515     Error(Loc, "invalid register alignment");
2516     return AMDGPU::NoRegister;
2517   }
2518 
2519   unsigned RegIdx = RegNum / AlignSize;
2520   int RCID = getRegClass(RegKind, RegWidth);
2521   if (RCID == -1) {
2522     Error(Loc, "invalid or unsupported register size");
2523     return AMDGPU::NoRegister;
2524   }
2525 
2526   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2527   const MCRegisterClass RC = TRI->getRegClass(RCID);
2528   if (RegIdx >= RC.getNumRegs()) {
2529     Error(Loc, "register index is out of range");
2530     return AMDGPU::NoRegister;
2531   }
2532 
2533   return RC.getRegister(RegIdx);
2534 }
2535 
// Parse a bracketed register index range "[lo]" or "[lo:hi]", producing the
// base index (Num) and element count (Width). Emits a diagnostic and
// returns false on malformed input.
bool
AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
  int64_t RegLo, RegHi;
  if (!skipToken(AsmToken::LBrac, "missing register index"))
    return false;

  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(RegLo))
    return false;

  // ":hi" is optional; a single index means a one-register range.
  if (trySkipToken(AsmToken::Colon)) {
    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))
      return false;
  } else {
    RegHi = RegLo;
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return false;

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");
    return false;
  }

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");
    return false;
  }

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");
    return false;
  }

  Num = static_cast<unsigned>(RegLo);
  Width = (RegHi - RegLo) + 1;
  return true;
}
2578 
2579 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2580                                           unsigned &RegNum, unsigned &RegWidth,
2581                                           SmallVectorImpl<AsmToken> &Tokens) {
2582   assert(isToken(AsmToken::Identifier));
2583   unsigned Reg = getSpecialRegForName(getTokenStr());
2584   if (Reg) {
2585     RegNum = 0;
2586     RegWidth = 1;
2587     RegKind = IS_SPECIAL;
2588     Tokens.push_back(getToken());
2589     lex(); // skip register name
2590   }
2591   return Reg;
2592 }
2593 
// Parse a regular register reference: either "vNN" (single register) or
// "v[XX:YY]" (range). Consumes the name token (recording it in Tokens) and
// fills RegKind/RegNum/RegWidth; returns AMDGPU::NoRegister on error.
unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();

  const RegInfo *RI = getRegularRegInfo(RegName);
  if (!RI) {
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;
  }

  Tokens.push_back(getToken());
  lex(); // skip register name

  RegKind = RI->Kind;
  StringRef RegSuffix = RegName.substr(RI->Name.size());
  if (!RegSuffix.empty()) {
    // Single 32-bit register: vXX.
    if (!getRegNum(RegSuffix, RegNum)) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;
    }
    RegWidth = 1;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;
  }

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
}
2627 
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                                       unsigned &RegWidth,
                                       SmallVectorImpl<AsmToken> &Tokens) {
  // Parse a bracketed list of consecutive registers, e.g. "[s0,s1,s2,s3]",
  // and fold it into a single register span. Every element must be a
  // single 32-bit register of the same kind as the first, and each must
  // directly follow its predecessor (enforced by AddNextRegisterToList).
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

  if (!skipToken(AsmToken::LBrac,
                 "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;
  }

  // List of consecutive registers, e.g.: [s0,s1,s2,s3]

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 1) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;
  }

  for (; trySkipToken(AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;
    Loc = getLoc();

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
                             Tokens)) {
      return AMDGPU::NoRegister;
    }
    if (NextRegWidth != 1) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    }
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    }
    // Grows Reg/RegWidth to cover NextReg; fails if it is not adjacent.
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;
  }

  if (!skipToken(AsmToken::RBrac,
                 "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;
  }

  // For regular register kinds, re-resolve the accumulated index/width
  // into the MC register covering the whole range.
  if (isRegularReg(RegKind))
    Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);

  return Reg;
}
2681 
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  // Dispatch to the appropriate register parser: special and regular
  // registers start with an identifier; anything else is expected to be a
  // register list "[...]". Also verifies the parsed register exists on the
  // current subtarget.
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

  if (isToken(AsmToken::Identifier)) {
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (Reg == AMDGPU::NoRegister) {
    // The callee is responsible for reporting the parse error.
    assert(Parser.hasPendingError());
    return false;
  }

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      Error(Loc, "register not available on this GPU");
    }
    return false;
  }

  return true;
}
2713 
2714 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2715                                           unsigned &RegNum, unsigned &RegWidth,
2716                                           bool RestoreOnFailure /*=false*/) {
2717   Reg = AMDGPU::NoRegister;
2718 
2719   SmallVector<AsmToken, 1> Tokens;
2720   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2721     if (RestoreOnFailure) {
2722       while (!Tokens.empty()) {
2723         getLexer().UnLex(Tokens.pop_back_val());
2724       }
2725     }
2726     return true;
2727   }
2728   return false;
2729 }
2730 
2731 Optional<StringRef>
2732 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2733   switch (RegKind) {
2734   case IS_VGPR:
2735     return StringRef(".amdgcn.next_free_vgpr");
2736   case IS_SGPR:
2737     return StringRef(".amdgcn.next_free_sgpr");
2738   default:
2739     return None;
2740   }
2741 }
2742 
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  // Define the .amdgcn.next_free_{v,s}gpr symbol for this register kind
  // and initialize it to zero; updateGprCountSymbols bumps it as registers
  // are parsed.
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}
2749 
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Keep the .amdgcn.next_free_{v,s}gpr symbol one past the highest
  // register index seen so far. Returns false (after reporting an error)
  // if the symbol was redefined in a way that cannot be evaluated.
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName) // Only VGPR/SGPR counts are tracked.
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  // Highest dword index touched by this register reference.
  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  // Only ever grow the count.
  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}
2778 
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  // Parse a register and wrap it in an AMDGPUOperand, also feeding the
  // GPR usage trackers (kernel scope or the .amdgcn.next_free_* symbols,
  // depending on the HSA ABI version). Returns nullptr on failure.
  // NOTE(review): RestoreOnFailure is not forwarded to
  // ParseAMDGPURegister here — confirm this is intended.
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    return nullptr;
  }
  if (isHsaAbiVersion3AndAbove(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
2797 
2798 OperandMatchResultTy
2799 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2800   // TODO: add syntactic sugar for 1/(2*PI)
2801 
2802   assert(!isRegister());
2803   assert(!isModifier());
2804 
2805   const auto& Tok = getToken();
2806   const auto& NextTok = peekToken();
2807   bool IsReal = Tok.is(AsmToken::Real);
2808   SMLoc S = getLoc();
2809   bool Negate = false;
2810 
2811   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2812     lex();
2813     IsReal = true;
2814     Negate = true;
2815   }
2816 
2817   if (IsReal) {
2818     // Floating-point expressions are not supported.
2819     // Can only allow floating-point literals with an
2820     // optional sign.
2821 
2822     StringRef Num = getTokenStr();
2823     lex();
2824 
2825     APFloat RealVal(APFloat::IEEEdouble());
2826     auto roundMode = APFloat::rmNearestTiesToEven;
2827     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2828       return MatchOperand_ParseFail;
2829     }
2830     if (Negate)
2831       RealVal.changeSign();
2832 
2833     Operands.push_back(
2834       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2835                                AMDGPUOperand::ImmTyNone, true));
2836 
2837     return MatchOperand_Success;
2838 
2839   } else {
2840     int64_t IntVal;
2841     const MCExpr *Expr;
2842     SMLoc S = getLoc();
2843 
2844     if (HasSP3AbsModifier) {
2845       // This is a workaround for handling expressions
2846       // as arguments of SP3 'abs' modifier, for example:
2847       //     |1.0|
2848       //     |-1|
2849       //     |1+x|
2850       // This syntax is not compatible with syntax of standard
2851       // MC expressions (due to the trailing '|').
2852       SMLoc EndLoc;
2853       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2854         return MatchOperand_ParseFail;
2855     } else {
2856       if (Parser.parseExpression(Expr))
2857         return MatchOperand_ParseFail;
2858     }
2859 
2860     if (Expr->evaluateAsAbsolute(IntVal)) {
2861       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2862     } else {
2863       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2864     }
2865 
2866     return MatchOperand_Success;
2867   }
2868 
2869   return MatchOperand_NoMatch;
2870 }
2871 
2872 OperandMatchResultTy
2873 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2874   if (!isRegister())
2875     return MatchOperand_NoMatch;
2876 
2877   if (auto R = parseRegister()) {
2878     assert(R->isReg());
2879     Operands.push_back(std::move(R));
2880     return MatchOperand_Success;
2881   }
2882   return MatchOperand_ParseFail;
2883 }
2884 
2885 OperandMatchResultTy
2886 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2887   auto res = parseReg(Operands);
2888   if (res != MatchOperand_NoMatch) {
2889     return res;
2890   } else if (isModifier()) {
2891     return MatchOperand_NoMatch;
2892   } else {
2893     return parseImm(Operands, HasSP3AbsMod);
2894   }
2895 }
2896 
2897 bool
2898 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2899   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2900     const auto &str = Token.getString();
2901     return str == "abs" || str == "neg" || str == "sext";
2902   }
2903   return false;
2904 }
2905 
2906 bool
2907 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2908   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2909 }
2910 
2911 bool
2912 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2913   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2914 }
2915 
2916 bool
2917 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2918   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2919 }
2920 
2921 // Check if this is an operand modifier or an opcode modifier
2922 // which may look like an expression but it is not. We should
2923 // avoid parsing these modifiers as expressions. Currently
2924 // recognized sequences are:
2925 //   |...|
2926 //   abs(...)
2927 //   neg(...)
2928 //   sext(...)
2929 //   -reg
2930 //   -|...|
2931 //   -abs(...)
2932 //   name:...
2933 // Note that simple opcode modifiers like 'gds' may be parsed as
2934 // expressions; this is a special case. See getExpressionAsToken.
2935 //
bool
AMDGPUAsmParser::isModifier() {

  // Look ahead up to two tokens to decide whether the stream starts with
  // an operand modifier ("abs(", '|', ...), an SP3 neg applied to a
  // register or to one of those ("-v0", "-|...|", "-abs("), or an opcode
  // modifier with a value ("name:..."). See the comment above for the
  // full list of recognized sequences.
  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}
2947 
2948 // Check if the current token is an SP3 'neg' modifier.
2949 // Currently this modifier is allowed in the following context:
2950 //
2951 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2952 // 2. Before an 'abs' modifier: -abs(...)
2953 // 3. Before an SP3 'abs' modifier: -|...|
2954 //
2955 // In all other cases "-" is handled as a part
2956 // of an expression that follows the sign.
2957 //
2958 // Note: When "-" is followed by an integer literal,
2959 // this is interpreted as integer negation rather
2960 // than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
2962 // NEG modifier would have resulted in different meaning
2963 // of integer literals used with VOP1/2/C and VOP3,
2964 // for example:
2965 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2966 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2967 // Negative fp literals with preceding "-" are
2968 // handled likewise for uniformity
2969 //
2970 bool
2971 AMDGPUAsmParser::parseSP3NegModifier() {
2972 
2973   AsmToken NextToken[2];
2974   peekTokens(NextToken);
2975 
2976   if (isToken(AsmToken::Minus) &&
2977       (isRegister(NextToken[0], NextToken[1]) ||
2978        NextToken[0].is(AsmToken::Pipe) ||
2979        isId(NextToken[0], "abs"))) {
2980     lex();
2981     return true;
2982   }
2983 
2984   return false;
2985 }
2986 
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  // Parse an operand together with its optional FP input modifiers, in
  // both named ("neg(abs(...))") and SP3 ("-|...|") syntax. The modifier
  // flags are attached to the operand that was just pushed. Mixing named
  // and SP3 spellings of the same modifier is rejected.
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    // "-neg(...)" — both spellings of neg at once.
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    // "abs(|...|)" — both spellings of abs at once.
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    // SP3Abs is passed down because "|expr|" needs special expression
    // handling (the trailing '|' is not valid MC expression syntax).
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once a modifier has been consumed, NoMatch is promoted to ParseFail.
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  // Consume the closing delimiters in the reverse of opening order.
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      // Modifiers cannot be encoded on unresolved expressions.
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
3053 
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  // Parse an operand with the optional integer input modifier "sext(...)".
  // The modifier flag is attached to the operand that was just pushed.
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    return MatchOperand_ParseFail;

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once 'sext(' has been consumed, NoMatch is promoted to ParseFail.
    return Sext? MatchOperand_ParseFail : Res;
  }

  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Sext = Sext;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      // Modifiers cannot be encoded on unresolved expressions.
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}
3088 
3089 OperandMatchResultTy
3090 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3091   return parseRegOrImmWithFPInputMods(Operands, false);
3092 }
3093 
3094 OperandMatchResultTy
3095 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3096   return parseRegOrImmWithIntInputMods(Operands, false);
3097 }
3098 
3099 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3100   auto Loc = getLoc();
3101   if (trySkipId("off")) {
3102     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3103                                                 AMDGPUOperand::ImmTyOff, false));
3104     return MatchOperand_Success;
3105   }
3106 
3107   if (!isRegister())
3108     return MatchOperand_NoMatch;
3109 
3110   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3111   if (Reg) {
3112     Operands.push_back(std::move(Reg));
3113     return MatchOperand_Success;
3114   }
3115 
3116   return MatchOperand_ParseFail;
3117 
3118 }
3119 
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  // Reject matches whose encoding disagrees with an encoding the user
  // forced via an instruction suffix (_e32/_e64/_sdwa/_dpp), and steer
  // ambiguous matches toward the preferred 32-bit encoding.
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  // Unless e64 was forced, prefer the 32-bit form of instructions marked
  // VOPAsmPrefer32Bit.
  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}
3147 
3148 static ArrayRef<unsigned> getAllVariants() {
3149   static const unsigned Variants[] = {
3150     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3151     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3152   };
3153 
3154   return makeArrayRef(Variants);
3155 }
3156 
// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  // If the user forced an encoding via an instruction suffix, restrict
  // matching to that variant; otherwise try all of them. Each candidate
  // array is static so the returned ArrayRef stays valid.
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  return getAllVariants();
}
3182 
StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  // Human-readable name of the encoding the user forced via an instruction
  // suffix; empty when no encoding was forced.
  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}
3198 
3199 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3200   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3201   const unsigned Num = Desc.getNumImplicitUses();
3202   for (unsigned i = 0; i < Num; ++i) {
3203     unsigned Reg = Desc.ImplicitUses[i];
3204     switch (Reg) {
3205     case AMDGPU::FLAT_SCR:
3206     case AMDGPU::VCC:
3207     case AMDGPU::VCC_LO:
3208     case AMDGPU::VCC_HI:
3209     case AMDGPU::M0:
3210       return Reg;
3211     default:
3212       break;
3213     }
3214   }
3215   return AMDGPU::NoRegister;
3216 }
3217 
3218 // NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
3220 // Note that there are no cases when a GFX7 opcode violates
3221 // constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  // Returns true if the immediate at OpIdx can be encoded as an inline
  // constant (and therefore does not occupy the constant bus / literal
  // slot). Dispatches on the expected operand size; 16-bit operands are
  // further split by operand type (int16 vs packed v2i16/v2f16 vs f16).
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
      return AMDGPU::isInlinableIntLiteral(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
      return AMDGPU::isInlinableIntLiteralV216(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());

    // Remaining 16-bit types are treated as scalar f16.
    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
3263 
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  // Number of distinct scalar values an instruction may read through the
  // constant bus: 1 before GFX10; on GFX10+ generally 2, except for the
  // 64-bit shifts listed below.
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}
3284 
3285 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3286   const MCOperand &MO = Inst.getOperand(OpIdx);
3287   if (MO.isImm()) {
3288     return !isInlineConstant(Inst, OpIdx);
3289   } else if (MO.isReg()) {
3290     auto Reg = MO.getReg();
3291     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3292     auto PReg = mc2PseudoReg(Reg);
3293     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3294   } else {
3295     return true;
3296   }
3297 }
3298 
bool
AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
                                                const OperandVector &Operands) {
  // Count the distinct scalar values (SGPRs, implicit SGPR reads, and
  // literal constants) an instruction reads through the constant bus and
  // verify the total is within the per-opcode limit. Reports an error and
  // returns false on violation.
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  // Only vector ALU encodings are subject to the constant bus limit.
  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++NumLiterals;
      LiteralSize = 4;
    }

    // Track SGPRs already counted so a register used twice counts once.
    SmallDenseSet<unsigned> SGPRsUsed;
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated on the previous step.
          // See validateVOPLiteral.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  // Point the diagnostic at whichever offending operand appears later.
  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
  Error(Loc, "invalid operand (violates constant bus restrictions)");
  return false;
}
3388 
bool
AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
                                                 const OperandVector &Operands) {
  // For instructions whose vdst is marked EARLY_CLOBBER, verify that the
  // destination register does not overlap any source register. Reports an
  // error and returns false on violation.
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
  if (DstIdx == -1 ||
      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
    return true; // No early-clobber constraint on this instruction.
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  assert(DstIdx != -1);
  const MCOperand &Dst = Inst.getOperand(DstIdx);
  assert(Dst.isReg());

  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  for (int SrcIdx : SrcIndices) {
    if (SrcIdx == -1) break;
    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg()) {
      // Any overlap (even partial) between dst and a source is rejected.
      if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
        const unsigned SrcReg = mc2PseudoReg(Src.getReg());
        Error(getRegLoc(SrcReg, Operands),
          "destination must be different than all sources");
        return false;
      }
    }
  }

  return true;
}
3428 
3429 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3430 
3431   const unsigned Opc = Inst.getOpcode();
3432   const MCInstrDesc &Desc = MII.get(Opc);
3433 
3434   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3435     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3436     assert(ClampIdx != -1);
3437     return Inst.getOperand(ClampIdx).getImm() == 0;
3438   }
3439 
3440   return true;
3441 }
3442 
3443 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3444 
3445   const unsigned Opc = Inst.getOpcode();
3446   const MCInstrDesc &Desc = MII.get(Opc);
3447 
3448   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3449     return true;
3450 
3451   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3452   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3453   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3454 
3455   assert(VDataIdx != -1);
3456 
3457   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3458     return true;
3459 
3460   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3461   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3462   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3463   if (DMask == 0)
3464     DMask = 1;
3465 
3466   unsigned DataSize =
3467     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3468   if (hasPackedD16()) {
3469     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3470     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3471       DataSize = (DataSize + 1) / 2;
3472   }
3473 
3474   return (VDataSize / 4) == DataSize + TFESize;
3475 }
3476 
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
  // Verify that a GFX10+ MIMG instruction supplies the number of address
  // registers its base opcode, dimension, and a16/g16 modifiers require.
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);

  assert(VAddr0Idx != -1);
  assert(SrsrcIdx != -1);
  assert(SrsrcIdx > VAddr0Idx);

  if (DimIdx == -1)
    return true; // intersect_ray

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
  // NSA encodings carry one address operand per register between vaddr0
  // and srsrc; otherwise all addresses live in one register tuple.
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());

  unsigned ExpectedAddrSize =
      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());

  if (!IsNSA) {
    // Non-NSA register tuples round up to the next power-of-two-ish size.
    if (ExpectedAddrSize > 8)
      ExpectedAddrSize = 16;

    // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
    // This provides backward compatibility for assembly created
    // before 160b/192b/224b types were directly supported.
    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
      return true;
  }

  return ActualAddrSize == ExpectedAddrSize;
}
3524 
3525 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3526 
3527   const unsigned Opc = Inst.getOpcode();
3528   const MCInstrDesc &Desc = MII.get(Opc);
3529 
3530   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3531     return true;
3532   if (!Desc.mayLoad() || !Desc.mayStore())
3533     return true; // Not atomic
3534 
3535   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3536   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3537 
3538   // This is an incomplete check because image_atomic_cmpswap
3539   // may only use 0x3 and 0xf while other atomic operations
3540   // may use 0x1 and 0x3. However these limitations are
3541   // verified when we check that dmask matches dst size.
3542   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3543 }
3544 
3545 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3546 
3547   const unsigned Opc = Inst.getOpcode();
3548   const MCInstrDesc &Desc = MII.get(Opc);
3549 
3550   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3551     return true;
3552 
3553   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3554   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3555 
3556   // GATHER4 instructions use dmask in a different fashion compared to
3557   // other MIMG instructions. The only useful DMASK values are
3558   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3559   // (red,red,red,red) etc.) The ISA document doesn't mention
3560   // this.
3561   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3562 }
3563 
3564 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3565   const unsigned Opc = Inst.getOpcode();
3566   const MCInstrDesc &Desc = MII.get(Opc);
3567 
3568   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3569     return true;
3570 
3571   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3572   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3573       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3574 
3575   if (!BaseOpcode->MSAA)
3576     return true;
3577 
3578   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3579   assert(DimIdx != -1);
3580 
3581   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3582   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3583 
3584   return DimInfo->MSAA;
3585 }
3586 
3587 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3588 {
3589   switch (Opcode) {
3590   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3591   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3592   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3593     return true;
3594   default:
3595     return false;
3596   }
3597 }
3598 
3599 // movrels* opcodes should only allow VGPRS as src0.
3600 // This is specified in .td description for vop1/vop3,
3601 // but sdwa is handled differently. See isSDWAOperand.
3602 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3603                                       const OperandVector &Operands) {
3604 
3605   const unsigned Opc = Inst.getOpcode();
3606   const MCInstrDesc &Desc = MII.get(Opc);
3607 
3608   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3609     return true;
3610 
3611   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3612   assert(Src0Idx != -1);
3613 
3614   SMLoc ErrLoc;
3615   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3616   if (Src0.isReg()) {
3617     auto Reg = mc2PseudoReg(Src0.getReg());
3618     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3619     if (!isSGPR(Reg, TRI))
3620       return true;
3621     ErrLoc = getRegLoc(Reg, Operands);
3622   } else {
3623     ErrLoc = getConstLoc(Operands);
3624   }
3625 
3626   Error(ErrLoc, "source operand must be a VGPR");
3627   return false;
3628 }
3629 
3630 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3631                                           const OperandVector &Operands) {
3632 
3633   const unsigned Opc = Inst.getOpcode();
3634 
3635   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3636     return true;
3637 
3638   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3639   assert(Src0Idx != -1);
3640 
3641   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3642   if (!Src0.isReg())
3643     return true;
3644 
3645   auto Reg = mc2PseudoReg(Src0.getReg());
3646   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3647   if (isSGPR(Reg, TRI)) {
3648     Error(getRegLoc(Reg, Operands),
3649           "source operand must be either a VGPR or an inline constant");
3650     return false;
3651   }
3652 
3653   return true;
3654 }
3655 
3656 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3657                                    const OperandVector &Operands) {
3658   const unsigned Opc = Inst.getOpcode();
3659   const MCInstrDesc &Desc = MII.get(Opc);
3660 
3661   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3662     return true;
3663 
3664   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3665   if (Src2Idx == -1)
3666     return true;
3667 
3668   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3669   if (!Src2.isReg())
3670     return true;
3671 
3672   MCRegister Src2Reg = Src2.getReg();
3673   MCRegister DstReg = Inst.getOperand(0).getReg();
3674   if (Src2Reg == DstReg)
3675     return true;
3676 
3677   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3678   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3679     return true;
3680 
3681   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3682     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3683           "source 2 operand must not partially overlap with dst");
3684     return false;
3685   }
3686 
3687   return true;
3688 }
3689 
3690 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3691   switch (Inst.getOpcode()) {
3692   default:
3693     return true;
3694   case V_DIV_SCALE_F32_gfx6_gfx7:
3695   case V_DIV_SCALE_F32_vi:
3696   case V_DIV_SCALE_F32_gfx10:
3697   case V_DIV_SCALE_F64_gfx6_gfx7:
3698   case V_DIV_SCALE_F64_vi:
3699   case V_DIV_SCALE_F64_gfx10:
3700     break;
3701   }
3702 
3703   // TODO: Check that src0 = src1 or src2.
3704 
3705   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3706                     AMDGPU::OpName::src2_modifiers,
3707                     AMDGPU::OpName::src2_modifiers}) {
3708     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3709             .getImm() &
3710         SISrcMods::ABS) {
3711       return false;
3712     }
3713   }
3714 
3715   return true;
3716 }
3717 
3718 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3719 
3720   const unsigned Opc = Inst.getOpcode();
3721   const MCInstrDesc &Desc = MII.get(Opc);
3722 
3723   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3724     return true;
3725 
3726   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3727   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3728     if (isCI() || isSI())
3729       return false;
3730   }
3731 
3732   return true;
3733 }
3734 
3735 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3736   const unsigned Opc = Inst.getOpcode();
3737   const MCInstrDesc &Desc = MII.get(Opc);
3738 
3739   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3740     return true;
3741 
3742   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3743   if (DimIdx < 0)
3744     return true;
3745 
3746   long Imm = Inst.getOperand(DimIdx).getImm();
3747   if (Imm < 0 || Imm >= 8)
3748     return false;
3749 
3750   return true;
3751 }
3752 
// Returns true for "rev" opcodes, i.e. instructions whose mnemonic-order
// operands are swapped relative to the encoded src0/src1 slots
// (v_subrev*, v_*shlrev, v_*shrrev, v_ashrrev, and their packed forms).
// Used by validateLdsDirect to reject lds_direct on these instructions.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
3881 
// Validate uses of the lds_direct pseudo source. Returns an error message
// when a use is illegal, or None when the instruction is acceptable.
Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {

  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  // lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
  if ((Desc.TSFlags & Enc) == 0)
    return None;

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
    if (SrcIdx == -1)
      break; // Sources are contiguous, so no later source can exist either.
    const auto &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {

      if (isGFX90A())
        return StringRef("lds_direct is not supported on this GPU");

      // "rev" opcodes swap their encoded operands, and SDWA does not accept
      // lds_direct at all — presumably encoding conflicts; see IsRevOpcode.
      if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
        return StringRef("lds_direct cannot be used with this instruction");

      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return None;
}
3914 
3915 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3916   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3917     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3918     if (Op.isFlatOffset())
3919       return Op.getStartLoc();
3920   }
3921   return getLoc();
3922 }
3923 
3924 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3925                                          const OperandVector &Operands) {
3926   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3927   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3928     return true;
3929 
3930   auto Opcode = Inst.getOpcode();
3931   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3932   assert(OpNum != -1);
3933 
3934   const auto &Op = Inst.getOperand(OpNum);
3935   if (!hasFlatOffsets() && Op.getImm() != 0) {
3936     Error(getFlatOffsetLoc(Operands),
3937           "flat offset modifier is not supported on this GPU");
3938     return false;
3939   }
3940 
3941   // For FLAT segment the offset must be positive;
3942   // MSB is ignored and forced to zero.
3943   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3944     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3945     if (!isIntN(OffsetSize, Op.getImm())) {
3946       Error(getFlatOffsetLoc(Operands),
3947             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3948       return false;
3949     }
3950   } else {
3951     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3952     if (!isUIntN(OffsetSize, Op.getImm())) {
3953       Error(getFlatOffsetLoc(Operands),
3954             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3955       return false;
3956     }
3957   }
3958 
3959   return true;
3960 }
3961 
3962 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3963   // Start with second operand because SMEM Offset cannot be dst or src0.
3964   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3965     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3966     if (Op.isSMEMOffset())
3967       return Op.getStartLoc();
3968   }
3969   return getLoc();
3970 }
3971 
3972 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3973                                          const OperandVector &Operands) {
3974   if (isCI() || isSI())
3975     return true;
3976 
3977   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3978   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3979     return true;
3980 
3981   auto Opcode = Inst.getOpcode();
3982   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3983   if (OpNum == -1)
3984     return true;
3985 
3986   const auto &Op = Inst.getOperand(OpNum);
3987   if (!Op.isImm())
3988     return true;
3989 
3990   uint64_t Offset = Op.getImm();
3991   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3992   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3993       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3994     return true;
3995 
3996   Error(getSMEMOffsetLoc(Operands),
3997         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3998                                "expected a 21-bit signed offset");
3999 
4000   return false;
4001 }
4002 
4003 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4004   unsigned Opcode = Inst.getOpcode();
4005   const MCInstrDesc &Desc = MII.get(Opcode);
4006   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4007     return true;
4008 
4009   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4010   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4011 
4012   const int OpIndices[] = { Src0Idx, Src1Idx };
4013 
4014   unsigned NumExprs = 0;
4015   unsigned NumLiterals = 0;
4016   uint32_t LiteralValue;
4017 
4018   for (int OpIdx : OpIndices) {
4019     if (OpIdx == -1) break;
4020 
4021     const MCOperand &MO = Inst.getOperand(OpIdx);
4022     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4023     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4024       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4025         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4026         if (NumLiterals == 0 || LiteralValue != Value) {
4027           LiteralValue = Value;
4028           ++NumLiterals;
4029         }
4030       } else if (MO.isExpr()) {
4031         ++NumExprs;
4032       }
4033     }
4034   }
4035 
4036   return NumLiterals + NumExprs <= 1;
4037 }
4038 
4039 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4040   const unsigned Opc = Inst.getOpcode();
4041   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4042       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4043     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4044     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4045 
4046     if (OpSel & ~3)
4047       return false;
4048   }
4049 
4050   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4051     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4052     if (OpSelIdx != -1) {
4053       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4054         return false;
4055     }
4056     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4057     if (OpSelHiIdx != -1) {
4058       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4059         return false;
4060     }
4061   }
4062 
4063   return true;
4064 }
4065 
4066 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4067                                   const OperandVector &Operands) {
4068   const unsigned Opc = Inst.getOpcode();
4069   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4070   if (DppCtrlIdx < 0)
4071     return true;
4072   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4073 
4074   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4075     // DPP64 is supported for row_newbcast only.
4076     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4077     if (Src0Idx >= 0 &&
4078         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4079       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4080       Error(S, "64 bit dpp only supports row_newbcast");
4081       return false;
4082     }
4083   }
4084 
4085   return true;
4086 }
4087 
4088 // Check if VCC register matches wavefront size
4089 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4090   auto FB = getFeatureBits();
4091   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4092     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4093 }
4094 
// One unique literal can be used. VOP3 literal is only allowed in GFX10+
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
  // Only VOP3/VOP3P encodings and instructions with an explicit imm operand
  // are subject to this check.
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
      ImmIdx == -1)
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue; // Only read after NumLiterals > 0 guarantees a write.

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (!MO.isImm() && !MO.isExpr())
      continue;
    if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
      continue;

    // gfx908 MFMA hardware bug: src2 cannot take a literal/constant operand.
    if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
        getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
      Error(getConstLoc(Operands),
            "inline constants are not allowed for this operand");
      return false;
    }

    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      uint32_t Value = static_cast<uint32_t>(MO.getImm());
      // Repeated uses of the same literal value share one literal slot.
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    } else if (MO.isExpr()) {
      ++NumExprs;
    }
  }
  NumLiterals += NumExprs;

  if (!NumLiterals)
    return true;

  // Without the VOP3Literal feature only the explicit imm slot may hold a
  // literal.
  if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    return false;
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands), "only one literal operand is allowed");
    return false;
  }

  return true;
}
4159 
4160 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4161 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4162                          const MCRegisterInfo *MRI) {
4163   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4164   if (OpIdx < 0)
4165     return -1;
4166 
4167   const MCOperand &Op = Inst.getOperand(OpIdx);
4168   if (!Op.isReg())
4169     return -1;
4170 
4171   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4172   auto Reg = Sub ? Sub : Op.getReg();
4173   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4174   return AGPR32.contains(Reg) ? 1 : 0;
4175 }
4176 
4177 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4178   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4179   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4180                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4181                   SIInstrFlags::DS)) == 0)
4182     return true;
4183 
4184   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4185                                                       : AMDGPU::OpName::vdata;
4186 
4187   const MCRegisterInfo *MRI = getMRI();
4188   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4189   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4190 
4191   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4192     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4193     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4194       return false;
4195   }
4196 
4197   auto FB = getFeatureBits();
4198   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4199     if (DataAreg < 0 || DstAreg < 0)
4200       return true;
4201     return DstAreg == DataAreg;
4202   }
4203 
4204   return DstAreg < 1 && DataAreg < 1;
4205 }
4206 
4207 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4208   auto FB = getFeatureBits();
4209   if (!FB[AMDGPU::FeatureGFX90AInsts])
4210     return true;
4211 
4212   const MCRegisterInfo *MRI = getMRI();
4213   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4214   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4215   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4216     const MCOperand &Op = Inst.getOperand(I);
4217     if (!Op.isReg())
4218       continue;
4219 
4220     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4221     if (!Sub)
4222       continue;
4223 
4224     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4225       return false;
4226     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4227       return false;
4228   }
4229 
4230   return true;
4231 }
4232 
4233 // gfx90a has an undocumented limitation:
4234 // DS_GWS opcodes must use even aligned registers.
4235 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4236                                   const OperandVector &Operands) {
4237   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4238     return true;
4239 
4240   int Opc = Inst.getOpcode();
4241   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4242       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4243     return true;
4244 
4245   const MCRegisterInfo *MRI = getMRI();
4246   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4247   int Data0Pos =
4248       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4249   assert(Data0Pos != -1);
4250   auto Reg = Inst.getOperand(Data0Pos).getReg();
4251   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4252   if (RegIdx & 1) {
4253     SMLoc RegLoc = getRegLoc(Reg, Operands);
4254     Error(RegLoc, "vgpr must be even aligned");
4255     return false;
4256   }
4257 
4258   return true;
4259 }
4260 
// Check the cache-policy (cpol) bits: SMRD restrictions, gfx90a scc support,
// and glc/sc0 requirements for atomic instructions with and without return.
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            const SMLoc &IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  if (CPolPos == -1)
    return true;

  unsigned CPol = Inst.getOperand(CPolPos).getImm();

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  // SMRD instructions accept only glc and dlc bits.
  if ((TSFlags & (SIInstrFlags::SMRD)) &&
      (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
    Error(IDLoc, "invalid cache policy for SMRD instruction");
    return false;
  }

  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
    // Point the diagnostic at the "scc" token itself: search forward from
    // the start of the cpol operand within the source buffer.
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    StringRef CStr(S.getPointer());
    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
    Error(S, "scc is not supported on this GPU");
    return false;
  }

  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
    return true;

  if (TSFlags & SIInstrFlags::IsAtomicRet) {
    // Returning atomics (except MIMG) must set glc (named sc0 on gfx940).
    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
      Error(IDLoc, isGFX940() ? "instruction must use sc0"
                              : "instruction must use glc");
      return false;
    }
  } else {
    // Non-returning atomics must not set glc/sc0.
    if (CPol & CPol::GLC) {
      // As above, point the diagnostic at the "glc" token.
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
      Error(S, isGFX940() ? "instruction must not use sc0"
                          : "instruction must not use glc");
      return false;
    }
  }

  return true;
}
4308 
4309 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4310                                           const SMLoc &IDLoc,
4311                                           const OperandVector &Operands) {
4312   if (auto ErrMsg = validateLdsDirect(Inst)) {
4313     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4314     return false;
4315   }
4316   if (!validateSOPLiteral(Inst)) {
4317     Error(getLitLoc(Operands),
4318       "only one literal operand is allowed");
4319     return false;
4320   }
4321   if (!validateVOPLiteral(Inst, Operands)) {
4322     return false;
4323   }
4324   if (!validateConstantBusLimitations(Inst, Operands)) {
4325     return false;
4326   }
4327   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4328     return false;
4329   }
4330   if (!validateIntClampSupported(Inst)) {
4331     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4332       "integer clamping is not supported on this GPU");
4333     return false;
4334   }
4335   if (!validateOpSel(Inst)) {
4336     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4337       "invalid op_sel operand");
4338     return false;
4339   }
4340   if (!validateDPP(Inst, Operands)) {
4341     return false;
4342   }
4343   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4344   if (!validateMIMGD16(Inst)) {
4345     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4346       "d16 modifier is not supported on this GPU");
4347     return false;
4348   }
4349   if (!validateMIMGDim(Inst)) {
4350     Error(IDLoc, "dim modifier is required on this GPU");
4351     return false;
4352   }
4353   if (!validateMIMGMSAA(Inst)) {
4354     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4355           "invalid dim; must be MSAA type");
4356     return false;
4357   }
4358   if (!validateMIMGDataSize(Inst)) {
4359     Error(IDLoc,
4360       "image data size does not match dmask and tfe");
4361     return false;
4362   }
4363   if (!validateMIMGAddrSize(Inst)) {
4364     Error(IDLoc,
4365       "image address size does not match dim and a16");
4366     return false;
4367   }
4368   if (!validateMIMGAtomicDMask(Inst)) {
4369     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4370       "invalid atomic image dmask");
4371     return false;
4372   }
4373   if (!validateMIMGGatherDMask(Inst)) {
4374     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4375       "invalid image_gather dmask: only one bit must be set");
4376     return false;
4377   }
4378   if (!validateMovrels(Inst, Operands)) {
4379     return false;
4380   }
4381   if (!validateFlatOffset(Inst, Operands)) {
4382     return false;
4383   }
4384   if (!validateSMEMOffset(Inst, Operands)) {
4385     return false;
4386   }
4387   if (!validateMAIAccWrite(Inst, Operands)) {
4388     return false;
4389   }
4390   if (!validateMFMA(Inst, Operands)) {
4391     return false;
4392   }
4393   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4394     return false;
4395   }
4396 
4397   if (!validateAGPRLdSt(Inst)) {
4398     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4399     ? "invalid register class: data and dst should be all VGPR or AGPR"
4400     : "invalid register class: agpr loads and stores not supported on this GPU"
4401     );
4402     return false;
4403   }
4404   if (!validateVGPRAlign(Inst)) {
4405     Error(IDLoc,
4406       "invalid register class: vgpr tuples must be 64 bit aligned");
4407     return false;
4408   }
4409   if (!validateGWS(Inst, Operands)) {
4410     return false;
4411   }
4412 
4413   if (!validateDivScale(Inst)) {
4414     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4415     return false;
4416   }
4417   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4418     return false;
4419   }
4420 
4421   return true;
4422 }
4423 
4424 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4425                                             const FeatureBitset &FBS,
4426                                             unsigned VariantID = 0);
4427 
4428 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4429                                 const FeatureBitset &AvailableFeatures,
4430                                 unsigned VariantID);
4431 
// Returns true if \p Mnemo is recognized under feature set \p FBS in any
// assembler syntax variant (convenience overload over the 3-arg form).
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}
4436 
4437 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4438                                        const FeatureBitset &FBS,
4439                                        ArrayRef<unsigned> Variants) {
4440   for (auto Variant : Variants) {
4441     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4442       return true;
4443   }
4444 
4445   return false;
4446 }
4447 
4448 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4449                                                   const SMLoc &IDLoc) {
4450   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4451 
4452   // Check if requested instruction variant is supported.
4453   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4454     return false;
4455 
4456   // This instruction is not supported.
4457   // Clear any other pending errors because they are no longer relevant.
4458   getParser().clearPendingErrors();
4459 
4460   // Requested instruction variant is not supported.
4461   // Check if any other variants are supported.
4462   StringRef VariantName = getMatchedVariantName();
4463   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4464     return Error(IDLoc,
4465                  Twine(VariantName,
4466                        " variant of this instruction is not supported"));
4467   }
4468 
4469   // Finally check if this instruction is supported on any other GPU.
4470   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4471     return Error(IDLoc, "instruction not supported on this GPU");
4472   }
4473 
4474   // Instruction not supported on any GPU. Probably a typo.
4475   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4476   return Error(IDLoc, "invalid instruction" + Suggestion);
4477 }
4478 
// Matches the parsed operands against every acceptable syntax variant,
// validates the winning MCInst, and emits it. Returns true on failure (a
// diagnostic has already been reported), false on success.
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  // Try each variant; keep the most specific failure so the eventual
  // diagnostic is as precise as possible. Note: the conditions below encode
  // the ordering and must be kept in sync with the comment.
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    // A successful match wins outright; no need to try further variants.
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    // Run target-specific semantic checks before emitting.
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  // No variant matched; first check whether the mnemonic itself is
  // unsupported (wrong variant / wrong GPU / typo).
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  // The mnemonic is valid on this GPU; report the most specific failure.
  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      // Point at the offending operand when its location is known.
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}
4551 
4552 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4553   int64_t Tmp = -1;
4554   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4555     return true;
4556   }
4557   if (getParser().parseAbsoluteExpression(Tmp)) {
4558     return true;
4559   }
4560   Ret = static_cast<uint32_t>(Tmp);
4561   return false;
4562 }
4563 
4564 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4565                                                uint32_t &Minor) {
4566   if (ParseAsAbsoluteExpression(Major))
4567     return TokError("invalid major version");
4568 
4569   if (!trySkipToken(AsmToken::Comma))
4570     return TokError("minor version number required, comma expected");
4571 
4572   if (ParseAsAbsoluteExpression(Minor))
4573     return TokError("invalid minor version");
4574 
4575   return false;
4576 }
4577 
4578 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4579   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4580     return TokError("directive only supported for amdgcn architecture");
4581 
4582   std::string TargetIDDirective;
4583   SMLoc TargetStart = getTok().getLoc();
4584   if (getParser().parseEscapedString(TargetIDDirective))
4585     return true;
4586 
4587   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4588   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4589     return getParser().Error(TargetRange.Start,
4590         (Twine(".amdgcn_target directive's target id ") +
4591          Twine(TargetIDDirective) +
4592          Twine(" does not match the specified target id ") +
4593          Twine(getTargetStreamer().getTargetID()->toString())).str());
4594 
4595   return false;
4596 }
4597 
// Emits a generic out-of-range diagnostic anchored at the start of the
// offending value's source range. Always returns true (error).
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
4601 
// Converts the next-free VGPR/SGPR counts from a .amdhsa_kernel directive
// into the granulated "block" encodings stored in the kernel descriptor.
// Reports an out-of-range error (returning true) when a count exceeds what
// the subtarget can address; returns false on success with \p VGPRBlocks and
// \p SGPRBlocks filled in.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
    SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
    unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;

  // On gfx10+ the SGPR field of the descriptor is unused; encode zero.
  if (Version.Major >= 10)
    NumSGPRs = 0;
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(&getSTI());

    // On gfx8+ (without the SGPR-init bug) the extra VCC/flat-scratch/XNACK
    // SGPRs do not count against the addressable limit, so check the user
    // count before adding them.
    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    NumSGPRs +=
        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

    // Older targets (or those with the init bug) include the extra SGPRs in
    // the addressable limit, so check again after adding them.
    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // With the SGPR-init bug the hardware always initializes a fixed count.
    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
  }

  VGPRBlocks =
      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}
4641 
4642 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4643   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4644     return TokError("directive only supported for amdgcn architecture");
4645 
4646   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4647     return TokError("directive only supported for amdhsa OS");
4648 
4649   StringRef KernelName;
4650   if (getParser().parseIdentifier(KernelName))
4651     return true;
4652 
4653   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4654 
4655   StringSet<> Seen;
4656 
4657   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4658 
4659   SMRange VGPRRange;
4660   uint64_t NextFreeVGPR = 0;
4661   uint64_t AccumOffset = 0;
4662   uint64_t SharedVGPRCount = 0;
4663   SMRange SGPRRange;
4664   uint64_t NextFreeSGPR = 0;
4665 
4666   // Count the number of user SGPRs implied from the enabled feature bits.
4667   unsigned ImpliedUserSGPRCount = 0;
4668 
4669   // Track if the asm explicitly contains the directive for the user SGPR
4670   // count.
4671   Optional<unsigned> ExplicitUserSGPRCount;
4672   bool ReserveVCC = true;
4673   bool ReserveFlatScr = true;
4674   Optional<bool> EnableWavefrontSize32;
4675 
4676   while (true) {
4677     while (trySkipToken(AsmToken::EndOfStatement));
4678 
4679     StringRef ID;
4680     SMRange IDRange = getTok().getLocRange();
4681     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4682       return true;
4683 
4684     if (ID == ".end_amdhsa_kernel")
4685       break;
4686 
4687     if (Seen.find(ID) != Seen.end())
4688       return TokError(".amdhsa_ directives cannot be repeated");
4689     Seen.insert(ID);
4690 
4691     SMLoc ValStart = getLoc();
4692     int64_t IVal;
4693     if (getParser().parseAbsoluteExpression(IVal))
4694       return true;
4695     SMLoc ValEnd = getLoc();
4696     SMRange ValRange = SMRange(ValStart, ValEnd);
4697 
4698     if (IVal < 0)
4699       return OutOfRangeError(ValRange);
4700 
4701     uint64_t Val = IVal;
4702 
4703 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4704   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4705     return OutOfRangeError(RANGE);                                             \
4706   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4707 
4708     if (ID == ".amdhsa_group_segment_fixed_size") {
4709       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4710         return OutOfRangeError(ValRange);
4711       KD.group_segment_fixed_size = Val;
4712     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4713       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4714         return OutOfRangeError(ValRange);
4715       KD.private_segment_fixed_size = Val;
4716     } else if (ID == ".amdhsa_kernarg_size") {
4717       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4718         return OutOfRangeError(ValRange);
4719       KD.kernarg_size = Val;
4720     } else if (ID == ".amdhsa_user_sgpr_count") {
4721       ExplicitUserSGPRCount = Val;
4722     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4723       if (hasArchitectedFlatScratch())
4724         return Error(IDRange.Start,
4725                      "directive is not supported with architected flat scratch",
4726                      IDRange);
4727       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4728                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4729                        Val, ValRange);
4730       if (Val)
4731         ImpliedUserSGPRCount += 4;
4732     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4733       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4734                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4735                        ValRange);
4736       if (Val)
4737         ImpliedUserSGPRCount += 2;
4738     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4739       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4740                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4741                        ValRange);
4742       if (Val)
4743         ImpliedUserSGPRCount += 2;
4744     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4745       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4746                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4747                        Val, ValRange);
4748       if (Val)
4749         ImpliedUserSGPRCount += 2;
4750     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4751       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4752                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4753                        ValRange);
4754       if (Val)
4755         ImpliedUserSGPRCount += 2;
4756     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4757       if (hasArchitectedFlatScratch())
4758         return Error(IDRange.Start,
4759                      "directive is not supported with architected flat scratch",
4760                      IDRange);
4761       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4762                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4763                        ValRange);
4764       if (Val)
4765         ImpliedUserSGPRCount += 2;
4766     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4767       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4768                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4769                        Val, ValRange);
4770       if (Val)
4771         ImpliedUserSGPRCount += 1;
4772     } else if (ID == ".amdhsa_wavefront_size32") {
4773       if (IVersion.Major < 10)
4774         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4775       EnableWavefrontSize32 = Val;
4776       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4777                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4778                        Val, ValRange);
4779     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4780       if (hasArchitectedFlatScratch())
4781         return Error(IDRange.Start,
4782                      "directive is not supported with architected flat scratch",
4783                      IDRange);
4784       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4785                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4786     } else if (ID == ".amdhsa_enable_private_segment") {
4787       if (!hasArchitectedFlatScratch())
4788         return Error(
4789             IDRange.Start,
4790             "directive is not supported without architected flat scratch",
4791             IDRange);
4792       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4793                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4794     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4795       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4796                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4797                        ValRange);
4798     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4799       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4800                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4801                        ValRange);
4802     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4803       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4804                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4805                        ValRange);
4806     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4807       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4808                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4809                        ValRange);
4810     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4811       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4812                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4813                        ValRange);
4814     } else if (ID == ".amdhsa_next_free_vgpr") {
4815       VGPRRange = ValRange;
4816       NextFreeVGPR = Val;
4817     } else if (ID == ".amdhsa_next_free_sgpr") {
4818       SGPRRange = ValRange;
4819       NextFreeSGPR = Val;
4820     } else if (ID == ".amdhsa_accum_offset") {
4821       if (!isGFX90A())
4822         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4823       AccumOffset = Val;
4824     } else if (ID == ".amdhsa_reserve_vcc") {
4825       if (!isUInt<1>(Val))
4826         return OutOfRangeError(ValRange);
4827       ReserveVCC = Val;
4828     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4829       if (IVersion.Major < 7)
4830         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4831       if (hasArchitectedFlatScratch())
4832         return Error(IDRange.Start,
4833                      "directive is not supported with architected flat scratch",
4834                      IDRange);
4835       if (!isUInt<1>(Val))
4836         return OutOfRangeError(ValRange);
4837       ReserveFlatScr = Val;
4838     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4839       if (IVersion.Major < 8)
4840         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4841       if (!isUInt<1>(Val))
4842         return OutOfRangeError(ValRange);
4843       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4844         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4845                                  IDRange);
4846     } else if (ID == ".amdhsa_float_round_mode_32") {
4847       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4848                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4849     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4850       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4851                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4852     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4853       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4854                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4855     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4856       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4857                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4858                        ValRange);
4859     } else if (ID == ".amdhsa_dx10_clamp") {
4860       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4861                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4862     } else if (ID == ".amdhsa_ieee_mode") {
4863       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4864                        Val, ValRange);
4865     } else if (ID == ".amdhsa_fp16_overflow") {
4866       if (IVersion.Major < 9)
4867         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4868       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4869                        ValRange);
4870     } else if (ID == ".amdhsa_tg_split") {
4871       if (!isGFX90A())
4872         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4873       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4874                        ValRange);
4875     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4876       if (IVersion.Major < 10)
4877         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4878       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4879                        ValRange);
4880     } else if (ID == ".amdhsa_memory_ordered") {
4881       if (IVersion.Major < 10)
4882         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4883       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4884                        ValRange);
4885     } else if (ID == ".amdhsa_forward_progress") {
4886       if (IVersion.Major < 10)
4887         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4888       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4889                        ValRange);
4890     } else if (ID == ".amdhsa_shared_vgpr_count") {
4891       if (IVersion.Major < 10)
4892         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4893       SharedVGPRCount = Val;
4894       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
4895                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
4896                        ValRange);
4897     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4898       PARSE_BITS_ENTRY(
4899           KD.compute_pgm_rsrc2,
4900           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4901           ValRange);
4902     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4903       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4904                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4905                        Val, ValRange);
4906     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4907       PARSE_BITS_ENTRY(
4908           KD.compute_pgm_rsrc2,
4909           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4910           ValRange);
4911     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4912       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4913                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4914                        Val, ValRange);
4915     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4916       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4917                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4918                        Val, ValRange);
4919     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4920       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4921                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4922                        Val, ValRange);
4923     } else if (ID == ".amdhsa_exception_int_div_zero") {
4924       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4925                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4926                        Val, ValRange);
4927     } else {
4928       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4929     }
4930 
4931 #undef PARSE_BITS_ENTRY
4932   }
4933 
4934   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4935     return TokError(".amdhsa_next_free_vgpr directive is required");
4936 
4937   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4938     return TokError(".amdhsa_next_free_sgpr directive is required");
4939 
4940   unsigned VGPRBlocks;
4941   unsigned SGPRBlocks;
4942   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4943                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4944                          EnableWavefrontSize32, NextFreeVGPR,
4945                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4946                          SGPRBlocks))
4947     return true;
4948 
4949   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4950           VGPRBlocks))
4951     return OutOfRangeError(VGPRRange);
4952   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4953                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4954 
4955   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4956           SGPRBlocks))
4957     return OutOfRangeError(SGPRRange);
4958   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4959                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4960                   SGPRBlocks);
4961 
4962   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
4963     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
4964                     "enabled user SGPRs");
4965 
4966   unsigned UserSGPRCount =
4967       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
4968 
4969   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4970     return TokError("too many user SGPRs enabled");
4971   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4972                   UserSGPRCount);
4973 
4974   if (isGFX90A()) {
4975     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4976       return TokError(".amdhsa_accum_offset directive is required");
4977     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4978       return TokError("accum_offset should be in range [4..256] in "
4979                       "increments of 4");
4980     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4981       return TokError("accum_offset exceeds total VGPR allocation");
4982     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4983                     (AccumOffset / 4 - 1));
4984   }
4985 
4986   if (IVersion.Major == 10) {
4987     // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
4988     if (SharedVGPRCount && EnableWavefrontSize32) {
4989       return TokError("shared_vgpr_count directive not valid on "
4990                       "wavefront size 32");
4991     }
4992     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
4993       return TokError("shared_vgpr_count*2 + "
4994                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
4995                       "exceed 63\n");
4996     }
4997   }
4998 
4999   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5000       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5001       ReserveFlatScr);
5002   return false;
5003 }
5004 
5005 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5006   uint32_t Major;
5007   uint32_t Minor;
5008 
5009   if (ParseDirectiveMajorMinor(Major, Minor))
5010     return true;
5011 
5012   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5013   return false;
5014 }
5015 
5016 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5017   uint32_t Major;
5018   uint32_t Minor;
5019   uint32_t Stepping;
5020   StringRef VendorName;
5021   StringRef ArchName;
5022 
5023   // If this directive has no arguments, then use the ISA version for the
5024   // targeted GPU.
5025   if (isToken(AsmToken::EndOfStatement)) {
5026     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5027     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5028                                                         ISA.Stepping,
5029                                                         "AMD", "AMDGPU");
5030     return false;
5031   }
5032 
5033   if (ParseDirectiveMajorMinor(Major, Minor))
5034     return true;
5035 
5036   if (!trySkipToken(AsmToken::Comma))
5037     return TokError("stepping version number required, comma expected");
5038 
5039   if (ParseAsAbsoluteExpression(Stepping))
5040     return TokError("invalid stepping version");
5041 
5042   if (!trySkipToken(AsmToken::Comma))
5043     return TokError("vendor name required, comma expected");
5044 
5045   if (!parseString(VendorName, "invalid vendor name"))
5046     return true;
5047 
5048   if (!trySkipToken(AsmToken::Comma))
5049     return TokError("arch name required, comma expected");
5050 
5051   if (!parseString(ArchName, "invalid arch name"))
5052     return true;
5053 
5054   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5055                                                       VendorName, ArchName);
5056   return false;
5057 }
5058 
5059 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5060                                                amd_kernel_code_t &Header) {
5061   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5062   // assembly for backwards compatibility.
5063   if (ID == "max_scratch_backing_memory_byte_size") {
5064     Parser.eatToEndOfStatement();
5065     return false;
5066   }
5067 
5068   SmallString<40> ErrStr;
5069   raw_svector_ostream Err(ErrStr);
5070   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5071     return TokError(Err.str());
5072   }
5073   Lex();
5074 
5075   if (ID == "enable_wavefront_size32") {
5076     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5077       if (!isGFX10Plus())
5078         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5079       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5080         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5081     } else {
5082       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5083         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5084     }
5085   }
5086 
5087   if (ID == "wavefront_size") {
5088     if (Header.wavefront_size == 5) {
5089       if (!isGFX10Plus())
5090         return TokError("wavefront_size=5 is only allowed on GFX10+");
5091       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5092         return TokError("wavefront_size=5 requires +WavefrontSize32");
5093     } else if (Header.wavefront_size == 6) {
5094       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5095         return TokError("wavefront_size=6 requires +WavefrontSize64");
5096     }
5097   }
5098 
5099   if (ID == "enable_wgp_mode") {
5100     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5101         !isGFX10Plus())
5102       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5103   }
5104 
5105   if (ID == "enable_mem_ordered") {
5106     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5107         !isGFX10Plus())
5108       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5109   }
5110 
5111   if (ID == "enable_fwd_progress") {
5112     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5113         !isGFX10Plus())
5114       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5115   }
5116 
5117   return false;
5118 }
5119 
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  // Parse a ".amd_kernel_code_t ... .end_amd_kernel_code_t" block: a series
  // of "key = value" records that fill in an amd_kernel_code_t header, which
  // is then emitted through the target streamer.
  amd_kernel_code_t Header;
  // Start from the subtarget's defaults; records only override fields.
  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());

  while (true) {
    // Lex EndOfStatement.  This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while(trySkipToken(AsmToken::EndOfStatement));

    StringRef ID;
    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
      return true;

    if (ID == ".end_amd_kernel_code_t")
      break;

    // Each record is parsed/validated and applied to Header.
    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}
5144 
5145 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5146   StringRef KernelName;
5147   if (!parseId(KernelName, "expected symbol name"))
5148     return true;
5149 
5150   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5151                                            ELF::STT_AMDGPU_HSA_KERNEL);
5152 
5153   KernelScope.initialize(getContext());
5154   return false;
5155 }
5156 
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  // Handle ".amd_amdgpu_isa <target-id-string>"; only valid for amdgcn.
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  // The quoted string must exactly match the target-id the assembler was
  // configured with.
  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");

  getTargetStreamer().EmitISAVersion();
  // Advance past the target-id string token.
  Lex();

  return false;
}
5173 
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  // Pick the begin/end directive spelling that matches the active HSA
  // code-object ABI: V3+ uses the HSAMD::V3 markers, older ABIs the V2 ones.
  const char *AssemblerDirectiveBegin;
  const char *AssemblerDirectiveEnd;
  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
      isHsaAbiVersion3AndAbove(&getSTI())
          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
                            HSAMD::V3::AssemblerDirectiveEnd)
          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
                            HSAMD::AssemblerDirectiveEnd);

  // HSA metadata is only meaningful when targeting the AMDHSA OS.
  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getLoc(),
                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  // Collect the raw text between the begin and end directives.
  std::string HSAMetadataString;
  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
                          HSAMetadataString))
    return true;

  // The streamer parses/validates the collected text; a false return from
  // the Emit* hook indicates malformed metadata.
  if (isHsaAbiVersion3AndAbove(&getSTI())) {
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getLoc(), "invalid HSA metadata");
  } else {
    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
      return Error(getLoc(), "invalid HSA metadata");
  }

  return false;
}
5205 
5206 /// Common code to parse out a block of text (typically YAML) between start and
5207 /// end directives.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Preserve text verbatim: disable whitespace skipping so leading spaces
  // (significant e.g. in YAML) survive into CollectString.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(AsmToken::Eof)) {
    // Copy leading whitespace through unchanged.
    while (isToken(AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    // Stop (and consume the token) when the end directive is reached.
    if (trySkipId(AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    // Append the rest of the line plus a statement separator so the
    // collected text keeps its line structure.
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  // Restore normal lexer behavior before returning on any path below.
  getLexer().setSkipSpace(true);

  // Hitting EOF without seeing the end directive is an error.
  if (isToken(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}
5244 
5245 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5246 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5247   std::string String;
5248   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5249                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5250     return true;
5251 
5252   auto PALMetadata = getTargetStreamer().getPALMetadata();
5253   if (!PALMetadata->setFromString(String))
5254     return Error(getLoc(), "invalid PAL metadata");
5255   return false;
5256 }
5257 
5258 /// Parse the assembler directive for old linear-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  // Legacy PAL metadata is only meaningful when targeting the AMDPAL OS.
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  // The directive is a comma-separated list of register key/value pairs,
  // so an even number of values is required.
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    // A key without a following value is an odd-count error.
    if (!trySkipToken(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    // A trailing comma means another key/value pair follows.
    if (!trySkipToken(AsmToken::Comma))
      break;
  }
  return false;
}
5288 
5289 /// ParseDirectiveAMDGPULDS
5290 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  // ".amdgpu_lds name, size [, align]" — declare an LDS (local memory)
  // symbol with the given size and optional alignment (default 4).
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (parseToken(AsmToken::Comma, "expected ','"))
    return true;

  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());

  // The size must fit in the subtarget's LDS.
  int64_t Size;
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Alignment = 4;
  if (trySkipToken(AsmToken::Comma)) {
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place to symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseToken(AsmToken::EndOfStatement,
                 "unexpected token in '.amdgpu_lds' directive"))
    return true;

  // The symbol may be legitimately re-declared; only a conflicting prior
  // definition is an error.
  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}
5341 
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  // Dispatch target-specific directives.  Returning true means "not handled
  // here" (or an error was reported); false means the directive was parsed.
  StringRef IDVal = DirectiveID.getString();

  // ABI-specific directives: code-object V3+ and the older V2 sets are
  // mutually exclusive.
  if (isHsaAbiVersion3AndAbove(&getSTI())) {
    if (IDVal == ".amdhsa_kernel")
     return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  // Directives available regardless of ABI version.
  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}
5386 
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) {
  // Returns true if RegNo is a valid register operand on the current
  // subtarget; used to reject registers that only exist on some GPUs.

  // ttmp12..ttmp15 only exist on GFX9+.
  if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
    return isGFX9Plus();

  // GFX10 has 2 more SGPRs 104 and 105.
  if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
    return hasSGPR104_SGPR105();

  switch (RegNo) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus();
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    // Trap base/memory address registers were removed in GFX9.
    return !isGFX9Plus();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    // Only on VI/GFX9, and only when the target supports XNACK.
    return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
  case AMDGPU::SGPR_NULL:
    return isGFX10Plus();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
    return hasSGPR102_SGPR103();

  return true;
}
5445 
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Parse one instruction operand: first via the tablegen'd custom parsers,
  // then (in NSA mode) as a bracketed register list, else as reg-or-imm.

  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there as an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      isToken(AsmToken::EndOfStatement))
    return ResTy;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  // NSA (non-sequential address) form: "[vN, vM, ...]".
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      ResTy = parseReg(Operands);
      if (ResTy == MatchOperand_NoMatch)
        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
        return MatchOperand_ParseFail;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
      }
    }

    // Keep the brackets as token operands only when the list holds more
    // than one register; a single register parses as a plain operand.
    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}
5496 
5497 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5498   // Clear any forced encodings from the previous instruction.
5499   setForcedEncodingSize(0);
5500   setForcedDPP(false);
5501   setForcedSDWA(false);
5502 
5503   if (Name.endswith("_e64")) {
5504     setForcedEncodingSize(64);
5505     return Name.substr(0, Name.size() - 4);
5506   } else if (Name.endswith("_e32")) {
5507     setForcedEncodingSize(32);
5508     return Name.substr(0, Name.size() - 4);
5509   } else if (Name.endswith("_dpp")) {
5510     setForcedDPP(true);
5511     return Name.substr(0, Name.size() - 4);
5512   } else if (Name.endswith("_sdwa")) {
5513     setForcedSDWA(true);
5514     return Name.substr(0, Name.size() - 5);
5515   }
5516   return Name;
5517 }
5518 
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Top-level instruction parse: push the mnemonic, then parse operands
  // until end of statement, recovering to EOL on error.

  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.startswith("image_");

  while (!trySkipToken(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    // GFX10+ MIMG instructions may use the NSA register-list form for the
    // address operand (the operand right after the mnemonic and vdata).
    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    // Reset the per-instruction cache-policy-modifier tracking.
    CPolSeen = 0;
    OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);

    if (Res != MatchOperand_Success) {
      checkUnsupportedInstruction(Name, NameLoc);
      if (!Parser.hasPendingError()) {
        // FIXME: use real operand location rather than the current location.
        StringRef Msg =
          (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
                                            "not a valid operand.";
        Error(getLoc(), Msg);
      }
      // Error recovery: discard the rest of the statement.
      while (!trySkipToken(AsmToken::EndOfStatement)) {
        lex();
      }
      return true;
    }

    // Eat the comma or space if there is one.
    trySkipToken(AsmToken::Comma);
  }

  return false;
}
5556 
5557 //===----------------------------------------------------------------------===//
5558 // Utility functions
5559 //===----------------------------------------------------------------------===//
5560 
5561 OperandMatchResultTy
5562 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5563 
5564   if (!trySkipId(Prefix, AsmToken::Colon))
5565     return MatchOperand_NoMatch;
5566 
5567   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5568 }
5569 
5570 OperandMatchResultTy
5571 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5572                                     AMDGPUOperand::ImmTy ImmTy,
5573                                     bool (*ConvertResult)(int64_t&)) {
5574   SMLoc S = getLoc();
5575   int64_t Value = 0;
5576 
5577   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5578   if (Res != MatchOperand_Success)
5579     return Res;
5580 
5581   if (ConvertResult && !ConvertResult(Value)) {
5582     Error(S, "invalid " + StringRef(Prefix) + " value.");
5583   }
5584 
5585   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5586   return MatchOperand_Success;
5587 }
5588 
OperandMatchResultTy
AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
                                             OperandVector &Operands,
                                             AMDGPUOperand::ImmTy ImmTy,
                                             bool (*ConvertResult)(int64_t&)) {
  // Parse "<Prefix>:[b,b,...]" where each b is 0 or 1 (at most 4 elements),
  // packing element I into bit I of a single immediate operand.
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return MatchOperand_ParseFail;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return MatchOperand_ParseFail;

    if (Op != 0 && Op != 1) {
      Error(Loc, "invalid " + StringRef(Prefix) + " value.");
      return MatchOperand_ParseFail;
    }

    // Element I occupies bit I of the packed value.
    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    // Refuse to read a fifth element.
    if (I + 1 == MaxSize) {
      Error(getLoc(), "expected a closing square bracket");
      return MatchOperand_ParseFail;
    }

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}
5634 
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  // Parse a boolean flag operand: "Name" sets the bit, "noName" clears it,
  // anything else is no match.
  int64_t Bit;
  SMLoc S = getLoc();

  if (trySkipId(Name)) {
    Bit = 1;
  } else if (trySkipId("no", Name)) {
    Bit = 0;
  } else {
    return MatchOperand_NoMatch;
  }

  // Subtarget-gated modifiers.
  if (Name == "r128" && !hasMIMG_R128()) {
    Error(S, "r128 modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }
  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
    Error(S, "a16 modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  // On GFX9 the a16 modifier is encoded in the r128/a16 field.
  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}
5664 
OperandMatchResultTy
AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  // Parse one cache-policy modifier.  Each modifier either sets a CPol bit
  // (e.g. "glc") or, with a "no" prefix (e.g. "noglc"), clears it; multiple
  // modifiers are folded into a single ImmTyCPol operand.
  unsigned CPolOn = 0;
  unsigned CPolOff = 0;
  SMLoc S = getLoc();

  // GFX940 non-scalar instructions use sc0/nt/sc1 spellings instead of the
  // glc/slc/... set.
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (isGFX940() && !Mnemo.startswith("s_")) {
    if (trySkipId("sc0"))
      CPolOn = AMDGPU::CPol::SC0;
    else if (trySkipId("nosc0"))
      CPolOff = AMDGPU::CPol::SC0;
    else if (trySkipId("nt"))
      CPolOn = AMDGPU::CPol::NT;
    else if (trySkipId("nont"))
      CPolOff = AMDGPU::CPol::NT;
    else if (trySkipId("sc1"))
      CPolOn = AMDGPU::CPol::SC1;
    else if (trySkipId("nosc1"))
      CPolOff = AMDGPU::CPol::SC1;
    else
      return MatchOperand_NoMatch;
  }
  else if (trySkipId("glc"))
    CPolOn = AMDGPU::CPol::GLC;
  else if (trySkipId("noglc"))
    CPolOff = AMDGPU::CPol::GLC;
  else if (trySkipId("slc"))
    CPolOn = AMDGPU::CPol::SLC;
  else if (trySkipId("noslc"))
    CPolOff = AMDGPU::CPol::SLC;
  else if (trySkipId("dlc"))
    CPolOn = AMDGPU::CPol::DLC;
  else if (trySkipId("nodlc"))
    CPolOff = AMDGPU::CPol::DLC;
  else if (trySkipId("scc"))
    CPolOn = AMDGPU::CPol::SCC;
  else if (trySkipId("noscc"))
    CPolOff = AMDGPU::CPol::SCC;
  else
    return MatchOperand_NoMatch;

  // dlc only exists on GFX10+, scc only on GFX90A.
  if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
    Error(S, "dlc modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
    Error(S, "scc modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  // CPolSeen tracks modifiers already parsed for this instruction (it is
  // reset per instruction in ParseInstruction).
  if (CPolSeen & (CPolOn | CPolOff)) {
    Error(S, "duplicate cache policy modifier");
    return MatchOperand_ParseFail;
  }

  CPolSeen |= (CPolOn | CPolOff);

  // If a CPol operand already exists, merge this modifier into it.
  for (unsigned I = 1; I != Operands.size(); ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (Op.isCPol()) {
      Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
      return MatchOperand_Success;
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
                                              AMDGPUOperand::ImmTyCPol));

  return MatchOperand_Success;
}
5737 
5738 static void addOptionalImmOperand(
5739   MCInst& Inst, const OperandVector& Operands,
5740   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5741   AMDGPUOperand::ImmTy ImmT,
5742   int64_t Default = 0) {
5743   auto i = OptionalIdx.find(ImmT);
5744   if (i != OptionalIdx.end()) {
5745     unsigned Idx = i->second;
5746     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5747   } else {
5748     Inst.addOperand(MCOperand::createImm(Default));
5749   }
5750 }
5751 
5752 OperandMatchResultTy
5753 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5754                                        StringRef &Value,
5755                                        SMLoc &StringLoc) {
5756   if (!trySkipId(Prefix, AsmToken::Colon))
5757     return MatchOperand_NoMatch;
5758 
5759   StringLoc = getLoc();
5760   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5761                                                   : MatchOperand_ParseFail;
5762 }
5763 
5764 //===----------------------------------------------------------------------===//
5765 // MTBUF format
5766 //===----------------------------------------------------------------------===//
5767 
bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
                                  int64_t MaxVal,
                                  int64_t &Fmt) {
  // Try to parse "<Pref>:<n>" with 0 <= n <= MaxVal into Fmt.
  // Returns false on a hard parse error (diagnostic already emitted);
  // returns true otherwise — Fmt is updated only if the prefix matched.
  int64_t Val;
  SMLoc Loc = getLoc();

  auto Res = parseIntWithPrefix(Pref, Val);
  if (Res == MatchOperand_ParseFail)
    return false;
  if (Res == MatchOperand_NoMatch)
    return true;

  if (Val < 0 || Val > MaxVal) {
    Error(Loc, Twine("out of range ", StringRef(Pref)));
    return false;
  }

  Fmt = Val;
  return true;
}
5788 
5789 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5790 // values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    // Each of dfmt/nfmt may be given at most once; tryParseFmt returns
    // false only on a hard parse error.
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return MatchOperand_ParseFail;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
      return MatchOperand_ParseFail;
    }
    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  // Neither keyword was present: not a dfmt/nfmt operand at all.
  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return MatchOperand_NoMatch;

  // A missing half falls back to its default encoding.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return MatchOperand_Success;
}
5823 
5824 OperandMatchResultTy
5825 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5826   using namespace llvm::AMDGPU::MTBUFFormat;
5827 
5828   int64_t Fmt = UFMT_UNDEF;
5829 
5830   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5831     return MatchOperand_ParseFail;
5832 
5833   if (Fmt == UFMT_UNDEF)
5834     return MatchOperand_NoMatch;
5835 
5836   Format = Fmt;
5837   return MatchOperand_Success;
5838 }
5839 
5840 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5841                                     int64_t &Nfmt,
5842                                     StringRef FormatStr,
5843                                     SMLoc Loc) {
5844   using namespace llvm::AMDGPU::MTBUFFormat;
5845   int64_t Format;
5846 
5847   Format = getDfmt(FormatStr);
5848   if (Format != DFMT_UNDEF) {
5849     Dfmt = Format;
5850     return true;
5851   }
5852 
5853   Format = getNfmt(FormatStr, getSTI());
5854   if (Format != NFMT_UNDEF) {
5855     Nfmt = Format;
5856     return true;
5857   }
5858 
5859   Error(Loc, "unsupported format");
5860   return false;
5861 }
5862 
OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                          SMLoc FormatLoc,
                                          int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  // FormatStr is the first symbolic name; it may be a data format or a
  // numeric format, and an optional second name may supply the other half.
  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return MatchOperand_ParseFail;

  if (trySkipToken(AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
      return MatchOperand_ParseFail;
    }
    // After two names, both halves must be set exactly once; an UNDEF half
    // here means the same kind was given twice.
    if (Dfmt == DFMT_UNDEF) {
      Error(Loc, "duplicate numeric format");
      return MatchOperand_ParseFail;
    } else if (Nfmt == NFMT_UNDEF) {
      Error(Loc, "duplicate data format");
      return MatchOperand_ParseFail;
    }
  }

  // A missing half falls back to its default encoding.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  if (isGFX10Plus()) {
    // GFX10+ encodes format as a single unified value; not every dfmt/nfmt
    // pair has a unified equivalent.
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
    if (Ufmt == UFMT_UNDEF) {
      Error(FormatLoc, "unsupported format");
      return MatchOperand_ParseFail;
    }
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return MatchOperand_Success;
}
5906 
5907 OperandMatchResultTy
5908 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5909                                             SMLoc Loc,
5910                                             int64_t &Format) {
5911   using namespace llvm::AMDGPU::MTBUFFormat;
5912 
5913   auto Id = getUnifiedFormat(FormatStr);
5914   if (Id == UFMT_UNDEF)
5915     return MatchOperand_NoMatch;
5916 
5917   if (!isGFX10Plus()) {
5918     Error(Loc, "unified format is not supported on this GPU");
5919     return MatchOperand_ParseFail;
5920   }
5921 
5922   Format = Id;
5923   return MatchOperand_Success;
5924 }
5925 
5926 OperandMatchResultTy
5927 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5928   using namespace llvm::AMDGPU::MTBUFFormat;
5929   SMLoc Loc = getLoc();
5930 
5931   if (!parseExpr(Format))
5932     return MatchOperand_ParseFail;
5933   if (!isValidFormatEncoding(Format, getSTI())) {
5934     Error(Loc, "out of range format");
5935     return MatchOperand_ParseFail;
5936   }
5937 
5938   return MatchOperand_Success;
5939 }
5940 
OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  // Accepts either "format:[SYMBOLIC...]" or "format:<numeric-expr>".
  if (!trySkipId("format", AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (trySkipToken(AsmToken::LBrac)) {
    StringRef FormatStr;
    SMLoc Loc = getLoc();
    if (!parseId(FormatStr, "expected a format string"))
      return MatchOperand_ParseFail;

    // Try the unified (GFX10+) names first, then fall back to the split
    // dfmt/nfmt names.
    auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
    if (Res == MatchOperand_NoMatch)
      Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
    if (Res != MatchOperand_Success)
      return Res;

    if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
      return MatchOperand_ParseFail;

    return MatchOperand_Success;
  }

  return parseNumericFormat(Format);
}
5968 
OperandMatchResultTy
AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  // Parse the MTBUF format operand, which may appear either before or
  // after the soffset operand.  A format operand is always appended (with
  // the subtarget default if none was written).
  int64_t Format = getDefaultFormatEncoding(getSTI());
  OperandMatchResultTy Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res == MatchOperand_ParseFail)
    return Res;

  bool FormatFound = (Res == MatchOperand_Success);

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return MatchOperand_Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (Res != MatchOperand_Success)
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    // The format may also follow soffset; if it does, patch it back into
    // the format operand pushed above (second-to-last operand).
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res == MatchOperand_ParseFail)
      return Res;
    if (Res == MatchOperand_Success) {
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return MatchOperand_Success;
  }

  // Format was already given before soffset; a second one is an error.
  if (isId("format") && peekToken().is(AsmToken::Colon)) {
    Error(getLoc(), "duplicate format");
    return MatchOperand_ParseFail;
  }
  return MatchOperand_Success;
}
6022 
6023 //===----------------------------------------------------------------------===//
6024 // ds
6025 //===----------------------------------------------------------------------===//
6026 
6027 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6028                                     const OperandVector &Operands) {
6029   OptionalImmIndexMap OptionalIdx;
6030 
6031   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6032     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6033 
6034     // Add the register arguments
6035     if (Op.isReg()) {
6036       Op.addRegOperands(Inst, 1);
6037       continue;
6038     }
6039 
6040     // Handle optional arguments
6041     OptionalIdx[Op.getImmTy()] = i;
6042   }
6043 
6044   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6045   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6046   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6047 
6048   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6049 }
6050 
6051 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6052                                 bool IsGdsHardcoded) {
6053   OptionalImmIndexMap OptionalIdx;
6054 
6055   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6056     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6057 
6058     // Add the register arguments
6059     if (Op.isReg()) {
6060       Op.addRegOperands(Inst, 1);
6061       continue;
6062     }
6063 
6064     if (Op.isToken() && Op.getToken() == "gds") {
6065       IsGdsHardcoded = true;
6066       continue;
6067     }
6068 
6069     // Handle optional arguments
6070     OptionalIdx[Op.getImmTy()] = i;
6071   }
6072 
6073   AMDGPUOperand::ImmTy OffsetType =
6074     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6075      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6076      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6077                                                       AMDGPUOperand::ImmTyOffset;
6078 
6079   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6080 
6081   if (!IsGdsHardcoded) {
6082     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6083   }
6084   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6085 }
6086 
// Convert parsed exp operands into an MCInst, computing the source enable
// mask and rewriting the sources for compressed exports.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // MCInst operand indices of the four exp sources; needed below to
  // rewrite them for "compr" and to compute the enable mask.
  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // An "off" source becomes a NoRegister placeholder.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // "done" is reflected in the opcode; drop the token here.
    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  // For compressed exports, the source parsed into slot 2 moves into
  // slot 1, and the two upper slots become unused.
  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Enable mask: one bit per active source, or a bit pair per active
  // source when compressed.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
6147 
6148 //===----------------------------------------------------------------------===//
6149 // s_waitcnt
6150 //===----------------------------------------------------------------------===//
6151 
6152 static bool
6153 encodeCnt(
6154   const AMDGPU::IsaVersion ISA,
6155   int64_t &IntVal,
6156   int64_t CntVal,
6157   bool Saturate,
6158   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6159   unsigned (*decode)(const IsaVersion &Version, unsigned))
6160 {
6161   bool Failed = false;
6162 
6163   IntVal = encode(ISA, IntVal, CntVal);
6164   if (CntVal != decode(ISA, IntVal)) {
6165     if (Saturate) {
6166       IntVal = encode(ISA, IntVal, -1);
6167     } else {
6168       Failed = true;
6169     }
6170   }
6171   return Failed;
6172 }
6173 
// Parse one "<name>(<value>)" counter specifier of s_waitcnt and fold the
// value into the corresponding bitfield of IntVal. A "_sat" name suffix
// clamps out-of-range values instead of reporting an error.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // Specifiers may be separated by '&' or ','. A trailing separator with
  // nothing after it is an error.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}
6221 
6222 OperandMatchResultTy
6223 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6224   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6225   int64_t Waitcnt = getWaitcntBitMask(ISA);
6226   SMLoc S = getLoc();
6227 
6228   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6229     while (!isToken(AsmToken::EndOfStatement)) {
6230       if (!parseCnt(Waitcnt))
6231         return MatchOperand_ParseFail;
6232     }
6233   } else {
6234     if (!parseExpr(Waitcnt))
6235       return MatchOperand_ParseFail;
6236   }
6237 
6238   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6239   return MatchOperand_Success;
6240 }
6241 
// An s_waitcnt operand is represented as a plain immediate.
bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
6246 
6247 //===----------------------------------------------------------------------===//
6248 // hwreg
6249 //===----------------------------------------------------------------------===//
6250 
// Parse the body of a hwreg(...) macro after the opening parenthesis:
//   hwreg(<name|expr> [, <offset>, <width>])
// Fills in the value and source location of each field; offset and width
// keep their caller-provided defaults when omitted.
bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
                                OperandInfoTy &Offset,
                                OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Id, "a register name")) {
    return false;
  }

  // An immediate closing parenthesis means offset/width use defaults.
  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return false;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Id))
    return false;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  Width.Loc = getLoc();
  return parseExpr(Width.Id) &&
         skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
6285 
6286 bool
6287 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6288                                const OperandInfoTy &Offset,
6289                                const OperandInfoTy &Width) {
6290 
6291   using namespace llvm::AMDGPU::Hwreg;
6292 
6293   if (HwReg.IsSymbolic) {
6294     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6295       Error(HwReg.Loc,
6296             "specified hardware register is not supported on this GPU");
6297       return false;
6298     }
6299   } else {
6300     if (!isValidHwreg(HwReg.Id)) {
6301       Error(HwReg.Loc,
6302             "invalid code of hardware register: only 6-bit values are legal");
6303       return false;
6304     }
6305   }
6306   if (!isValidHwregOffset(Offset.Id)) {
6307     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6308     return false;
6309   }
6310   if (!isValidHwregWidth(Width.Id)) {
6311     Error(Width.Loc,
6312           "invalid bitfield width: only values from 1 to 32 are legal");
6313     return false;
6314   }
6315   return true;
6316 }
6317 
6318 OperandMatchResultTy
6319 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6320   using namespace llvm::AMDGPU::Hwreg;
6321 
6322   int64_t ImmVal = 0;
6323   SMLoc Loc = getLoc();
6324 
6325   if (trySkipId("hwreg", AsmToken::LParen)) {
6326     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6327     OperandInfoTy Offset(OFFSET_DEFAULT_);
6328     OperandInfoTy Width(WIDTH_DEFAULT_);
6329     if (parseHwregBody(HwReg, Offset, Width) &&
6330         validateHwreg(HwReg, Offset, Width)) {
6331       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6332     } else {
6333       return MatchOperand_ParseFail;
6334     }
6335   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6336     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6337       Error(Loc, "invalid immediate: only 16-bit values are legal");
6338       return MatchOperand_ParseFail;
6339     }
6340   } else {
6341     return MatchOperand_ParseFail;
6342   }
6343 
6344   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6345   return MatchOperand_Success;
6346 }
6347 
// A hwreg operand is an immediate tagged with ImmTyHwreg.
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
6351 
6352 //===----------------------------------------------------------------------===//
6353 // sendmsg
6354 //===----------------------------------------------------------------------===//
6355 
// Parse the body of a sendmsg(...) macro after the opening parenthesis:
//   sendmsg(<msg> [, <operation> [, <stream>]])
// Each field may be a symbolic name or an absolute expression; fills in
// each field's value, location, and (where applicable) IsDefined flag.
bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id, "a message name")) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    // A stream id may only follow an operation.
    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
6390 
6391 bool
6392 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6393                                  const OperandInfoTy &Op,
6394                                  const OperandInfoTy &Stream) {
6395   using namespace llvm::AMDGPU::SendMsg;
6396 
6397   // Validation strictness depends on whether message is specified
6398   // in a symbolic or in a numeric form. In the latter case
6399   // only encoding possibility is checked.
6400   bool Strict = Msg.IsSymbolic;
6401 
6402   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6403     Error(Msg.Loc, "invalid message id");
6404     return false;
6405   }
6406   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6407     if (Op.IsDefined) {
6408       Error(Op.Loc, "message does not support operations");
6409     } else {
6410       Error(Msg.Loc, "missing message operation");
6411     }
6412     return false;
6413   }
6414   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6415     Error(Op.Loc, "invalid operation id");
6416     return false;
6417   }
6418   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6419     Error(Stream.Loc, "message operation does not support streams");
6420     return false;
6421   }
6422   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6423     Error(Stream.Loc, "invalid message stream id");
6424     return false;
6425   }
6426   return true;
6427 }
6428 
6429 OperandMatchResultTy
6430 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6431   using namespace llvm::AMDGPU::SendMsg;
6432 
6433   int64_t ImmVal = 0;
6434   SMLoc Loc = getLoc();
6435 
6436   if (trySkipId("sendmsg", AsmToken::LParen)) {
6437     OperandInfoTy Msg(ID_UNKNOWN_);
6438     OperandInfoTy Op(OP_NONE_);
6439     OperandInfoTy Stream(STREAM_ID_NONE_);
6440     if (parseSendMsgBody(Msg, Op, Stream) &&
6441         validateSendMsg(Msg, Op, Stream)) {
6442       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6443     } else {
6444       return MatchOperand_ParseFail;
6445     }
6446   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6447     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6448       Error(Loc, "invalid immediate: only 16-bit values are legal");
6449       return MatchOperand_ParseFail;
6450     }
6451   } else {
6452     return MatchOperand_ParseFail;
6453   }
6454 
6455   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6456   return MatchOperand_Success;
6457 }
6458 
// A sendmsg operand is an immediate tagged with ImmTySendMsg.
bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
6462 
6463 //===----------------------------------------------------------------------===//
6464 // v_interp
6465 //===----------------------------------------------------------------------===//
6466 
6467 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6468   StringRef Str;
6469   SMLoc S = getLoc();
6470 
6471   if (!parseId(Str))
6472     return MatchOperand_NoMatch;
6473 
6474   int Slot = StringSwitch<int>(Str)
6475     .Case("p10", 0)
6476     .Case("p20", 1)
6477     .Case("p0", 2)
6478     .Default(-1);
6479 
6480   if (Slot == -1) {
6481     Error(S, "invalid interpolation slot");
6482     return MatchOperand_ParseFail;
6483   }
6484 
6485   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6486                                               AMDGPUOperand::ImmTyInterpSlot));
6487   return MatchOperand_Success;
6488 }
6489 
6490 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6491   StringRef Str;
6492   SMLoc S = getLoc();
6493 
6494   if (!parseId(Str))
6495     return MatchOperand_NoMatch;
6496 
6497   if (!Str.startswith("attr")) {
6498     Error(S, "invalid interpolation attribute");
6499     return MatchOperand_ParseFail;
6500   }
6501 
6502   StringRef Chan = Str.take_back(2);
6503   int AttrChan = StringSwitch<int>(Chan)
6504     .Case(".x", 0)
6505     .Case(".y", 1)
6506     .Case(".z", 2)
6507     .Case(".w", 3)
6508     .Default(-1);
6509   if (AttrChan == -1) {
6510     Error(S, "invalid or missing interpolation attribute channel");
6511     return MatchOperand_ParseFail;
6512   }
6513 
6514   Str = Str.drop_back(2).drop_front(4);
6515 
6516   uint8_t Attr;
6517   if (Str.getAsInteger(10, Attr)) {
6518     Error(S, "invalid or missing interpolation attribute number");
6519     return MatchOperand_ParseFail;
6520   }
6521 
6522   if (Attr > 63) {
6523     Error(S, "out of bounds interpolation attribute number");
6524     return MatchOperand_ParseFail;
6525   }
6526 
6527   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6528 
6529   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6530                                               AMDGPUOperand::ImmTyInterpAttr));
6531   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6532                                               AMDGPUOperand::ImmTyAttrChan));
6533   return MatchOperand_Success;
6534 }
6535 
6536 //===----------------------------------------------------------------------===//
6537 // exp
6538 //===----------------------------------------------------------------------===//
6539 
6540 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6541   using namespace llvm::AMDGPU::Exp;
6542 
6543   StringRef Str;
6544   SMLoc S = getLoc();
6545 
6546   if (!parseId(Str))
6547     return MatchOperand_NoMatch;
6548 
6549   unsigned Id = getTgtId(Str);
6550   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6551     Error(S, (Id == ET_INVALID) ?
6552                 "invalid exp target" :
6553                 "exp target is not supported on this GPU");
6554     return MatchOperand_ParseFail;
6555   }
6556 
6557   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6558                                               AMDGPUOperand::ImmTyExpTgt));
6559   return MatchOperand_Success;
6560 }
6561 
6562 //===----------------------------------------------------------------------===//
6563 // parser helpers
6564 //===----------------------------------------------------------------------===//
6565 
6566 bool
6567 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6568   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6569 }
6570 
// True if the current token is an identifier whose spelling equals Id.
bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}
6575 
6576 bool
6577 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6578   return getTokenKind() == Kind;
6579 }
6580 
6581 bool
6582 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6583   if (isId(Id)) {
6584     lex();
6585     return true;
6586   }
6587   return false;
6588 }
6589 
6590 bool
6591 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6592   if (isToken(AsmToken::Identifier)) {
6593     StringRef Tok = getTokenStr();
6594     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6595       lex();
6596       return true;
6597     }
6598   }
6599   return false;
6600 }
6601 
6602 bool
6603 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6604   if (isId(Id) && peekToken().is(Kind)) {
6605     lex();
6606     lex();
6607     return true;
6608   }
6609   return false;
6610 }
6611 
6612 bool
6613 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6614   if (isToken(Kind)) {
6615     lex();
6616     return true;
6617   }
6618   return false;
6619 }
6620 
6621 bool
6622 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6623                            const StringRef ErrMsg) {
6624   if (!trySkipToken(Kind)) {
6625     Error(getLoc(), ErrMsg);
6626     return false;
6627   }
6628   return true;
6629 }
6630 
6631 bool
6632 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6633   SMLoc S = getLoc();
6634 
6635   const MCExpr *Expr;
6636   if (Parser.parseExpression(Expr))
6637     return false;
6638 
6639   if (Expr->evaluateAsAbsolute(Imm))
6640     return true;
6641 
6642   if (Expected.empty()) {
6643     Error(S, "expected absolute expression");
6644   } else {
6645     Error(S, Twine("expected ", Expected) +
6646              Twine(" or an absolute expression"));
6647   }
6648   return false;
6649 }
6650 
6651 bool
6652 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6653   SMLoc S = getLoc();
6654 
6655   const MCExpr *Expr;
6656   if (Parser.parseExpression(Expr))
6657     return false;
6658 
6659   int64_t IntVal;
6660   if (Expr->evaluateAsAbsolute(IntVal)) {
6661     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6662   } else {
6663     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6664   }
6665   return true;
6666 }
6667 
6668 bool
6669 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6670   if (isToken(AsmToken::String)) {
6671     Val = getToken().getStringContents();
6672     lex();
6673     return true;
6674   } else {
6675     Error(getLoc(), ErrMsg);
6676     return false;
6677   }
6678 }
6679 
6680 bool
6681 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6682   if (isToken(AsmToken::Identifier)) {
6683     Val = getTokenStr();
6684     lex();
6685     return true;
6686   } else {
6687     if (!ErrMsg.empty())
6688       Error(getLoc(), ErrMsg);
6689     return false;
6690   }
6691 }
6692 
// Return (a copy of) the current lexer token.
AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}
6697 
6698 AsmToken
6699 AMDGPUAsmParser::peekToken() {
6700   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6701 }
6702 
6703 void
6704 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6705   auto TokCount = getLexer().peekTokens(Tokens);
6706 
6707   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6708     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6709 }
6710 
// Return the kind of the current lexer token.
AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}
6715 
// Return the source location of the current token.
SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}
6720 
// Return the spelling of the current token.
StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}
6725 
// Advance the lexer by one token.
void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}
6730 
6731 SMLoc
6732 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6733                                const OperandVector &Operands) const {
6734   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6735     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6736     if (Test(Op))
6737       return Op.getStartLoc();
6738   }
6739   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6740 }
6741 
6742 SMLoc
6743 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6744                            const OperandVector &Operands) const {
6745   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6746   return getOperandLoc(Test, Operands);
6747 }
6748 
6749 SMLoc
6750 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6751                            const OperandVector &Operands) const {
6752   auto Test = [=](const AMDGPUOperand& Op) {
6753     return Op.isRegKind() && Op.getReg() == Reg;
6754   };
6755   return getOperandLoc(Test, Operands);
6756 }
6757 
6758 SMLoc
6759 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6760   auto Test = [](const AMDGPUOperand& Op) {
6761     return Op.IsImmKindLiteral() || Op.isExpr();
6762   };
6763   return getOperandLoc(Test, Operands);
6764 }
6765 
6766 SMLoc
6767 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6768   auto Test = [](const AMDGPUOperand& Op) {
6769     return Op.isImmKindConst();
6770   };
6771   return getOperandLoc(Test, Operands);
6772 }
6773 
6774 //===----------------------------------------------------------------------===//
6775 // swizzle
6776 //===----------------------------------------------------------------------===//
6777 
6778 LLVM_READNONE
6779 static unsigned
6780 encodeBitmaskPerm(const unsigned AndMask,
6781                   const unsigned OrMask,
6782                   const unsigned XorMask) {
6783   using namespace llvm::AMDGPU::Swizzle;
6784 
6785   return BITMASK_PERM_ENC |
6786          (AndMask << BITMASK_AND_SHIFT) |
6787          (OrMask  << BITMASK_OR_SHIFT)  |
6788          (XorMask << BITMASK_XOR_SHIFT);
6789 }
6790 
6791 bool
6792 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6793                                      const unsigned MinVal,
6794                                      const unsigned MaxVal,
6795                                      const StringRef ErrMsg,
6796                                      SMLoc &Loc) {
6797   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6798     return false;
6799   }
6800   Loc = getLoc();
6801   if (!parseExpr(Op)) {
6802     return false;
6803   }
6804   if (Op < MinVal || Op > MaxVal) {
6805     Error(Loc, ErrMsg);
6806     return false;
6807   }
6808 
6809   return true;
6810 }
6811 
6812 bool
6813 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6814                                       const unsigned MinVal,
6815                                       const unsigned MaxVal,
6816                                       const StringRef ErrMsg) {
6817   SMLoc Loc;
6818   for (unsigned i = 0; i < OpNum; ++i) {
6819     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6820       return false;
6821   }
6822 
6823   return true;
6824 }
6825 
6826 bool
6827 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6828   using namespace llvm::AMDGPU::Swizzle;
6829 
6830   int64_t Lane[LANE_NUM];
6831   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6832                            "expected a 2-bit lane id")) {
6833     Imm = QUAD_PERM_ENC;
6834     for (unsigned I = 0; I < LANE_NUM; ++I) {
6835       Imm |= Lane[I] << (LANE_SHIFT * I);
6836     }
6837     return true;
6838   }
6839   return false;
6840 }
6841 
6842 bool
6843 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6844   using namespace llvm::AMDGPU::Swizzle;
6845 
6846   SMLoc Loc;
6847   int64_t GroupSize;
6848   int64_t LaneIdx;
6849 
6850   if (!parseSwizzleOperand(GroupSize,
6851                            2, 32,
6852                            "group size must be in the interval [2,32]",
6853                            Loc)) {
6854     return false;
6855   }
6856   if (!isPowerOf2_64(GroupSize)) {
6857     Error(Loc, "group size must be a power of two");
6858     return false;
6859   }
6860   if (parseSwizzleOperand(LaneIdx,
6861                           0, GroupSize - 1,
6862                           "lane id must be in the interval [0,group size - 1]",
6863                           Loc)) {
6864     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6865     return true;
6866   }
6867   return false;
6868 }
6869 
6870 bool
6871 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6872   using namespace llvm::AMDGPU::Swizzle;
6873 
6874   SMLoc Loc;
6875   int64_t GroupSize;
6876 
6877   if (!parseSwizzleOperand(GroupSize,
6878                            2, 32,
6879                            "group size must be in the interval [2,32]",
6880                            Loc)) {
6881     return false;
6882   }
6883   if (!isPowerOf2_64(GroupSize)) {
6884     Error(Loc, "group size must be a power of two");
6885     return false;
6886   }
6887 
6888   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6889   return true;
6890 }
6891 
6892 bool
6893 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6894   using namespace llvm::AMDGPU::Swizzle;
6895 
6896   SMLoc Loc;
6897   int64_t GroupSize;
6898 
6899   if (!parseSwizzleOperand(GroupSize,
6900                            1, 16,
6901                            "group size must be in the interval [1,16]",
6902                            Loc)) {
6903     return false;
6904   }
6905   if (!isPowerOf2_64(GroupSize)) {
6906     Error(Loc, "group size must be a power of two");
6907     return false;
6908   }
6909 
6910   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6911   return true;
6912 }
6913 
6914 bool
6915 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6916   using namespace llvm::AMDGPU::Swizzle;
6917 
6918   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6919     return false;
6920   }
6921 
6922   StringRef Ctl;
6923   SMLoc StrLoc = getLoc();
6924   if (!parseString(Ctl)) {
6925     return false;
6926   }
6927   if (Ctl.size() != BITMASK_WIDTH) {
6928     Error(StrLoc, "expected a 5-character mask");
6929     return false;
6930   }
6931 
6932   unsigned AndMask = 0;
6933   unsigned OrMask = 0;
6934   unsigned XorMask = 0;
6935 
6936   for (size_t i = 0; i < Ctl.size(); ++i) {
6937     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6938     switch(Ctl[i]) {
6939     default:
6940       Error(StrLoc, "invalid mask");
6941       return false;
6942     case '0':
6943       break;
6944     case '1':
6945       OrMask |= Mask;
6946       break;
6947     case 'p':
6948       AndMask |= Mask;
6949       break;
6950     case 'i':
6951       AndMask |= Mask;
6952       XorMask |= Mask;
6953       break;
6954     }
6955   }
6956 
6957   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6958   return true;
6959 }
6960 
6961 bool
6962 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6963 
6964   SMLoc OffsetLoc = getLoc();
6965 
6966   if (!parseExpr(Imm, "a swizzle macro")) {
6967     return false;
6968   }
6969   if (!isUInt<16>(Imm)) {
6970     Error(OffsetLoc, "expected a 16-bit offset");
6971     return false;
6972   }
6973   return true;
6974 }
6975 
6976 bool
6977 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6978   using namespace llvm::AMDGPU::Swizzle;
6979 
6980   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6981 
6982     SMLoc ModeLoc = getLoc();
6983     bool Ok = false;
6984 
6985     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6986       Ok = parseSwizzleQuadPerm(Imm);
6987     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6988       Ok = parseSwizzleBitmaskPerm(Imm);
6989     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6990       Ok = parseSwizzleBroadcast(Imm);
6991     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6992       Ok = parseSwizzleSwap(Imm);
6993     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6994       Ok = parseSwizzleReverse(Imm);
6995     } else {
6996       Error(ModeLoc, "expected a swizzle mode");
6997     }
6998 
6999     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
7000   }
7001 
7002   return false;
7003 }
7004 
7005 OperandMatchResultTy
7006 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7007   SMLoc S = getLoc();
7008   int64_t Imm = 0;
7009 
7010   if (trySkipId("offset")) {
7011 
7012     bool Ok = false;
7013     if (skipToken(AsmToken::Colon, "expected a colon")) {
7014       if (trySkipId("swizzle")) {
7015         Ok = parseSwizzleMacro(Imm);
7016       } else {
7017         Ok = parseSwizzleOffset(Imm);
7018       }
7019     }
7020 
7021     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7022 
7023     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7024   } else {
7025     // Swizzle "offset" operand is optional.
7026     // If it is omitted, try parsing other optional operands.
7027     return parseOptionalOpr(Operands);
7028   }
7029 }
7030 
// A swizzle operand is an immediate tagged with ImmTySwizzle.
bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
7035 
7036 //===----------------------------------------------------------------------===//
7037 // VGPR Index Mode
7038 //===----------------------------------------------------------------------===//
7039 
// Parse the body of a gpr_idx(...) macro: a comma-separated list of VGPR
// index mode names terminated by ')'. Returns the combined mode bitmask,
// OFF for an empty list, or UNDEF after reporting a parse error.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  // An immediately closing parenthesis means no modes were specified.
  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    // Try each symbolic mode name; each one maps to a single mask bit.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      // Mention the closing parenthesis only before the first mode has been
      // accepted; after a comma another mode is mandatory.
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    // Each mode may appear at most once.
    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}
7083 
7084 OperandMatchResultTy
7085 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7086 
7087   using namespace llvm::AMDGPU::VGPRIndexMode;
7088 
7089   int64_t Imm = 0;
7090   SMLoc S = getLoc();
7091 
7092   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7093     Imm = parseGPRIdxMacro();
7094     if (Imm == UNDEF)
7095       return MatchOperand_ParseFail;
7096   } else {
7097     if (getParser().parseAbsoluteExpression(Imm))
7098       return MatchOperand_ParseFail;
7099     if (Imm < 0 || !isUInt<4>(Imm)) {
7100       Error(S, "invalid immediate: only 4-bit values are legal");
7101       return MatchOperand_ParseFail;
7102     }
7103   }
7104 
7105   Operands.push_back(
7106       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7107   return MatchOperand_Success;
7108 }
7109 
// True iff this operand was parsed as a VGPR index mode immediate.
bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}
7113 
7114 //===----------------------------------------------------------------------===//
7115 // sopp branch targets
7116 //===----------------------------------------------------------------------===//
7117 
// Parse a SOPP branch target: a label or an absolute expression that fits
// in a 16-bit signed jump offset.
OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return MatchOperand_NoMatch;

  if (!parseExpr(Operands))
    return MatchOperand_ParseFail;

  // parseExpr pushed the parsed operand; inspect the last one.
  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  // The diagnostics above are recorded via Error(); the operand is kept and
  // Success is returned so parsing continues past this operand.
  return MatchOperand_Success;
}
7144 
7145 //===----------------------------------------------------------------------===//
7146 // Boolean holding registers
7147 //===----------------------------------------------------------------------===//
7148 
// A boolean-holding register operand is syntactically a plain register;
// delegate to the common register parser.
OperandMatchResultTy
AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}
7153 
7154 //===----------------------------------------------------------------------===//
7155 // mubuf
7156 //===----------------------------------------------------------------------===//
7157 
// Default cache policy operand: all CPol bits clear.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
}
7161 
// Common MUBUF conversion: emits registers and the immediate soffset in
// parse order, records optional modifiers, fixes up atomic return/no-return
// and lds/non-lds opcode selection, then appends optional immediates.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    // An atomic is a "return" atomic when its cache policy has GLC set.
    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      if (!Op.isCPol())
        continue;
      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
      break;
    }

    // Without GLC, switch to the no-return flavor of the opcode if one
    // exists.
    if (!IsAtomicReturn) {
      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
      if (NewOpc != -1)
        Inst.setOpcode(NewOpc);
    }

    // Re-derive the final answer from the (possibly updated) opcode flags.
    IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
                      SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}
7247 
7248 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7249   OptionalImmIndexMap OptionalIdx;
7250 
7251   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7252     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7253 
7254     // Add the register arguments
7255     if (Op.isReg()) {
7256       Op.addRegOperands(Inst, 1);
7257       continue;
7258     }
7259 
7260     // Handle the case where soffset is an immediate
7261     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7262       Op.addImmOperands(Inst, 1);
7263       continue;
7264     }
7265 
7266     // Handle tokens like 'offen' which are sometimes hard-coded into the
7267     // asm string.  There are no MCInst operands for these.
7268     if (Op.isToken()) {
7269       continue;
7270     }
7271     assert(Op.isImm());
7272 
7273     // Handle optional arguments
7274     OptionalIdx[Op.getImmTy()] = i;
7275   }
7276 
7277   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7278                         AMDGPUOperand::ImmTyOffset);
7279   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7280   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7281   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7282   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7283 }
7284 
7285 //===----------------------------------------------------------------------===//
7286 // mimg
7287 //===----------------------------------------------------------------------===//
7288 
// Convert a parsed MIMG instruction: emit defs (duplicating the single def
// as a tied src for atomics), then the remaining register operands, then
// the optional immediate modifiers the opcode expects.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10Plus = isGFX10Plus();

  // Append optional immediates; some fields exist only on particular
  // generations (dim/a16 on GFX10+, da before GFX10) or opcodes (tfe).
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
  if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
7335 
// MIMG atomics convert like regular MIMG with IsAtomic set, which adds a
// tied src matching the dst.
void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}
7339 
// Convert an SMEM atomic: pick the return/no-return opcode flavor from the
// GLC cache-policy bit, duplicate the dst as a tied src for return atomics,
// then append the optional offset and cpol immediates.
void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  bool IsAtomicReturn = false;

  // An atomic is a "return" atomic when its cache policy has GLC set.
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (!Op.isCPol())
      continue;
    IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
    break;
  }

  // Without GLC, switch to the no-return flavor of the opcode if one exists.
  if (!IsAtomicReturn) {
    int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
    if (NewOpc != -1)
      Inst.setOpcode(NewOpc);
  }

  // Re-derive the final answer from the (possibly updated) opcode flags.
  IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
                    SIInstrFlags::IsAtomicRet;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Duplicate the first register (dst) as the tied src for return
      // atomics.
      if (IsAtomicReturn && i == 1)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // Add a default offset only if the offset operand slot has not been
  // filled by the loop above.
  if ((int)Inst.getNumOperands() <=
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
}
7394 
7395 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7396                                       const OperandVector &Operands) {
7397   for (unsigned I = 1; I < Operands.size(); ++I) {
7398     auto &Operand = (AMDGPUOperand &)*Operands[I];
7399     if (Operand.isReg())
7400       Operand.addRegOperands(Inst, 1);
7401   }
7402 
7403   Inst.addOperand(MCOperand::createImm(1)); // a16
7404 }
7405 
7406 //===----------------------------------------------------------------------===//
7407 // smrd
7408 //===----------------------------------------------------------------------===//
7409 
7410 bool AMDGPUOperand::isSMRDOffset8() const {
7411   return isImm() && isUInt<8>(getImm());
7412 }
7413 
// Any immediate is acceptable as an SMEM offset at parse time.
bool AMDGPUOperand::isSMEMOffset() const {
  return isImm(); // Offset range is checked later by validator.
}
7417 
7418 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7419   // 32-bit literals are only supported on CI and we only want to use them
7420   // when the offset is > 8-bits.
7421   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7422 }
7423 
// Default 8-bit SMRD offset: immediate zero.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
7427 
// Default SMEM offset: immediate zero.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
7431 
// Default SMRD literal offset: immediate zero.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
7435 
// Default FLAT offset: immediate zero.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
7439 
7440 //===----------------------------------------------------------------------===//
7441 // vop3
7442 //===----------------------------------------------------------------------===//
7443 
// Map an omod multiplier (1, 2 or 4) onto its encoded field value
// (0, 1 or 2). Any other multiplier is rejected and left untouched.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1; // 1 -> 0, 2 -> 1, 4 -> 2
    return true;
  default:
    return false;
  }
}
7451 
// Map an omod divisor onto its encoded field value: /1 -> 0, /2 -> 3.
// Any other divisor is rejected and left untouched.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
7465 
// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl != 0 && BoundCtrl != 1)
    return false;
  BoundCtrl = 1;
  return true;
}
7476 
7477 // Note: the order in this table matches the order of operands in AsmString.
7478 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7479   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7480   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7481   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7482   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7483   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7484   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7485   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7486   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7487   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7488   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7489   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7490   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7491   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7492   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7493   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7494   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7495   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7496   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7497   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7498   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7499   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7500   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7501   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7502   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7503   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7504   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7505   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7506   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7507   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7508   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7509   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7510   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7511   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7512   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7513   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7514   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7515   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7516   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7517   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7518   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7519   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7520 };
7521 
// Hook run at the start of parsing: initialize the target ID if not already
// set and, for HSA ABI V3+, emit the amdgcn_target directive.
void AMDGPUAsmParser::onBeginOfFile() {
  // Nothing to do without a target streamer or for r600 targets.
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());

  if (isHsaAbiVersion3AndAbove(&getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
7533 
// Entry point for parsing optional operands; see parseOptionalOpr for the
// per-operand dispatch.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
    // Stop on the first failure or once the statement is exhausted.
    if (res != MatchOperand_Success ||
        isToken(AsmToken::EndOfStatement))
      break;

    trySkipToken(AsmToken::Comma);
    res = parseOptionalOpr(Operands);
  }

  return res;
}
7560 
7561 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7562   OperandMatchResultTy res;
7563   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7564     // try to parse any optional operand here
7565     if (Op.IsBit) {
7566       res = parseNamedBit(Op.Name, Operands, Op.Type);
7567     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7568       res = parseOModOperand(Operands);
7569     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7570                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7571                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7572       res = parseSDWASel(Operands, Op.Name, Op.Type);
7573     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7574       res = parseSDWADstUnused(Operands);
7575     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7576                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7577                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7578                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7579       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7580                                         Op.ConvertResult);
7581     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7582       res = parseDim(Operands);
7583     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7584       res = parseCPol(Operands);
7585     } else {
7586       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7587     }
7588     if (res != MatchOperand_NoMatch) {
7589       return res;
7590     }
7591   }
7592   return MatchOperand_NoMatch;
7593 }
7594 
7595 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7596   StringRef Name = getTokenStr();
7597   if (Name == "mul") {
7598     return parseIntWithPrefix("mul", Operands,
7599                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7600   }
7601 
7602   if (Name == "div") {
7603     return parseIntWithPrefix("div", Operands,
7604                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7605   }
7606 
7607   return MatchOperand_NoMatch;
7608 }
7609 
// VOP3 opcodes whose op_sel also selects the destination half: after the
// regular VOP3P conversion, fold that dst bit into src0_modifiers as
// DST_OP_SEL.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many srcN operands the opcode actually has.
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  // The op_sel bit just past the last source bit selects the dst half.
  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
7633 
7634 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7635       // 1. This operand is input modifiers
7636   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7637       // 2. This is not last operand
7638       && Desc.NumOperands > (OpNum + 1)
7639       // 3. Next operand is register class
7640       && Desc.OpInfo[OpNum + 1].RegClass != -1
7641       // 4. Next register is not tied to any other operand
7642       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7643 }
7644 
// Convert a VOP3 interpolation instruction: emits defs, sources (with FP
// input modifiers where the descriptor asks for them), interp-specific
// immediates, and the trailing high/clamp/omod fields when present.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Adds two MC operands: the input modifiers and the value itself.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  // Optional trailing fields exist only on some interp opcodes.
  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
7683 
// Common VOP3 conversion: emits defs, then sources (with or without source
// modifiers depending on whether the opcode declares src0_modifiers),
// records optional immediates in OptionalIdx, appends clamp/omod when the
// opcode has them, and finally handles the mac/fmac tied-src2 special case.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        // Adds two MC operands: the input modifiers and the value itself.
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}
7751 
// VOP3 conversion entry point when the caller does not need access to the
// optional-operand index map.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
7756 
// VOP3P-specific conversion: appends op_sel/op_sel_hi/neg_lo/neg_hi
// operands when the opcode has them, then folds those per-source bits into
// the corresponding srcN_modifiers operands as SISrcMods flags.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  // Opcodes with vdst_in get a copy of the dst operand appended.
  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed instructions default op_sel_hi to all-ones.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Bit J of each vector operand belongs to source J; translate it into the
  // matching srcJ_modifiers flag.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
7838 
// VOP3P entry point: run the common VOP3 conversion first, then the
// VOP3P-specific modifier folding.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}
7844 
7845 //===----------------------------------------------------------------------===//
7846 // dpp
7847 //===----------------------------------------------------------------------===//
7848 
// True if this operand is a dpp8 lane-select immediate.
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}
7852 
7853 bool AMDGPUOperand::isDPPCtrl() const {
7854   using namespace AMDGPU::DPP;
7855 
7856   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7857   if (result) {
7858     int64_t Imm = getImm();
7859     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7860            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7861            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7862            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7863            (Imm == DppCtrl::WAVE_SHL1) ||
7864            (Imm == DppCtrl::WAVE_ROL1) ||
7865            (Imm == DppCtrl::WAVE_SHR1) ||
7866            (Imm == DppCtrl::WAVE_ROR1) ||
7867            (Imm == DppCtrl::ROW_MIRROR) ||
7868            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7869            (Imm == DppCtrl::BCAST15) ||
7870            (Imm == DppCtrl::BCAST31) ||
7871            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7872            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7873   }
7874   return false;
7875 }
7876 
7877 //===----------------------------------------------------------------------===//
7878 // mAI
7879 //===----------------------------------------------------------------------===//
7880 
7881 bool AMDGPUOperand::isBLGP() const {
7882   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7883 }
7884 
7885 bool AMDGPUOperand::isCBSZ() const {
7886   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7887 }
7888 
7889 bool AMDGPUOperand::isABID() const {
7890   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7891 }
7892 
7893 bool AMDGPUOperand::isS16Imm() const {
7894   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7895 }
7896 
7897 bool AMDGPUOperand::isU16Imm() const {
7898   return isImm() && isUInt<16>(getImm());
7899 }
7900 
7901 //===----------------------------------------------------------------------===//
7902 // dim
7903 //===----------------------------------------------------------------------===//
7904 
// Parse a MIMG dimension name (e.g. "1D", "2D_ARRAY", or the full
// "SQ_RSRC_IMG_*" spelling) and store its hardware encoding in Encoding.
// Returns false if the upcoming tokens do not form a known dim name.
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    // The integer must be immediately followed by the identifier part
    // (no gap between the tokens); otherwise this is not a dim name.
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  // Also accept the full "SQ_RSRC_IMG_<dim>" spelling by stripping the
  // prefix before the table lookup.
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}
7933 
7934 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7935   if (!isGFX10Plus())
7936     return MatchOperand_NoMatch;
7937 
7938   SMLoc S = getLoc();
7939 
7940   if (!trySkipId("dim", AsmToken::Colon))
7941     return MatchOperand_NoMatch;
7942 
7943   unsigned Encoding;
7944   SMLoc Loc = getLoc();
7945   if (!parseDimId(Encoding)) {
7946     Error(Loc, "invalid dim value");
7947     return MatchOperand_ParseFail;
7948   }
7949 
7950   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7951                                               AMDGPUOperand::ImmTyDim));
7952   return MatchOperand_Success;
7953 }
7954 
7955 //===----------------------------------------------------------------------===//
7956 // dpp
7957 //===----------------------------------------------------------------------===//
7958 
7959 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7960   SMLoc S = getLoc();
7961 
7962   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7963     return MatchOperand_NoMatch;
7964 
7965   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7966 
7967   int64_t Sels[8];
7968 
7969   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7970     return MatchOperand_ParseFail;
7971 
7972   for (size_t i = 0; i < 8; ++i) {
7973     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7974       return MatchOperand_ParseFail;
7975 
7976     SMLoc Loc = getLoc();
7977     if (getParser().parseAbsoluteExpression(Sels[i]))
7978       return MatchOperand_ParseFail;
7979     if (0 > Sels[i] || 7 < Sels[i]) {
7980       Error(Loc, "expected a 3-bit value");
7981       return MatchOperand_ParseFail;
7982     }
7983   }
7984 
7985   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7986     return MatchOperand_ParseFail;
7987 
7988   unsigned DPP8 = 0;
7989   for (size_t i = 0; i < 8; ++i)
7990     DPP8 |= (Sels[i] << (i * 3));
7991 
7992   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7993   return MatchOperand_Success;
7994 }
7995 
7996 bool
7997 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7998                                     const OperandVector &Operands) {
7999   if (Ctrl == "row_newbcast")
8000     return isGFX90A();
8001 
8002   if (Ctrl == "row_share" ||
8003       Ctrl == "row_xmask")
8004     return isGFX10Plus();
8005 
8006   if (Ctrl == "wave_shl" ||
8007       Ctrl == "wave_shr" ||
8008       Ctrl == "wave_rol" ||
8009       Ctrl == "wave_ror" ||
8010       Ctrl == "row_bcast")
8011     return isVI() || isGFX9();
8012 
8013   return Ctrl == "row_mirror" ||
8014          Ctrl == "row_half_mirror" ||
8015          Ctrl == "quad_perm" ||
8016          Ctrl == "row_shl" ||
8017          Ctrl == "row_shr" ||
8018          Ctrl == "row_ror";
8019 }
8020 
8021 int64_t
8022 AMDGPUAsmParser::parseDPPCtrlPerm() {
8023   // quad_perm:[%d,%d,%d,%d]
8024 
8025   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8026     return -1;
8027 
8028   int64_t Val = 0;
8029   for (int i = 0; i < 4; ++i) {
8030     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8031       return -1;
8032 
8033     int64_t Temp;
8034     SMLoc Loc = getLoc();
8035     if (getParser().parseAbsoluteExpression(Temp))
8036       return -1;
8037     if (Temp < 0 || Temp > 3) {
8038       Error(Loc, "expected a 2-bit value");
8039       return -1;
8040     }
8041 
8042     Val += (Temp << i * 2);
8043   }
8044 
8045   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8046     return -1;
8047 
8048   return Val;
8049 }
8050 
// Parse the integer argument of a "<ctrl>:%d" DPP control and fold it into
// that control's base encoding. Returns -1 (with an error reported) when
// the value is out of range for the given control.
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  // Base encoding plus the inclusive range of legal argument values for
  // each control name.
  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    // Not in the table: must be row_bcast, which only accepts 15 or 31.
    // Val is still assigned when invalid, but the -1 return below makes
    // that value unused.
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    // Controls with a fixed argument (Lo == Hi) encode as the base value;
    // the rest OR the argument into the base encoding.
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}
8098 
8099 OperandMatchResultTy
8100 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8101   using namespace AMDGPU::DPP;
8102 
8103   if (!isToken(AsmToken::Identifier) ||
8104       !isSupportedDPPCtrl(getTokenStr(), Operands))
8105     return MatchOperand_NoMatch;
8106 
8107   SMLoc S = getLoc();
8108   int64_t Val = -1;
8109   StringRef Ctrl;
8110 
8111   parseId(Ctrl);
8112 
8113   if (Ctrl == "row_mirror") {
8114     Val = DppCtrl::ROW_MIRROR;
8115   } else if (Ctrl == "row_half_mirror") {
8116     Val = DppCtrl::ROW_HALF_MIRROR;
8117   } else {
8118     if (skipToken(AsmToken::Colon, "expected a colon")) {
8119       if (Ctrl == "quad_perm") {
8120         Val = parseDPPCtrlPerm();
8121       } else {
8122         Val = parseDPPCtrlSel(Ctrl);
8123       }
8124     }
8125   }
8126 
8127   if (Val == -1)
8128     return MatchOperand_ParseFail;
8129 
8130   Operands.push_back(
8131     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8132   return MatchOperand_Success;
8133 }
8134 
// Default row_mask for DPP: 0xf (all rows enabled).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}
8138 
// Default s_endpgm immediate: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}
8142 
// Default bank_mask for DPP: 0xf (all banks enabled).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}
8146 
// Default bound_ctrl for DPP: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}
8150 
// Default fetch-invalidate (fi) modifier for DPP: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
8154 
// Convert parsed DPP (or, when IsDPP8 is set, DPP8) operands into MCInst
// operands, appending any omitted optional immediates with their default
// values. The conversion is order-sensitive: operands are emitted in the
// instruction's encoding order.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned Opc = Inst.getOpcode();
  bool HasModifiers =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
  // Operands[0] is the mnemonic token; defs come first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    // If the operand slot we are about to fill is tied to an earlier
    // operand, duplicate that operand first.
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (HasModifiers &&
                 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        // fi is encoded separately at the end; remember its value.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (HasModifiers &&
          isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // Append the classic DPP controls, defaulting any the user omitted.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}
8226 
8227 //===----------------------------------------------------------------------===//
8228 // sdwa
8229 //===----------------------------------------------------------------------===//
8230 
8231 OperandMatchResultTy
8232 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8233                               AMDGPUOperand::ImmTy Type) {
8234   using namespace llvm::AMDGPU::SDWA;
8235 
8236   SMLoc S = getLoc();
8237   StringRef Value;
8238   OperandMatchResultTy res;
8239 
8240   SMLoc StringLoc;
8241   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8242   if (res != MatchOperand_Success) {
8243     return res;
8244   }
8245 
8246   int64_t Int;
8247   Int = StringSwitch<int64_t>(Value)
8248         .Case("BYTE_0", SdwaSel::BYTE_0)
8249         .Case("BYTE_1", SdwaSel::BYTE_1)
8250         .Case("BYTE_2", SdwaSel::BYTE_2)
8251         .Case("BYTE_3", SdwaSel::BYTE_3)
8252         .Case("WORD_0", SdwaSel::WORD_0)
8253         .Case("WORD_1", SdwaSel::WORD_1)
8254         .Case("DWORD", SdwaSel::DWORD)
8255         .Default(0xffffffff);
8256 
8257   if (Int == 0xffffffff) {
8258     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8259     return MatchOperand_ParseFail;
8260   }
8261 
8262   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8263   return MatchOperand_Success;
8264 }
8265 
8266 OperandMatchResultTy
8267 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8268   using namespace llvm::AMDGPU::SDWA;
8269 
8270   SMLoc S = getLoc();
8271   StringRef Value;
8272   OperandMatchResultTy res;
8273 
8274   SMLoc StringLoc;
8275   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8276   if (res != MatchOperand_Success) {
8277     return res;
8278   }
8279 
8280   int64_t Int;
8281   Int = StringSwitch<int64_t>(Value)
8282         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8283         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8284         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8285         .Default(0xffffffff);
8286 
8287   if (Int == 0xffffffff) {
8288     Error(StringLoc, "invalid dst_unused value");
8289     return MatchOperand_ParseFail;
8290   }
8291 
8292   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8293   return MatchOperand_Success;
8294 }
8295 
// Convert parsed operands for a VOP1 SDWA instruction.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
8299 
// Convert parsed operands for a VOP2 SDWA instruction.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
8303 
// Convert parsed operands for a VOP2b SDWA instruction; both the vcc dst
// and vcc src tokens are skipped (SkipDstVcc = SkipSrcVcc = true).
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}
8307 
// Convert parsed operands for a VOP2e SDWA instruction; only the vcc src
// token is skipped (SkipDstVcc = false, SkipSrcVcc = true).
void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}
8311 
// Convert parsed operands for a VOPC SDWA instruction; the vcc dst token
// is skipped only on VI (SkipDstVcc = isVI()).
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
8315 
// Convert parsed SDWA operands into MCInst operands, defaulting any
// omitted optional immediates (clamp, omod, dst_sel, dst_unused,
// src0_sel, src1_sel) per the basic instruction type. SkipDstVcc and
// SkipSrcVcc control whether "vcc" tokens in dst/src positions are
// dropped (they are implicit in the SDWA encoding).
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  // Operands[0] is the mnemonic token; defs come first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
8411 
8412 //===----------------------------------------------------------------------===//
8413 // mAI
8414 //===----------------------------------------------------------------------===//
8415 
// Default blgp modifier for MAI instructions: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}
8419 
// Default cbsz modifier for MAI instructions: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}
8423 
// Default abid modifier for MAI instructions: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}
8427 
/// Force static initialization. Registers this asm parser with the two
/// AMDGPU target registry entries (the names suggest R600 and GCN —
/// confirm against TargetInfo/AMDGPUTargetInfo).
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
8433 
8434 #define GET_REGISTER_MATCHER
8435 #define GET_MATCHER_IMPLEMENTATION
8436 #define GET_MNEMONIC_SPELL_CHECKER
8437 #define GET_MNEMONIC_CHECKER
8438 #include "AMDGPUGenAsmMatcher.inc"
8439 
8440 // This function should be defined after auto-generated include so that we have
8441 // MatchClassKind enum defined
// Last-chance validation hook called by the generated matcher: lets an
// operand that was parsed as one class (typically an immediate) match an
// operand class the matcher expected as a token or register.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}
8494 
8495 //===----------------------------------------------------------------------===//
8496 // endpgm
8497 //===----------------------------------------------------------------------===//
8498 
8499 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8500   SMLoc S = getLoc();
8501   int64_t Imm = 0;
8502 
8503   if (!parseExpr(Imm)) {
8504     // The operand is optional, if not present default to 0
8505     Imm = 0;
8506   }
8507 
8508   if (!isUInt<16>(Imm)) {
8509     Error(S, "expected a 16-bit value");
8510     return MatchOperand_ParseFail;
8511   }
8512 
8513   Operands.push_back(
8514       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8515   return MatchOperand_Success;
8516 }
8517 
8518 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8519