1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/TargetParser.h"
39 
40 using namespace llvm;
41 using namespace llvm::AMDGPU;
42 using namespace llvm::amdhsa;
43 
44 namespace {
45 
46 class AMDGPUAsmParser;
47 
48 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
49 
50 //===----------------------------------------------------------------------===//
51 // Operand
52 //===----------------------------------------------------------------------===//
53 
54 class AMDGPUOperand : public MCParsedAsmOperand {
55   enum KindTy {
56     Token,
57     Immediate,
58     Register,
59     Expression
60   } Kind;
61 
62   SMLoc StartLoc, EndLoc;
63   const AMDGPUAsmParser *AsmParser;
64 
65 public:
66   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
67       : Kind(Kind_), AsmParser(AsmParser_) {}
68 
69   using Ptr = std::unique_ptr<AMDGPUOperand>;
70 
71   struct Modifiers {
72     bool Abs = false;
73     bool Neg = false;
74     bool Sext = false;
75 
76     bool hasFPModifiers() const { return Abs || Neg; }
77     bool hasIntModifiers() const { return Sext; }
78     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
79 
80     int64_t getFPModifiersOperand() const {
81       int64_t Operand = 0;
82       Operand |= Abs ? SISrcMods::ABS : 0u;
83       Operand |= Neg ? SISrcMods::NEG : 0u;
84       return Operand;
85     }
86 
87     int64_t getIntModifiersOperand() const {
88       int64_t Operand = 0;
89       Operand |= Sext ? SISrcMods::SEXT : 0u;
90       return Operand;
91     }
92 
93     int64_t getModifiersOperand() const {
94       assert(!(hasFPModifiers() && hasIntModifiers())
95            && "fp and int modifiers should not be used simultaneously");
96       if (hasFPModifiers()) {
97         return getFPModifiersOperand();
98       } else if (hasIntModifiers()) {
99         return getIntModifiersOperand();
100       } else {
101         return 0;
102       }
103     }
104 
105     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
106   };
107 
108   enum ImmTy {
109     ImmTyNone,
110     ImmTyGDS,
111     ImmTyLDS,
112     ImmTyOffen,
113     ImmTyIdxen,
114     ImmTyAddr64,
115     ImmTyOffset,
116     ImmTyInstOffset,
117     ImmTyOffset0,
118     ImmTyOffset1,
119     ImmTyCPol,
120     ImmTySWZ,
121     ImmTyTFE,
122     ImmTyD16,
123     ImmTyClampSI,
124     ImmTyOModSI,
125     ImmTyDPP8,
126     ImmTyDppCtrl,
127     ImmTyDppRowMask,
128     ImmTyDppBankMask,
129     ImmTyDppBoundCtrl,
130     ImmTyDppFi,
131     ImmTySdwaDstSel,
132     ImmTySdwaSrc0Sel,
133     ImmTySdwaSrc1Sel,
134     ImmTySdwaDstUnused,
135     ImmTyDMask,
136     ImmTyDim,
137     ImmTyUNorm,
138     ImmTyDA,
139     ImmTyR128A16,
140     ImmTyA16,
141     ImmTyLWE,
142     ImmTyExpTgt,
143     ImmTyExpCompr,
144     ImmTyExpVM,
145     ImmTyFORMAT,
146     ImmTyHwreg,
147     ImmTyOff,
148     ImmTySendMsg,
149     ImmTyInterpSlot,
150     ImmTyInterpAttr,
151     ImmTyAttrChan,
152     ImmTyOpSel,
153     ImmTyOpSelHi,
154     ImmTyNegLo,
155     ImmTyNegHi,
156     ImmTySwizzle,
157     ImmTyGprIdxMode,
158     ImmTyHigh,
159     ImmTyBLGP,
160     ImmTyCBSZ,
161     ImmTyABID,
162     ImmTyEndpgm,
163   };
164 
165   enum ImmKindTy {
166     ImmKindTyNone,
167     ImmKindTyLiteral,
168     ImmKindTyConst,
169   };
170 
171 private:
172   struct TokOp {
173     const char *Data;
174     unsigned Length;
175   };
176 
177   struct ImmOp {
178     int64_t Val;
179     ImmTy Type;
180     bool IsFPImm;
181     mutable ImmKindTy Kind;
182     Modifiers Mods;
183   };
184 
185   struct RegOp {
186     unsigned RegNo;
187     Modifiers Mods;
188   };
189 
190   union {
191     TokOp Tok;
192     ImmOp Imm;
193     RegOp Reg;
194     const MCExpr *Expr;
195   };
196 
197 public:
198   bool isToken() const override {
199     if (Kind == Token)
200       return true;
201 
202     // When parsing operands, we can't always tell if something was meant to be
203     // a token, like 'gds', or an expression that references a global variable.
204     // In this case, we assume the string is an expression, and if we need to
205     // interpret is a token, then we treat the symbol name as the token.
206     return isSymbolRefExpr();
207   }
208 
209   bool isSymbolRefExpr() const {
210     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
211   }
212 
213   bool isImm() const override {
214     return Kind == Immediate;
215   }
216 
217   void setImmKindNone() const {
218     assert(isImm());
219     Imm.Kind = ImmKindTyNone;
220   }
221 
222   void setImmKindLiteral() const {
223     assert(isImm());
224     Imm.Kind = ImmKindTyLiteral;
225   }
226 
227   void setImmKindConst() const {
228     assert(isImm());
229     Imm.Kind = ImmKindTyConst;
230   }
231 
232   bool IsImmKindLiteral() const {
233     return isImm() && Imm.Kind == ImmKindTyLiteral;
234   }
235 
236   bool isImmKindConst() const {
237     return isImm() && Imm.Kind == ImmKindTyConst;
238   }
239 
240   bool isInlinableImm(MVT type) const;
241   bool isLiteralImm(MVT type) const;
242 
243   bool isRegKind() const {
244     return Kind == Register;
245   }
246 
247   bool isReg() const override {
248     return isRegKind() && !hasModifiers();
249   }
250 
251   bool isRegOrInline(unsigned RCID, MVT type) const {
252     return isRegClass(RCID) || isInlinableImm(type);
253   }
254 
255   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
256     return isRegOrInline(RCID, type) || isLiteralImm(type);
257   }
258 
259   bool isRegOrImmWithInt16InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
261   }
262 
263   bool isRegOrImmWithInt32InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
265   }
266 
267   bool isRegOrImmWithInt64InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
269   }
270 
271   bool isRegOrImmWithFP16InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
273   }
274 
275   bool isRegOrImmWithFP32InputMods() const {
276     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
277   }
278 
279   bool isRegOrImmWithFP64InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
281   }
282 
283   bool isVReg() const {
284     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
285            isRegClass(AMDGPU::VReg_64RegClassID) ||
286            isRegClass(AMDGPU::VReg_96RegClassID) ||
287            isRegClass(AMDGPU::VReg_128RegClassID) ||
288            isRegClass(AMDGPU::VReg_160RegClassID) ||
289            isRegClass(AMDGPU::VReg_192RegClassID) ||
290            isRegClass(AMDGPU::VReg_256RegClassID) ||
291            isRegClass(AMDGPU::VReg_512RegClassID) ||
292            isRegClass(AMDGPU::VReg_1024RegClassID);
293   }
294 
295   bool isVReg32() const {
296     return isRegClass(AMDGPU::VGPR_32RegClassID);
297   }
298 
299   bool isVReg32OrOff() const {
300     return isOff() || isVReg32();
301   }
302 
303   bool isNull() const {
304     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
305   }
306 
307   bool isVRegWithInputMods() const;
308 
309   bool isSDWAOperand(MVT type) const;
310   bool isSDWAFP16Operand() const;
311   bool isSDWAFP32Operand() const;
312   bool isSDWAInt16Operand() const;
313   bool isSDWAInt32Operand() const;
314 
315   bool isImmTy(ImmTy ImmT) const {
316     return isImm() && Imm.Type == ImmT;
317   }
318 
319   bool isImmModifier() const {
320     return isImm() && Imm.Type != ImmTyNone;
321   }
322 
323   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
324   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
325   bool isDMask() const { return isImmTy(ImmTyDMask); }
326   bool isDim() const { return isImmTy(ImmTyDim); }
327   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
328   bool isDA() const { return isImmTy(ImmTyDA); }
329   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
330   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
331   bool isLWE() const { return isImmTy(ImmTyLWE); }
332   bool isOff() const { return isImmTy(ImmTyOff); }
333   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
334   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
335   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
336   bool isOffen() const { return isImmTy(ImmTyOffen); }
337   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
338   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
339   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
340   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
341   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
342 
343   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
344   bool isGDS() const { return isImmTy(ImmTyGDS); }
345   bool isLDS() const { return isImmTy(ImmTyLDS); }
346   bool isCPol() const { return isImmTy(ImmTyCPol); }
347   bool isSWZ() const { return isImmTy(ImmTySWZ); }
348   bool isTFE() const { return isImmTy(ImmTyTFE); }
349   bool isD16() const { return isImmTy(ImmTyD16); }
350   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
351   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
352   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
353   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
354   bool isFI() const { return isImmTy(ImmTyDppFi); }
355   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
356   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
357   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
358   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
359   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
360   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
361   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
362   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
363   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
364   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
365   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
366   bool isHigh() const { return isImmTy(ImmTyHigh); }
367 
368   bool isMod() const {
369     return isClampSI() || isOModSI();
370   }
371 
372   bool isRegOrImm() const {
373     return isReg() || isImm();
374   }
375 
376   bool isRegClass(unsigned RCID) const;
377 
378   bool isInlineValue() const;
379 
380   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
381     return isRegOrInline(RCID, type) && !hasModifiers();
382   }
383 
384   bool isSCSrcB16() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
386   }
387 
388   bool isSCSrcV2B16() const {
389     return isSCSrcB16();
390   }
391 
392   bool isSCSrcB32() const {
393     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
394   }
395 
396   bool isSCSrcB64() const {
397     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
398   }
399 
400   bool isBoolReg() const;
401 
402   bool isSCSrcF16() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
404   }
405 
406   bool isSCSrcV2F16() const {
407     return isSCSrcF16();
408   }
409 
410   bool isSCSrcF32() const {
411     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
412   }
413 
414   bool isSCSrcF64() const {
415     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
416   }
417 
418   bool isSSrcB32() const {
419     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
420   }
421 
422   bool isSSrcB16() const {
423     return isSCSrcB16() || isLiteralImm(MVT::i16);
424   }
425 
426   bool isSSrcV2B16() const {
427     llvm_unreachable("cannot happen");
428     return isSSrcB16();
429   }
430 
431   bool isSSrcB64() const {
432     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
433     // See isVSrc64().
434     return isSCSrcB64() || isLiteralImm(MVT::i64);
435   }
436 
437   bool isSSrcF32() const {
438     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
439   }
440 
441   bool isSSrcF64() const {
442     return isSCSrcB64() || isLiteralImm(MVT::f64);
443   }
444 
445   bool isSSrcF16() const {
446     return isSCSrcB16() || isLiteralImm(MVT::f16);
447   }
448 
449   bool isSSrcV2F16() const {
450     llvm_unreachable("cannot happen");
451     return isSSrcF16();
452   }
453 
454   bool isSSrcV2FP32() const {
455     llvm_unreachable("cannot happen");
456     return isSSrcF32();
457   }
458 
459   bool isSCSrcV2FP32() const {
460     llvm_unreachable("cannot happen");
461     return isSCSrcF32();
462   }
463 
464   bool isSSrcV2INT32() const {
465     llvm_unreachable("cannot happen");
466     return isSSrcB32();
467   }
468 
469   bool isSCSrcV2INT32() const {
470     llvm_unreachable("cannot happen");
471     return isSCSrcB32();
472   }
473 
474   bool isSSrcOrLdsB32() const {
475     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
476            isLiteralImm(MVT::i32) || isExpr();
477   }
478 
479   bool isVCSrcB32() const {
480     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
481   }
482 
483   bool isVCSrcB64() const {
484     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
485   }
486 
487   bool isVCSrcB16() const {
488     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
489   }
490 
491   bool isVCSrcV2B16() const {
492     return isVCSrcB16();
493   }
494 
495   bool isVCSrcF32() const {
496     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
497   }
498 
499   bool isVCSrcF64() const {
500     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
501   }
502 
503   bool isVCSrcF16() const {
504     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
505   }
506 
507   bool isVCSrcV2F16() const {
508     return isVCSrcF16();
509   }
510 
511   bool isVSrcB32() const {
512     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
513   }
514 
515   bool isVSrcB64() const {
516     return isVCSrcF64() || isLiteralImm(MVT::i64);
517   }
518 
519   bool isVSrcB16() const {
520     return isVCSrcB16() || isLiteralImm(MVT::i16);
521   }
522 
523   bool isVSrcV2B16() const {
524     return isVSrcB16() || isLiteralImm(MVT::v2i16);
525   }
526 
527   bool isVCSrcV2FP32() const {
528     return isVCSrcF64();
529   }
530 
531   bool isVSrcV2FP32() const {
532     return isVSrcF64() || isLiteralImm(MVT::v2f32);
533   }
534 
535   bool isVCSrcV2INT32() const {
536     return isVCSrcB64();
537   }
538 
539   bool isVSrcV2INT32() const {
540     return isVSrcB64() || isLiteralImm(MVT::v2i32);
541   }
542 
543   bool isVSrcF32() const {
544     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
545   }
546 
547   bool isVSrcF64() const {
548     return isVCSrcF64() || isLiteralImm(MVT::f64);
549   }
550 
551   bool isVSrcF16() const {
552     return isVCSrcF16() || isLiteralImm(MVT::f16);
553   }
554 
555   bool isVSrcV2F16() const {
556     return isVSrcF16() || isLiteralImm(MVT::v2f16);
557   }
558 
559   bool isVISrcB32() const {
560     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
561   }
562 
563   bool isVISrcB16() const {
564     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
565   }
566 
567   bool isVISrcV2B16() const {
568     return isVISrcB16();
569   }
570 
571   bool isVISrcF32() const {
572     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
573   }
574 
575   bool isVISrcF16() const {
576     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
577   }
578 
579   bool isVISrcV2F16() const {
580     return isVISrcF16() || isVISrcB32();
581   }
582 
583   bool isVISrc_64B64() const {
584     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
585   }
586 
587   bool isVISrc_64F64() const {
588     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
589   }
590 
591   bool isVISrc_64V2FP32() const {
592     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
593   }
594 
595   bool isVISrc_64V2INT32() const {
596     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
597   }
598 
599   bool isVISrc_256B64() const {
600     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
601   }
602 
603   bool isVISrc_256F64() const {
604     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
605   }
606 
607   bool isVISrc_128B16() const {
608     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
609   }
610 
611   bool isVISrc_128V2B16() const {
612     return isVISrc_128B16();
613   }
614 
615   bool isVISrc_128B32() const {
616     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
617   }
618 
619   bool isVISrc_128F32() const {
620     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
621   }
622 
623   bool isVISrc_256V2FP32() const {
624     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
625   }
626 
627   bool isVISrc_256V2INT32() const {
628     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
629   }
630 
631   bool isVISrc_512B32() const {
632     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
633   }
634 
635   bool isVISrc_512B16() const {
636     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
637   }
638 
639   bool isVISrc_512V2B16() const {
640     return isVISrc_512B16();
641   }
642 
643   bool isVISrc_512F32() const {
644     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
645   }
646 
647   bool isVISrc_512F16() const {
648     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
649   }
650 
651   bool isVISrc_512V2F16() const {
652     return isVISrc_512F16() || isVISrc_512B32();
653   }
654 
655   bool isVISrc_1024B32() const {
656     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
657   }
658 
659   bool isVISrc_1024B16() const {
660     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
661   }
662 
663   bool isVISrc_1024V2B16() const {
664     return isVISrc_1024B16();
665   }
666 
667   bool isVISrc_1024F32() const {
668     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
669   }
670 
671   bool isVISrc_1024F16() const {
672     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
673   }
674 
675   bool isVISrc_1024V2F16() const {
676     return isVISrc_1024F16() || isVISrc_1024B32();
677   }
678 
679   bool isAISrcB32() const {
680     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
681   }
682 
683   bool isAISrcB16() const {
684     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
685   }
686 
687   bool isAISrcV2B16() const {
688     return isAISrcB16();
689   }
690 
691   bool isAISrcF32() const {
692     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
693   }
694 
695   bool isAISrcF16() const {
696     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
697   }
698 
699   bool isAISrcV2F16() const {
700     return isAISrcF16() || isAISrcB32();
701   }
702 
703   bool isAISrc_64B64() const {
704     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
705   }
706 
707   bool isAISrc_64F64() const {
708     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
709   }
710 
711   bool isAISrc_128B32() const {
712     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
713   }
714 
715   bool isAISrc_128B16() const {
716     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
717   }
718 
719   bool isAISrc_128V2B16() const {
720     return isAISrc_128B16();
721   }
722 
723   bool isAISrc_128F32() const {
724     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
725   }
726 
727   bool isAISrc_128F16() const {
728     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
729   }
730 
731   bool isAISrc_128V2F16() const {
732     return isAISrc_128F16() || isAISrc_128B32();
733   }
734 
735   bool isVISrc_128F16() const {
736     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
737   }
738 
739   bool isVISrc_128V2F16() const {
740     return isVISrc_128F16() || isVISrc_128B32();
741   }
742 
743   bool isAISrc_256B64() const {
744     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
745   }
746 
747   bool isAISrc_256F64() const {
748     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
749   }
750 
751   bool isAISrc_512B32() const {
752     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
753   }
754 
755   bool isAISrc_512B16() const {
756     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
757   }
758 
759   bool isAISrc_512V2B16() const {
760     return isAISrc_512B16();
761   }
762 
763   bool isAISrc_512F32() const {
764     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
765   }
766 
767   bool isAISrc_512F16() const {
768     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
769   }
770 
771   bool isAISrc_512V2F16() const {
772     return isAISrc_512F16() || isAISrc_512B32();
773   }
774 
775   bool isAISrc_1024B32() const {
776     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
777   }
778 
779   bool isAISrc_1024B16() const {
780     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
781   }
782 
783   bool isAISrc_1024V2B16() const {
784     return isAISrc_1024B16();
785   }
786 
787   bool isAISrc_1024F32() const {
788     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
789   }
790 
791   bool isAISrc_1024F16() const {
792     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
793   }
794 
795   bool isAISrc_1024V2F16() const {
796     return isAISrc_1024F16() || isAISrc_1024B32();
797   }
798 
799   bool isKImmFP32() const {
800     return isLiteralImm(MVT::f32);
801   }
802 
803   bool isKImmFP16() const {
804     return isLiteralImm(MVT::f16);
805   }
806 
807   bool isMem() const override {
808     return false;
809   }
810 
811   bool isExpr() const {
812     return Kind == Expression;
813   }
814 
815   bool isSoppBrTarget() const {
816     return isExpr() || isImm();
817   }
818 
819   bool isSWaitCnt() const;
820   bool isHwreg() const;
821   bool isSendMsg() const;
822   bool isSwizzle() const;
823   bool isSMRDOffset8() const;
824   bool isSMEMOffset() const;
825   bool isSMRDLiteralOffset() const;
826   bool isDPP8() const;
827   bool isDPPCtrl() const;
828   bool isBLGP() const;
829   bool isCBSZ() const;
830   bool isABID() const;
831   bool isGPRIdxMode() const;
832   bool isS16Imm() const;
833   bool isU16Imm() const;
834   bool isEndpgm() const;
835 
836   StringRef getExpressionAsToken() const {
837     assert(isExpr());
838     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
839     return S->getSymbol().getName();
840   }
841 
842   StringRef getToken() const {
843     assert(isToken());
844 
845     if (Kind == Expression)
846       return getExpressionAsToken();
847 
848     return StringRef(Tok.Data, Tok.Length);
849   }
850 
851   int64_t getImm() const {
852     assert(isImm());
853     return Imm.Val;
854   }
855 
856   void setImm(int64_t Val) {
857     assert(isImm());
858     Imm.Val = Val;
859   }
860 
861   ImmTy getImmTy() const {
862     assert(isImm());
863     return Imm.Type;
864   }
865 
866   unsigned getReg() const override {
867     assert(isRegKind());
868     return Reg.RegNo;
869   }
870 
871   SMLoc getStartLoc() const override {
872     return StartLoc;
873   }
874 
875   SMLoc getEndLoc() const override {
876     return EndLoc;
877   }
878 
879   SMRange getLocRange() const {
880     return SMRange(StartLoc, EndLoc);
881   }
882 
883   Modifiers getModifiers() const {
884     assert(isRegKind() || isImmTy(ImmTyNone));
885     return isRegKind() ? Reg.Mods : Imm.Mods;
886   }
887 
888   void setModifiers(Modifiers Mods) {
889     assert(isRegKind() || isImmTy(ImmTyNone));
890     if (isRegKind())
891       Reg.Mods = Mods;
892     else
893       Imm.Mods = Mods;
894   }
895 
896   bool hasModifiers() const {
897     return getModifiers().hasModifiers();
898   }
899 
900   bool hasFPModifiers() const {
901     return getModifiers().hasFPModifiers();
902   }
903 
904   bool hasIntModifiers() const {
905     return getModifiers().hasIntModifiers();
906   }
907 
908   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
909 
910   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
911 
912   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
913 
914   template <unsigned Bitwidth>
915   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
916 
917   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
918     addKImmFPOperands<16>(Inst, N);
919   }
920 
921   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
922     addKImmFPOperands<32>(Inst, N);
923   }
924 
925   void addRegOperands(MCInst &Inst, unsigned N) const;
926 
927   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
928     addRegOperands(Inst, N);
929   }
930 
931   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
932     if (isRegKind())
933       addRegOperands(Inst, N);
934     else if (isExpr())
935       Inst.addOperand(MCOperand::createExpr(Expr));
936     else
937       addImmOperands(Inst, N);
938   }
939 
940   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
941     Modifiers Mods = getModifiers();
942     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
943     if (isRegKind()) {
944       addRegOperands(Inst, N);
945     } else {
946       addImmOperands(Inst, N, false);
947     }
948   }
949 
950   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
951     assert(!hasIntModifiers());
952     addRegOrImmWithInputModsOperands(Inst, N);
953   }
954 
955   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
956     assert(!hasFPModifiers());
957     addRegOrImmWithInputModsOperands(Inst, N);
958   }
959 
960   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
961     Modifiers Mods = getModifiers();
962     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
963     assert(isRegKind());
964     addRegOperands(Inst, N);
965   }
966 
967   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
968     assert(!hasIntModifiers());
969     addRegWithInputModsOperands(Inst, N);
970   }
971 
972   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
973     assert(!hasFPModifiers());
974     addRegWithInputModsOperands(Inst, N);
975   }
976 
977   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
978     if (isImm())
979       addImmOperands(Inst, N);
980     else {
981       assert(isExpr());
982       Inst.addOperand(MCOperand::createExpr(Expr));
983     }
984   }
985 
986   static void printImmTy(raw_ostream& OS, ImmTy Type) {
987     switch (Type) {
988     case ImmTyNone: OS << "None"; break;
989     case ImmTyGDS: OS << "GDS"; break;
990     case ImmTyLDS: OS << "LDS"; break;
991     case ImmTyOffen: OS << "Offen"; break;
992     case ImmTyIdxen: OS << "Idxen"; break;
993     case ImmTyAddr64: OS << "Addr64"; break;
994     case ImmTyOffset: OS << "Offset"; break;
995     case ImmTyInstOffset: OS << "InstOffset"; break;
996     case ImmTyOffset0: OS << "Offset0"; break;
997     case ImmTyOffset1: OS << "Offset1"; break;
998     case ImmTyCPol: OS << "CPol"; break;
999     case ImmTySWZ: OS << "SWZ"; break;
1000     case ImmTyTFE: OS << "TFE"; break;
1001     case ImmTyD16: OS << "D16"; break;
1002     case ImmTyFORMAT: OS << "FORMAT"; break;
1003     case ImmTyClampSI: OS << "ClampSI"; break;
1004     case ImmTyOModSI: OS << "OModSI"; break;
1005     case ImmTyDPP8: OS << "DPP8"; break;
1006     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1007     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1008     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1009     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1010     case ImmTyDppFi: OS << "FI"; break;
1011     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1012     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1013     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1014     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1015     case ImmTyDMask: OS << "DMask"; break;
1016     case ImmTyDim: OS << "Dim"; break;
1017     case ImmTyUNorm: OS << "UNorm"; break;
1018     case ImmTyDA: OS << "DA"; break;
1019     case ImmTyR128A16: OS << "R128A16"; break;
1020     case ImmTyA16: OS << "A16"; break;
1021     case ImmTyLWE: OS << "LWE"; break;
1022     case ImmTyOff: OS << "Off"; break;
1023     case ImmTyExpTgt: OS << "ExpTgt"; break;
1024     case ImmTyExpCompr: OS << "ExpCompr"; break;
1025     case ImmTyExpVM: OS << "ExpVM"; break;
1026     case ImmTyHwreg: OS << "Hwreg"; break;
1027     case ImmTySendMsg: OS << "SendMsg"; break;
1028     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1029     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1030     case ImmTyAttrChan: OS << "AttrChan"; break;
1031     case ImmTyOpSel: OS << "OpSel"; break;
1032     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1033     case ImmTyNegLo: OS << "NegLo"; break;
1034     case ImmTyNegHi: OS << "NegHi"; break;
1035     case ImmTySwizzle: OS << "Swizzle"; break;
1036     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1037     case ImmTyHigh: OS << "High"; break;
1038     case ImmTyBLGP: OS << "BLGP"; break;
1039     case ImmTyCBSZ: OS << "CBSZ"; break;
1040     case ImmTyABID: OS << "ABID"; break;
1041     case ImmTyEndpgm: OS << "Endpgm"; break;
1042     }
1043   }
1044 
1045   void print(raw_ostream &OS) const override {
1046     switch (Kind) {
1047     case Register:
1048       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1049       break;
1050     case Immediate:
1051       OS << '<' << getImm();
1052       if (getImmTy() != ImmTyNone) {
1053         OS << " type: "; printImmTy(OS, getImmTy());
1054       }
1055       OS << " mods: " << Imm.Mods << '>';
1056       break;
1057     case Token:
1058       OS << '\'' << getToken() << '\'';
1059       break;
1060     case Expression:
1061       OS << "<expr " << *Expr << '>';
1062       break;
1063     }
1064   }
1065 
1066   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1067                                       int64_t Val, SMLoc Loc,
1068                                       ImmTy Type = ImmTyNone,
1069                                       bool IsFPImm = false) {
1070     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1071     Op->Imm.Val = Val;
1072     Op->Imm.IsFPImm = IsFPImm;
1073     Op->Imm.Kind = ImmKindTyNone;
1074     Op->Imm.Type = Type;
1075     Op->Imm.Mods = Modifiers();
1076     Op->StartLoc = Loc;
1077     Op->EndLoc = Loc;
1078     return Op;
1079   }
1080 
1081   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1082                                         StringRef Str, SMLoc Loc,
1083                                         bool HasExplicitEncodingSize = true) {
1084     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1085     Res->Tok.Data = Str.data();
1086     Res->Tok.Length = Str.size();
1087     Res->StartLoc = Loc;
1088     Res->EndLoc = Loc;
1089     return Res;
1090   }
1091 
1092   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1093                                       unsigned RegNo, SMLoc S,
1094                                       SMLoc E) {
1095     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1096     Op->Reg.RegNo = RegNo;
1097     Op->Reg.Mods = Modifiers();
1098     Op->StartLoc = S;
1099     Op->EndLoc = E;
1100     return Op;
1101   }
1102 
1103   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1104                                        const class MCExpr *Expr, SMLoc S) {
1105     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1106     Op->Expr = Expr;
1107     Op->StartLoc = S;
1108     Op->EndLoc = S;
1109     return Op;
1110   }
1111 };
1112 
1113 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1114   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1115   return OS;
1116 }
1117 
1118 //===----------------------------------------------------------------------===//
1119 // AsmParser
1120 //===----------------------------------------------------------------------===//
1121 
1122 // Holds info related to the current kernel, e.g. count of SGPRs used.
1123 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1124 // .amdgpu_hsa_kernel or at EOF.
1125 class KernelScopeInfo {
1126   int SgprIndexUnusedMin = -1;
1127   int VgprIndexUnusedMin = -1;
1128   int AgprIndexUnusedMin = -1;
1129   MCContext *Ctx = nullptr;
1130   MCSubtargetInfo const *MSTI = nullptr;
1131 
1132   void usesSgprAt(int i) {
1133     if (i >= SgprIndexUnusedMin) {
1134       SgprIndexUnusedMin = ++i;
1135       if (Ctx) {
1136         MCSymbol* const Sym =
1137           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1138         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1139       }
1140     }
1141   }
1142 
1143   void usesVgprAt(int i) {
1144     if (i >= VgprIndexUnusedMin) {
1145       VgprIndexUnusedMin = ++i;
1146       if (Ctx) {
1147         MCSymbol* const Sym =
1148           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1149         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1150                                          VgprIndexUnusedMin);
1151         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1152       }
1153     }
1154   }
1155 
1156   void usesAgprAt(int i) {
1157     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1158     if (!hasMAIInsts(*MSTI))
1159       return;
1160 
1161     if (i >= AgprIndexUnusedMin) {
1162       AgprIndexUnusedMin = ++i;
1163       if (Ctx) {
1164         MCSymbol* const Sym =
1165           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1166         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1167 
1168         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1169         MCSymbol* const vSym =
1170           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1171         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1172                                          VgprIndexUnusedMin);
1173         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1174       }
1175     }
1176   }
1177 
1178 public:
1179   KernelScopeInfo() = default;
1180 
1181   void initialize(MCContext &Context) {
1182     Ctx = &Context;
1183     MSTI = Ctx->getSubtargetInfo();
1184 
1185     usesSgprAt(SgprIndexUnusedMin = -1);
1186     usesVgprAt(VgprIndexUnusedMin = -1);
1187     if (hasMAIInsts(*MSTI)) {
1188       usesAgprAt(AgprIndexUnusedMin = -1);
1189     }
1190   }
1191 
1192   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1193     switch (RegKind) {
1194       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1195       case IS_AGPR: usesAgprAt(DwordRegIndex + RegWidth - 1); break;
1196       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1197       default: break;
1198     }
1199   }
1200 };
1201 
1202 class AMDGPUAsmParser : public MCTargetAsmParser {
1203   MCAsmParser &Parser;
1204 
1205   // Number of extra operands parsed after the first optional operand.
1206   // This may be necessary to skip hardcoded mandatory operands.
1207   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1208 
1209   unsigned ForcedEncodingSize = 0;
1210   bool ForcedDPP = false;
1211   bool ForcedSDWA = false;
1212   KernelScopeInfo KernelScope;
1213   unsigned CPolSeen;
1214 
1215   /// @name Auto-generated Match Functions
1216   /// {
1217 
1218 #define GET_ASSEMBLER_HEADER
1219 #include "AMDGPUGenAsmMatcher.inc"
1220 
1221   /// }
1222 
1223 private:
1224   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1225   bool OutOfRangeError(SMRange Range);
1226   /// Calculate VGPR/SGPR blocks required for given target, reserved
1227   /// registers, and user-specified NextFreeXGPR values.
1228   ///
1229   /// \param Features [in] Target features, used for bug corrections.
1230   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1231   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1232   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1233   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1234   /// descriptor field, if valid.
1235   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1236   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1237   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1238   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1239   /// \param VGPRBlocks [out] Result VGPR block count.
1240   /// \param SGPRBlocks [out] Result SGPR block count.
1241   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1242                           bool FlatScrUsed, bool XNACKUsed,
1243                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1244                           SMRange VGPRRange, unsigned NextFreeSGPR,
1245                           SMRange SGPRRange, unsigned &VGPRBlocks,
1246                           unsigned &SGPRBlocks);
1247   bool ParseDirectiveAMDGCNTarget();
1248   bool ParseDirectiveAMDHSAKernel();
1249   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1250   bool ParseDirectiveHSACodeObjectVersion();
1251   bool ParseDirectiveHSACodeObjectISA();
1252   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1253   bool ParseDirectiveAMDKernelCodeT();
1254   // TODO: Possibly make subtargetHasRegister const.
1255   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1256   bool ParseDirectiveAMDGPUHsaKernel();
1257 
1258   bool ParseDirectiveISAVersion();
1259   bool ParseDirectiveHSAMetadata();
1260   bool ParseDirectivePALMetadataBegin();
1261   bool ParseDirectivePALMetadata();
1262   bool ParseDirectiveAMDGPULDS();
1263 
1264   /// Common code to parse out a block of text (typically YAML) between start and
1265   /// end directives.
1266   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1267                            const char *AssemblerDirectiveEnd,
1268                            std::string &CollectString);
1269 
1270   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1271                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1272   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1273                            unsigned &RegNum, unsigned &RegWidth,
1274                            bool RestoreOnFailure = false);
1275   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1276                            unsigned &RegNum, unsigned &RegWidth,
1277                            SmallVectorImpl<AsmToken> &Tokens);
1278   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1279                            unsigned &RegWidth,
1280                            SmallVectorImpl<AsmToken> &Tokens);
1281   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1282                            unsigned &RegWidth,
1283                            SmallVectorImpl<AsmToken> &Tokens);
1284   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1285                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1286   bool ParseRegRange(unsigned& Num, unsigned& Width);
1287   unsigned getRegularReg(RegisterKind RegKind,
1288                          unsigned RegNum,
1289                          unsigned RegWidth,
1290                          SMLoc Loc);
1291 
1292   bool isRegister();
1293   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1294   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1295   void initializeGprCountSymbol(RegisterKind RegKind);
1296   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1297                              unsigned RegWidth);
1298   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1299                     bool IsAtomic, bool IsLds = false);
1300   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1301                  bool IsGdsHardcoded);
1302 
1303 public:
1304   enum AMDGPUMatchResultTy {
1305     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1306   };
1307   enum OperandMode {
1308     OperandMode_Default,
1309     OperandMode_NSA,
1310   };
1311 
1312   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1313 
1314   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1315                const MCInstrInfo &MII,
1316                const MCTargetOptions &Options)
1317       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1318     MCAsmParserExtension::Initialize(Parser);
1319 
1320     if (getFeatureBits().none()) {
1321       // Set default features.
1322       copySTI().ToggleFeature("southern-islands");
1323     }
1324 
1325     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1326 
1327     {
1328       // TODO: make those pre-defined variables read-only.
1329       // Currently there is none suitable machinery in the core llvm-mc for this.
1330       // MCSymbol::isRedefinable is intended for another purpose, and
1331       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1332       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1333       MCContext &Ctx = getContext();
1334       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1335         MCSymbol *Sym =
1336             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1337         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1338         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1339         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1340         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1341         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1342       } else {
1343         MCSymbol *Sym =
1344             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1345         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1346         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1347         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1348         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1349         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1350       }
1351       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1352         initializeGprCountSymbol(IS_VGPR);
1353         initializeGprCountSymbol(IS_SGPR);
1354       } else
1355         KernelScope.initialize(getContext());
1356     }
1357   }
1358 
1359   bool hasMIMG_R128() const {
1360     return AMDGPU::hasMIMG_R128(getSTI());
1361   }
1362 
1363   bool hasPackedD16() const {
1364     return AMDGPU::hasPackedD16(getSTI());
1365   }
1366 
1367   bool hasGFX10A16() const {
1368     return AMDGPU::hasGFX10A16(getSTI());
1369   }
1370 
1371   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1372 
1373   bool isSI() const {
1374     return AMDGPU::isSI(getSTI());
1375   }
1376 
1377   bool isCI() const {
1378     return AMDGPU::isCI(getSTI());
1379   }
1380 
1381   bool isVI() const {
1382     return AMDGPU::isVI(getSTI());
1383   }
1384 
1385   bool isGFX9() const {
1386     return AMDGPU::isGFX9(getSTI());
1387   }
1388 
1389   // TODO: isGFX90A is also true for GFX940. We need to clean it.
1390   bool isGFX90A() const {
1391     return AMDGPU::isGFX90A(getSTI());
1392   }
1393 
1394   bool isGFX940() const {
1395     return AMDGPU::isGFX940(getSTI());
1396   }
1397 
1398   bool isGFX9Plus() const {
1399     return AMDGPU::isGFX9Plus(getSTI());
1400   }
1401 
1402   bool isGFX10() const {
1403     return AMDGPU::isGFX10(getSTI());
1404   }
1405 
1406   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1407 
1408   bool isGFX10_BEncoding() const {
1409     return AMDGPU::isGFX10_BEncoding(getSTI());
1410   }
1411 
1412   bool hasInv2PiInlineImm() const {
1413     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1414   }
1415 
1416   bool hasFlatOffsets() const {
1417     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1418   }
1419 
1420   bool hasArchitectedFlatScratch() const {
1421     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1422   }
1423 
1424   bool hasSGPR102_SGPR103() const {
1425     return !isVI() && !isGFX9();
1426   }
1427 
1428   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1429 
1430   bool hasIntClamp() const {
1431     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1432   }
1433 
1434   AMDGPUTargetStreamer &getTargetStreamer() {
1435     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1436     return static_cast<AMDGPUTargetStreamer &>(TS);
1437   }
1438 
1439   const MCRegisterInfo *getMRI() const {
1440     // We need this const_cast because for some reason getContext() is not const
1441     // in MCAsmParser.
1442     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1443   }
1444 
1445   const MCInstrInfo *getMII() const {
1446     return &MII;
1447   }
1448 
1449   const FeatureBitset &getFeatureBits() const {
1450     return getSTI().getFeatureBits();
1451   }
1452 
1453   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1454   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1455   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1456 
1457   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1458   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1459   bool isForcedDPP() const { return ForcedDPP; }
1460   bool isForcedSDWA() const { return ForcedSDWA; }
1461   ArrayRef<unsigned> getMatchedVariants() const;
1462   StringRef getMatchedVariantName() const;
1463 
1464   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1465   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1466                      bool RestoreOnFailure);
1467   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1468   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1469                                         SMLoc &EndLoc) override;
1470   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1471   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1472                                       unsigned Kind) override;
1473   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1474                                OperandVector &Operands, MCStreamer &Out,
1475                                uint64_t &ErrorInfo,
1476                                bool MatchingInlineAsm) override;
1477   bool ParseDirective(AsmToken DirectiveID) override;
1478   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1479                                     OperandMode Mode = OperandMode_Default);
1480   StringRef parseMnemonicSuffix(StringRef Name);
1481   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1482                         SMLoc NameLoc, OperandVector &Operands) override;
1483   //bool ProcessInstruction(MCInst &Inst);
1484 
1485   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1486 
1487   OperandMatchResultTy
1488   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1489                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1490                      bool (*ConvertResult)(int64_t &) = nullptr);
1491 
1492   OperandMatchResultTy
1493   parseOperandArrayWithPrefix(const char *Prefix,
1494                               OperandVector &Operands,
1495                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1496                               bool (*ConvertResult)(int64_t&) = nullptr);
1497 
1498   OperandMatchResultTy
1499   parseNamedBit(StringRef Name, OperandVector &Operands,
1500                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1501   OperandMatchResultTy parseCPol(OperandVector &Operands);
1502   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1503                                              StringRef &Value,
1504                                              SMLoc &StringLoc);
1505 
1506   bool isModifier();
1507   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1508   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1509   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1510   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1511   bool parseSP3NegModifier();
1512   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1513   OperandMatchResultTy parseReg(OperandVector &Operands);
1514   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1515   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1516   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1517   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1518   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1519   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1520   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1521   OperandMatchResultTy parseUfmt(int64_t &Format);
1522   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1523   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1524   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1525   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1526   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1527   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1528   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1529 
1530   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1531   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1532   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1533   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1534 
1535   bool parseCnt(int64_t &IntVal);
1536   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1537   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1538 
1539 private:
1540   struct OperandInfoTy {
1541     SMLoc Loc;
1542     int64_t Id;
1543     bool IsSymbolic = false;
1544     bool IsDefined = false;
1545 
1546     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1547   };
1548 
1549   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1550   bool validateSendMsg(const OperandInfoTy &Msg,
1551                        const OperandInfoTy &Op,
1552                        const OperandInfoTy &Stream);
1553 
1554   bool parseHwregBody(OperandInfoTy &HwReg,
1555                       OperandInfoTy &Offset,
1556                       OperandInfoTy &Width);
1557   bool validateHwreg(const OperandInfoTy &HwReg,
1558                      const OperandInfoTy &Offset,
1559                      const OperandInfoTy &Width);
1560 
1561   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1562   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1563 
1564   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1565                       const OperandVector &Operands) const;
1566   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1567   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1568   SMLoc getLitLoc(const OperandVector &Operands) const;
1569   SMLoc getConstLoc(const OperandVector &Operands) const;
1570 
1571   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1572   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1573   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1574   bool validateSOPLiteral(const MCInst &Inst) const;
1575   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1576   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1577   bool validateIntClampSupported(const MCInst &Inst);
1578   bool validateMIMGAtomicDMask(const MCInst &Inst);
1579   bool validateMIMGGatherDMask(const MCInst &Inst);
1580   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1581   bool validateMIMGDataSize(const MCInst &Inst);
1582   bool validateMIMGAddrSize(const MCInst &Inst);
1583   bool validateMIMGD16(const MCInst &Inst);
1584   bool validateMIMGDim(const MCInst &Inst);
1585   bool validateMIMGMSAA(const MCInst &Inst);
1586   bool validateOpSel(const MCInst &Inst);
1587   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1588   bool validateVccOperand(unsigned Reg) const;
1589   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1590   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1591   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1592   bool validateAGPRLdSt(const MCInst &Inst) const;
1593   bool validateVGPRAlign(const MCInst &Inst) const;
1594   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1595   bool validateDivScale(const MCInst &Inst);
1596   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1597                              const SMLoc &IDLoc);
1598   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1599   unsigned getConstantBusLimit(unsigned Opcode) const;
1600   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1601   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1602   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1603 
1604   bool isSupportedMnemo(StringRef Mnemo,
1605                         const FeatureBitset &FBS);
1606   bool isSupportedMnemo(StringRef Mnemo,
1607                         const FeatureBitset &FBS,
1608                         ArrayRef<unsigned> Variants);
1609   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1610 
1611   bool isId(const StringRef Id) const;
1612   bool isId(const AsmToken &Token, const StringRef Id) const;
1613   bool isToken(const AsmToken::TokenKind Kind) const;
1614   bool trySkipId(const StringRef Id);
1615   bool trySkipId(const StringRef Pref, const StringRef Id);
1616   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1617   bool trySkipToken(const AsmToken::TokenKind Kind);
1618   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1619   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1620   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1621 
1622   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1623   AsmToken::TokenKind getTokenKind() const;
1624   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1625   bool parseExpr(OperandVector &Operands);
1626   StringRef getTokenStr() const;
1627   AsmToken peekToken();
1628   AsmToken getToken() const;
1629   SMLoc getLoc() const;
1630   void lex();
1631 
1632 public:
1633   void onBeginOfFile() override;
1634 
1635   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1636   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1637 
1638   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1639   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1640   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1641   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1642   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1643   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1644 
1645   bool parseSwizzleOperand(int64_t &Op,
1646                            const unsigned MinVal,
1647                            const unsigned MaxVal,
1648                            const StringRef ErrMsg,
1649                            SMLoc &Loc);
1650   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1651                             const unsigned MinVal,
1652                             const unsigned MaxVal,
1653                             const StringRef ErrMsg);
1654   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1655   bool parseSwizzleOffset(int64_t &Imm);
1656   bool parseSwizzleMacro(int64_t &Imm);
1657   bool parseSwizzleQuadPerm(int64_t &Imm);
1658   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1659   bool parseSwizzleBroadcast(int64_t &Imm);
1660   bool parseSwizzleSwap(int64_t &Imm);
1661   bool parseSwizzleReverse(int64_t &Imm);
1662 
1663   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1664   int64_t parseGPRIdxMacro();
1665 
1666   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1667   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1668   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1669   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1670 
1671   AMDGPUOperand::Ptr defaultCPol() const;
1672 
1673   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1674   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1675   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1676   AMDGPUOperand::Ptr defaultFlatOffset() const;
1677 
1678   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1679 
1680   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1681                OptionalImmIndexMap &OptionalIdx);
1682   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1683   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1684   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1685   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1686                 OptionalImmIndexMap &OptionalIdx);
1687 
1688   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1689 
1690   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1691                bool IsAtomic = false);
1692   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1693   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1694 
1695   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1696 
1697   bool parseDimId(unsigned &Encoding);
1698   OperandMatchResultTy parseDim(OperandVector &Operands);
1699   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1700   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1701   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1702   int64_t parseDPPCtrlSel(StringRef Ctrl);
1703   int64_t parseDPPCtrlPerm();
1704   AMDGPUOperand::Ptr defaultRowMask() const;
1705   AMDGPUOperand::Ptr defaultBankMask() const;
1706   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1707   AMDGPUOperand::Ptr defaultFI() const;
1708   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1709   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1710 
1711   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1712                                     AMDGPUOperand::ImmTy Type);
1713   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1714   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1715   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1716   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1717   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1718   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1719   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1720                uint64_t BasicInstType,
1721                bool SkipDstVcc = false,
1722                bool SkipSrcVcc = false);
1723 
1724   AMDGPUOperand::Ptr defaultBLGP() const;
1725   AMDGPUOperand::Ptr defaultCBSZ() const;
1726   AMDGPUOperand::Ptr defaultABID() const;
1727 
1728   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1729   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1730 };
1731 
1732 struct OptionalOperand {
1733   const char *Name;
1734   AMDGPUOperand::ImmTy Type;
1735   bool IsBit;
1736   bool (*ConvertResult)(int64_t&);
1737 };
1738 
1739 } // end anonymous namespace
1740 
1741 // May be called with integer type with equivalent bitwidth.
1742 static const fltSemantics *getFltSemantics(unsigned Size) {
1743   switch (Size) {
1744   case 4:
1745     return &APFloat::IEEEsingle();
1746   case 8:
1747     return &APFloat::IEEEdouble();
1748   case 2:
1749     return &APFloat::IEEEhalf();
1750   default:
1751     llvm_unreachable("unsupported fp type");
1752   }
1753 }
1754 
1755 static const fltSemantics *getFltSemantics(MVT VT) {
1756   return getFltSemantics(VT.getSizeInBits() / 8);
1757 }
1758 
1759 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1760   switch (OperandType) {
1761   case AMDGPU::OPERAND_REG_IMM_INT32:
1762   case AMDGPU::OPERAND_REG_IMM_FP32:
1763   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1764   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1765   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1766   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1767   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1768   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1769   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1770   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1771   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1772   case AMDGPU::OPERAND_KIMM32:
1773     return &APFloat::IEEEsingle();
1774   case AMDGPU::OPERAND_REG_IMM_INT64:
1775   case AMDGPU::OPERAND_REG_IMM_FP64:
1776   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1777   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1778   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1779     return &APFloat::IEEEdouble();
1780   case AMDGPU::OPERAND_REG_IMM_INT16:
1781   case AMDGPU::OPERAND_REG_IMM_FP16:
1782   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1783   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1784   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1785   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1786   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1787   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1788   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1789   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1790   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1791   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1792   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1793   case AMDGPU::OPERAND_KIMM16:
1794     return &APFloat::IEEEhalf();
1795   default:
1796     llvm_unreachable("unsupported fp type");
1797   }
1798 }
1799 
1800 //===----------------------------------------------------------------------===//
1801 // Operand
1802 //===----------------------------------------------------------------------===//
1803 
1804 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1805   bool Lost;
1806 
1807   // Convert literal to single precision
1808   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1809                                                APFloat::rmNearestTiesToEven,
1810                                                &Lost);
1811   // We allow precision lost but not overflow or underflow
1812   if (Status != APFloat::opOK &&
1813       Lost &&
1814       ((Status & APFloat::opOverflow)  != 0 ||
1815        (Status & APFloat::opUnderflow) != 0)) {
1816     return false;
1817   }
1818 
1819   return true;
1820 }
1821 
1822 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1823   return isUIntN(Size, Val) || isIntN(Size, Val);
1824 }
1825 
1826 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1827   if (VT.getScalarType() == MVT::i16) {
1828     // FP immediate values are broken.
1829     return isInlinableIntLiteral(Val);
1830   }
1831 
1832   // f16/v2f16 operands work correctly for all values.
1833   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1834 }
1835 
1836 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1837 
1838   // This is a hack to enable named inline values like
1839   // shared_base with both 32-bit and 64-bit operands.
1840   // Note that these values are defined as
1841   // 32-bit operands only.
1842   if (isInlineValue()) {
1843     return true;
1844   }
1845 
1846   if (!isImmTy(ImmTyNone)) {
1847     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1848     return false;
1849   }
1850   // TODO: We should avoid using host float here. It would be better to
1851   // check the float bit values which is what a few other places do.
1852   // We've had bot failures before due to weird NaN support on mips hosts.
1853 
1854   APInt Literal(64, Imm.Val);
1855 
1856   if (Imm.IsFPImm) { // We got fp literal token
1857     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1858       return AMDGPU::isInlinableLiteral64(Imm.Val,
1859                                           AsmParser->hasInv2PiInlineImm());
1860     }
1861 
1862     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1863     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1864       return false;
1865 
1866     if (type.getScalarSizeInBits() == 16) {
1867       return isInlineableLiteralOp16(
1868         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1869         type, AsmParser->hasInv2PiInlineImm());
1870     }
1871 
1872     // Check if single precision literal is inlinable
1873     return AMDGPU::isInlinableLiteral32(
1874       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1875       AsmParser->hasInv2PiInlineImm());
1876   }
1877 
1878   // We got int literal token.
1879   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1880     return AMDGPU::isInlinableLiteral64(Imm.Val,
1881                                         AsmParser->hasInv2PiInlineImm());
1882   }
1883 
1884   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1885     return false;
1886   }
1887 
1888   if (type.getScalarSizeInBits() == 16) {
1889     return isInlineableLiteralOp16(
1890       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1891       type, AsmParser->hasInv2PiInlineImm());
1892   }
1893 
1894   return AMDGPU::isInlinableLiteral32(
1895     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1896     AsmParser->hasInv2PiInlineImm());
1897 }
1898 
1899 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1900   // Check that this immediate can be added as literal
1901   if (!isImmTy(ImmTyNone)) {
1902     return false;
1903   }
1904 
1905   if (!Imm.IsFPImm) {
1906     // We got int literal token.
1907 
1908     if (type == MVT::f64 && hasFPModifiers()) {
1909       // Cannot apply fp modifiers to int literals preserving the same semantics
1910       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1911       // disable these cases.
1912       return false;
1913     }
1914 
1915     unsigned Size = type.getSizeInBits();
1916     if (Size == 64)
1917       Size = 32;
1918 
1919     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1920     // types.
1921     return isSafeTruncation(Imm.Val, Size);
1922   }
1923 
1924   // We got fp literal token
1925   if (type == MVT::f64) { // Expected 64-bit fp operand
1926     // We would set low 64-bits of literal to zeroes but we accept this literals
1927     return true;
1928   }
1929 
1930   if (type == MVT::i64) { // Expected 64-bit int operand
1931     // We don't allow fp literals in 64-bit integer instructions. It is
1932     // unclear how we should encode them.
1933     return false;
1934   }
1935 
1936   // We allow fp literals with f16x2 operands assuming that the specified
1937   // literal goes into the lower half and the upper half is zero. We also
1938   // require that the literal may be losslessly converted to f16.
1939   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1940                      (type == MVT::v2i16)? MVT::i16 :
1941                      (type == MVT::v2f32)? MVT::f32 : type;
1942 
1943   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1944   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1945 }
1946 
1947 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1948   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1949 }
1950 
1951 bool AMDGPUOperand::isVRegWithInputMods() const {
1952   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1953          // GFX90A allows DPP on 64-bit operands.
1954          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1955           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1956 }
1957 
1958 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1959   if (AsmParser->isVI())
1960     return isVReg32();
1961   else if (AsmParser->isGFX9Plus())
1962     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1963   else
1964     return false;
1965 }
1966 
1967 bool AMDGPUOperand::isSDWAFP16Operand() const {
1968   return isSDWAOperand(MVT::f16);
1969 }
1970 
1971 bool AMDGPUOperand::isSDWAFP32Operand() const {
1972   return isSDWAOperand(MVT::f32);
1973 }
1974 
1975 bool AMDGPUOperand::isSDWAInt16Operand() const {
1976   return isSDWAOperand(MVT::i16);
1977 }
1978 
1979 bool AMDGPUOperand::isSDWAInt32Operand() const {
1980   return isSDWAOperand(MVT::i32);
1981 }
1982 
1983 bool AMDGPUOperand::isBoolReg() const {
1984   auto FB = AsmParser->getFeatureBits();
1985   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1986                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1987 }
1988 
1989 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1990 {
1991   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1992   assert(Size == 2 || Size == 4 || Size == 8);
1993 
1994   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1995 
1996   if (Imm.Mods.Abs) {
1997     Val &= ~FpSignMask;
1998   }
1999   if (Imm.Mods.Neg) {
2000     Val ^= FpSignMask;
2001   }
2002 
2003   return Val;
2004 }
2005 
2006 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2007   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2008                              Inst.getNumOperands())) {
2009     addLiteralImmOperand(Inst, Imm.Val,
2010                          ApplyModifiers &
2011                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2012   } else {
2013     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2014     Inst.addOperand(MCOperand::createImm(Imm.Val));
2015     setImmKindNone();
2016   }
2017 }
2018 
2019 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2020   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2021   auto OpNum = Inst.getNumOperands();
2022   // Check that this operand accepts literals
2023   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2024 
2025   if (ApplyModifiers) {
2026     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2027     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2028     Val = applyInputFPModifiers(Val, Size);
2029   }
2030 
2031   APInt Literal(64, Val);
2032   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2033 
2034   if (Imm.IsFPImm) { // We got fp literal token
2035     switch (OpTy) {
2036     case AMDGPU::OPERAND_REG_IMM_INT64:
2037     case AMDGPU::OPERAND_REG_IMM_FP64:
2038     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2039     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2040     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2041       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2042                                        AsmParser->hasInv2PiInlineImm())) {
2043         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2044         setImmKindConst();
2045         return;
2046       }
2047 
2048       // Non-inlineable
2049       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2050         // For fp operands we check if low 32 bits are zeros
2051         if (Literal.getLoBits(32) != 0) {
2052           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2053           "Can't encode literal as exact 64-bit floating-point operand. "
2054           "Low 32-bits will be set to zero");
2055         }
2056 
2057         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2058         setImmKindLiteral();
2059         return;
2060       }
2061 
2062       // We don't allow fp literals in 64-bit integer instructions. It is
2063       // unclear how we should encode them. This case should be checked earlier
2064       // in predicate methods (isLiteralImm())
2065       llvm_unreachable("fp literal in 64-bit integer instruction.");
2066 
2067     case AMDGPU::OPERAND_REG_IMM_INT32:
2068     case AMDGPU::OPERAND_REG_IMM_FP32:
2069     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2070     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2071     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2072     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2073     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2074     case AMDGPU::OPERAND_REG_IMM_INT16:
2075     case AMDGPU::OPERAND_REG_IMM_FP16:
2076     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2077     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2078     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2079     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2080     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2081     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2082     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2083     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2084     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2085     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2086     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2087     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2088     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2089     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2090     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2091     case AMDGPU::OPERAND_KIMM32:
2092     case AMDGPU::OPERAND_KIMM16: {
2093       bool lost;
2094       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2095       // Convert literal to single precision
2096       FPLiteral.convert(*getOpFltSemantics(OpTy),
2097                         APFloat::rmNearestTiesToEven, &lost);
2098       // We allow precision lost but not overflow or underflow. This should be
2099       // checked earlier in isLiteralImm()
2100 
2101       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2102       Inst.addOperand(MCOperand::createImm(ImmVal));
2103       setImmKindLiteral();
2104       return;
2105     }
2106     default:
2107       llvm_unreachable("invalid operand size");
2108     }
2109 
2110     return;
2111   }
2112 
2113   // We got int literal token.
2114   // Only sign extend inline immediates.
2115   switch (OpTy) {
2116   case AMDGPU::OPERAND_REG_IMM_INT32:
2117   case AMDGPU::OPERAND_REG_IMM_FP32:
2118   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2119   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2120   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2121   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2122   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2123   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2124   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2125   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2126   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2127   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2128   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2129     if (isSafeTruncation(Val, 32) &&
2130         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2131                                      AsmParser->hasInv2PiInlineImm())) {
2132       Inst.addOperand(MCOperand::createImm(Val));
2133       setImmKindConst();
2134       return;
2135     }
2136 
2137     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2138     setImmKindLiteral();
2139     return;
2140 
2141   case AMDGPU::OPERAND_REG_IMM_INT64:
2142   case AMDGPU::OPERAND_REG_IMM_FP64:
2143   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2144   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2145   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2146     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2147       Inst.addOperand(MCOperand::createImm(Val));
2148       setImmKindConst();
2149       return;
2150     }
2151 
2152     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2153     setImmKindLiteral();
2154     return;
2155 
2156   case AMDGPU::OPERAND_REG_IMM_INT16:
2157   case AMDGPU::OPERAND_REG_IMM_FP16:
2158   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2159   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2160   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2161   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2162   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2163     if (isSafeTruncation(Val, 16) &&
2164         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2165                                      AsmParser->hasInv2PiInlineImm())) {
2166       Inst.addOperand(MCOperand::createImm(Val));
2167       setImmKindConst();
2168       return;
2169     }
2170 
2171     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2172     setImmKindLiteral();
2173     return;
2174 
2175   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2176   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2177   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2178   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2179     assert(isSafeTruncation(Val, 16));
2180     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2181                                         AsmParser->hasInv2PiInlineImm()));
2182 
2183     Inst.addOperand(MCOperand::createImm(Val));
2184     return;
2185   }
2186   case AMDGPU::OPERAND_KIMM32:
2187     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2188     setImmKindNone();
2189     return;
2190   case AMDGPU::OPERAND_KIMM16:
2191     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2192     setImmKindNone();
2193     return;
2194   default:
2195     llvm_unreachable("invalid operand size");
2196   }
2197 }
2198 
2199 template <unsigned Bitwidth>
2200 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2201   APInt Literal(64, Imm.Val);
2202   setImmKindNone();
2203 
2204   if (!Imm.IsFPImm) {
2205     // We got int literal token.
2206     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2207     return;
2208   }
2209 
2210   bool Lost;
2211   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2212   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2213                     APFloat::rmNearestTiesToEven, &Lost);
2214   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2215 }
2216 
2217 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2218   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2219 }
2220 
2221 static bool isInlineValue(unsigned Reg) {
2222   switch (Reg) {
2223   case AMDGPU::SRC_SHARED_BASE:
2224   case AMDGPU::SRC_SHARED_LIMIT:
2225   case AMDGPU::SRC_PRIVATE_BASE:
2226   case AMDGPU::SRC_PRIVATE_LIMIT:
2227   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2228     return true;
2229   case AMDGPU::SRC_VCCZ:
2230   case AMDGPU::SRC_EXECZ:
2231   case AMDGPU::SRC_SCC:
2232     return true;
2233   case AMDGPU::SGPR_NULL:
2234     return true;
2235   default:
2236     return false;
2237   }
2238 }
2239 
2240 bool AMDGPUOperand::isInlineValue() const {
2241   return isRegKind() && ::isInlineValue(getReg());
2242 }
2243 
2244 //===----------------------------------------------------------------------===//
2245 // AsmParser
2246 //===----------------------------------------------------------------------===//
2247 
2248 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2249   if (Is == IS_VGPR) {
2250     switch (RegWidth) {
2251       default: return -1;
2252       case 1: return AMDGPU::VGPR_32RegClassID;
2253       case 2: return AMDGPU::VReg_64RegClassID;
2254       case 3: return AMDGPU::VReg_96RegClassID;
2255       case 4: return AMDGPU::VReg_128RegClassID;
2256       case 5: return AMDGPU::VReg_160RegClassID;
2257       case 6: return AMDGPU::VReg_192RegClassID;
2258       case 7: return AMDGPU::VReg_224RegClassID;
2259       case 8: return AMDGPU::VReg_256RegClassID;
2260       case 16: return AMDGPU::VReg_512RegClassID;
2261       case 32: return AMDGPU::VReg_1024RegClassID;
2262     }
2263   } else if (Is == IS_TTMP) {
2264     switch (RegWidth) {
2265       default: return -1;
2266       case 1: return AMDGPU::TTMP_32RegClassID;
2267       case 2: return AMDGPU::TTMP_64RegClassID;
2268       case 4: return AMDGPU::TTMP_128RegClassID;
2269       case 8: return AMDGPU::TTMP_256RegClassID;
2270       case 16: return AMDGPU::TTMP_512RegClassID;
2271     }
2272   } else if (Is == IS_SGPR) {
2273     switch (RegWidth) {
2274       default: return -1;
2275       case 1: return AMDGPU::SGPR_32RegClassID;
2276       case 2: return AMDGPU::SGPR_64RegClassID;
2277       case 3: return AMDGPU::SGPR_96RegClassID;
2278       case 4: return AMDGPU::SGPR_128RegClassID;
2279       case 5: return AMDGPU::SGPR_160RegClassID;
2280       case 6: return AMDGPU::SGPR_192RegClassID;
2281       case 7: return AMDGPU::SGPR_224RegClassID;
2282       case 8: return AMDGPU::SGPR_256RegClassID;
2283       case 16: return AMDGPU::SGPR_512RegClassID;
2284     }
2285   } else if (Is == IS_AGPR) {
2286     switch (RegWidth) {
2287       default: return -1;
2288       case 1: return AMDGPU::AGPR_32RegClassID;
2289       case 2: return AMDGPU::AReg_64RegClassID;
2290       case 3: return AMDGPU::AReg_96RegClassID;
2291       case 4: return AMDGPU::AReg_128RegClassID;
2292       case 5: return AMDGPU::AReg_160RegClassID;
2293       case 6: return AMDGPU::AReg_192RegClassID;
2294       case 7: return AMDGPU::AReg_224RegClassID;
2295       case 8: return AMDGPU::AReg_256RegClassID;
2296       case 16: return AMDGPU::AReg_512RegClassID;
2297       case 32: return AMDGPU::AReg_1024RegClassID;
2298     }
2299   }
2300   return -1;
2301 }
2302 
2303 static unsigned getSpecialRegForName(StringRef RegName) {
2304   return StringSwitch<unsigned>(RegName)
2305     .Case("exec", AMDGPU::EXEC)
2306     .Case("vcc", AMDGPU::VCC)
2307     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2308     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2309     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2310     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2311     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2312     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2313     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2314     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2315     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2316     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2317     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2318     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2319     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2320     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2321     .Case("m0", AMDGPU::M0)
2322     .Case("vccz", AMDGPU::SRC_VCCZ)
2323     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2324     .Case("execz", AMDGPU::SRC_EXECZ)
2325     .Case("src_execz", AMDGPU::SRC_EXECZ)
2326     .Case("scc", AMDGPU::SRC_SCC)
2327     .Case("src_scc", AMDGPU::SRC_SCC)
2328     .Case("tba", AMDGPU::TBA)
2329     .Case("tma", AMDGPU::TMA)
2330     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2331     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2332     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2333     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2334     .Case("vcc_lo", AMDGPU::VCC_LO)
2335     .Case("vcc_hi", AMDGPU::VCC_HI)
2336     .Case("exec_lo", AMDGPU::EXEC_LO)
2337     .Case("exec_hi", AMDGPU::EXEC_HI)
2338     .Case("tma_lo", AMDGPU::TMA_LO)
2339     .Case("tma_hi", AMDGPU::TMA_HI)
2340     .Case("tba_lo", AMDGPU::TBA_LO)
2341     .Case("tba_hi", AMDGPU::TBA_HI)
2342     .Case("pc", AMDGPU::PC_REG)
2343     .Case("null", AMDGPU::SGPR_NULL)
2344     .Default(AMDGPU::NoRegister);
2345 }
2346 
2347 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2348                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2349   auto R = parseRegister();
2350   if (!R) return true;
2351   assert(R->isReg());
2352   RegNo = R->getReg();
2353   StartLoc = R->getStartLoc();
2354   EndLoc = R->getEndLoc();
2355   return false;
2356 }
2357 
2358 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2359                                     SMLoc &EndLoc) {
2360   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2361 }
2362 
2363 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2364                                                        SMLoc &StartLoc,
2365                                                        SMLoc &EndLoc) {
2366   bool Result =
2367       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2368   bool PendingErrors = getParser().hasPendingError();
2369   getParser().clearPendingErrors();
2370   if (PendingErrors)
2371     return MatchOperand_ParseFail;
2372   if (Result)
2373     return MatchOperand_NoMatch;
2374   return MatchOperand_Success;
2375 }
2376 
2377 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2378                                             RegisterKind RegKind, unsigned Reg1,
2379                                             SMLoc Loc) {
2380   switch (RegKind) {
2381   case IS_SPECIAL:
2382     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2383       Reg = AMDGPU::EXEC;
2384       RegWidth = 2;
2385       return true;
2386     }
2387     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2388       Reg = AMDGPU::FLAT_SCR;
2389       RegWidth = 2;
2390       return true;
2391     }
2392     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2393       Reg = AMDGPU::XNACK_MASK;
2394       RegWidth = 2;
2395       return true;
2396     }
2397     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2398       Reg = AMDGPU::VCC;
2399       RegWidth = 2;
2400       return true;
2401     }
2402     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2403       Reg = AMDGPU::TBA;
2404       RegWidth = 2;
2405       return true;
2406     }
2407     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2408       Reg = AMDGPU::TMA;
2409       RegWidth = 2;
2410       return true;
2411     }
2412     Error(Loc, "register does not fit in the list");
2413     return false;
2414   case IS_VGPR:
2415   case IS_SGPR:
2416   case IS_AGPR:
2417   case IS_TTMP:
2418     if (Reg1 != Reg + RegWidth) {
2419       Error(Loc, "registers in a list must have consecutive indices");
2420       return false;
2421     }
2422     RegWidth++;
2423     return true;
2424   default:
2425     llvm_unreachable("unexpected register kind");
2426   }
2427 }
2428 
2429 struct RegInfo {
2430   StringLiteral Name;
2431   RegisterKind Kind;
2432 };
2433 
2434 static constexpr RegInfo RegularRegisters[] = {
2435   {{"v"},    IS_VGPR},
2436   {{"s"},    IS_SGPR},
2437   {{"ttmp"}, IS_TTMP},
2438   {{"acc"},  IS_AGPR},
2439   {{"a"},    IS_AGPR},
2440 };
2441 
2442 static bool isRegularReg(RegisterKind Kind) {
2443   return Kind == IS_VGPR ||
2444          Kind == IS_SGPR ||
2445          Kind == IS_TTMP ||
2446          Kind == IS_AGPR;
2447 }
2448 
2449 static const RegInfo* getRegularRegInfo(StringRef Str) {
2450   for (const RegInfo &Reg : RegularRegisters)
2451     if (Str.startswith(Reg.Name))
2452       return &Reg;
2453   return nullptr;
2454 }
2455 
2456 static bool getRegNum(StringRef Str, unsigned& Num) {
2457   return !Str.getAsInteger(10, Num);
2458 }
2459 
2460 bool
2461 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2462                             const AsmToken &NextToken) const {
2463 
2464   // A list of consecutive registers: [s0,s1,s2,s3]
2465   if (Token.is(AsmToken::LBrac))
2466     return true;
2467 
2468   if (!Token.is(AsmToken::Identifier))
2469     return false;
2470 
2471   // A single register like s0 or a range of registers like s[0:1]
2472 
2473   StringRef Str = Token.getString();
2474   const RegInfo *Reg = getRegularRegInfo(Str);
2475   if (Reg) {
2476     StringRef RegName = Reg->Name;
2477     StringRef RegSuffix = Str.substr(RegName.size());
2478     if (!RegSuffix.empty()) {
2479       unsigned Num;
2480       // A single register with an index: rXX
2481       if (getRegNum(RegSuffix, Num))
2482         return true;
2483     } else {
2484       // A range of registers: r[XX:YY].
2485       if (NextToken.is(AsmToken::LBrac))
2486         return true;
2487     }
2488   }
2489 
2490   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2491 }
2492 
2493 bool
2494 AMDGPUAsmParser::isRegister()
2495 {
2496   return isRegister(getToken(), peekToken());
2497 }
2498 
2499 unsigned
2500 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2501                                unsigned RegNum,
2502                                unsigned RegWidth,
2503                                SMLoc Loc) {
2504 
2505   assert(isRegularReg(RegKind));
2506 
2507   unsigned AlignSize = 1;
2508   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2509     // SGPR and TTMP registers must be aligned.
2510     // Max required alignment is 4 dwords.
2511     AlignSize = std::min(RegWidth, 4u);
2512   }
2513 
2514   if (RegNum % AlignSize != 0) {
2515     Error(Loc, "invalid register alignment");
2516     return AMDGPU::NoRegister;
2517   }
2518 
2519   unsigned RegIdx = RegNum / AlignSize;
2520   int RCID = getRegClass(RegKind, RegWidth);
2521   if (RCID == -1) {
2522     Error(Loc, "invalid or unsupported register size");
2523     return AMDGPU::NoRegister;
2524   }
2525 
2526   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2527   const MCRegisterClass RC = TRI->getRegClass(RCID);
2528   if (RegIdx >= RC.getNumRegs()) {
2529     Error(Loc, "register index is out of range");
2530     return AMDGPU::NoRegister;
2531   }
2532 
2533   return RC.getRegister(RegIdx);
2534 }
2535 
2536 bool
2537 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2538   int64_t RegLo, RegHi;
2539   if (!skipToken(AsmToken::LBrac, "missing register index"))
2540     return false;
2541 
2542   SMLoc FirstIdxLoc = getLoc();
2543   SMLoc SecondIdxLoc;
2544 
2545   if (!parseExpr(RegLo))
2546     return false;
2547 
2548   if (trySkipToken(AsmToken::Colon)) {
2549     SecondIdxLoc = getLoc();
2550     if (!parseExpr(RegHi))
2551       return false;
2552   } else {
2553     RegHi = RegLo;
2554   }
2555 
2556   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2557     return false;
2558 
2559   if (!isUInt<32>(RegLo)) {
2560     Error(FirstIdxLoc, "invalid register index");
2561     return false;
2562   }
2563 
2564   if (!isUInt<32>(RegHi)) {
2565     Error(SecondIdxLoc, "invalid register index");
2566     return false;
2567   }
2568 
2569   if (RegLo > RegHi) {
2570     Error(FirstIdxLoc, "first register index should not exceed second index");
2571     return false;
2572   }
2573 
2574   Num = static_cast<unsigned>(RegLo);
2575   Width = (RegHi - RegLo) + 1;
2576   return true;
2577 }
2578 
2579 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2580                                           unsigned &RegNum, unsigned &RegWidth,
2581                                           SmallVectorImpl<AsmToken> &Tokens) {
2582   assert(isToken(AsmToken::Identifier));
2583   unsigned Reg = getSpecialRegForName(getTokenStr());
2584   if (Reg) {
2585     RegNum = 0;
2586     RegWidth = 1;
2587     RegKind = IS_SPECIAL;
2588     Tokens.push_back(getToken());
2589     lex(); // skip register name
2590   }
2591   return Reg;
2592 }
2593 
2594 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2595                                           unsigned &RegNum, unsigned &RegWidth,
2596                                           SmallVectorImpl<AsmToken> &Tokens) {
2597   assert(isToken(AsmToken::Identifier));
2598   StringRef RegName = getTokenStr();
2599   auto Loc = getLoc();
2600 
2601   const RegInfo *RI = getRegularRegInfo(RegName);
2602   if (!RI) {
2603     Error(Loc, "invalid register name");
2604     return AMDGPU::NoRegister;
2605   }
2606 
2607   Tokens.push_back(getToken());
2608   lex(); // skip register name
2609 
2610   RegKind = RI->Kind;
2611   StringRef RegSuffix = RegName.substr(RI->Name.size());
2612   if (!RegSuffix.empty()) {
2613     // Single 32-bit register: vXX.
2614     if (!getRegNum(RegSuffix, RegNum)) {
2615       Error(Loc, "invalid register index");
2616       return AMDGPU::NoRegister;
2617     }
2618     RegWidth = 1;
2619   } else {
2620     // Range of registers: v[XX:YY]. ":YY" is optional.
2621     if (!ParseRegRange(RegNum, RegWidth))
2622       return AMDGPU::NoRegister;
2623   }
2624 
2625   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2626 }
2627 
2628 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2629                                        unsigned &RegWidth,
2630                                        SmallVectorImpl<AsmToken> &Tokens) {
2631   unsigned Reg = AMDGPU::NoRegister;
2632   auto ListLoc = getLoc();
2633 
2634   if (!skipToken(AsmToken::LBrac,
2635                  "expected a register or a list of registers")) {
2636     return AMDGPU::NoRegister;
2637   }
2638 
2639   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2640 
2641   auto Loc = getLoc();
2642   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2643     return AMDGPU::NoRegister;
2644   if (RegWidth != 1) {
2645     Error(Loc, "expected a single 32-bit register");
2646     return AMDGPU::NoRegister;
2647   }
2648 
2649   for (; trySkipToken(AsmToken::Comma); ) {
2650     RegisterKind NextRegKind;
2651     unsigned NextReg, NextRegNum, NextRegWidth;
2652     Loc = getLoc();
2653 
2654     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2655                              NextRegNum, NextRegWidth,
2656                              Tokens)) {
2657       return AMDGPU::NoRegister;
2658     }
2659     if (NextRegWidth != 1) {
2660       Error(Loc, "expected a single 32-bit register");
2661       return AMDGPU::NoRegister;
2662     }
2663     if (NextRegKind != RegKind) {
2664       Error(Loc, "registers in a list must be of the same kind");
2665       return AMDGPU::NoRegister;
2666     }
2667     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2668       return AMDGPU::NoRegister;
2669   }
2670 
2671   if (!skipToken(AsmToken::RBrac,
2672                  "expected a comma or a closing square bracket")) {
2673     return AMDGPU::NoRegister;
2674   }
2675 
2676   if (isRegularReg(RegKind))
2677     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2678 
2679   return Reg;
2680 }
2681 
2682 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2683                                           unsigned &RegNum, unsigned &RegWidth,
2684                                           SmallVectorImpl<AsmToken> &Tokens) {
2685   auto Loc = getLoc();
2686   Reg = AMDGPU::NoRegister;
2687 
2688   if (isToken(AsmToken::Identifier)) {
2689     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2690     if (Reg == AMDGPU::NoRegister)
2691       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2692   } else {
2693     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2694   }
2695 
2696   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2697   if (Reg == AMDGPU::NoRegister) {
2698     assert(Parser.hasPendingError());
2699     return false;
2700   }
2701 
2702   if (!subtargetHasRegister(*TRI, Reg)) {
2703     if (Reg == AMDGPU::SGPR_NULL) {
2704       Error(Loc, "'null' operand is not supported on this GPU");
2705     } else {
2706       Error(Loc, "register not available on this GPU");
2707     }
2708     return false;
2709   }
2710 
2711   return true;
2712 }
2713 
2714 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2715                                           unsigned &RegNum, unsigned &RegWidth,
2716                                           bool RestoreOnFailure /*=false*/) {
2717   Reg = AMDGPU::NoRegister;
2718 
2719   SmallVector<AsmToken, 1> Tokens;
2720   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2721     if (RestoreOnFailure) {
2722       while (!Tokens.empty()) {
2723         getLexer().UnLex(Tokens.pop_back_val());
2724       }
2725     }
2726     return true;
2727   }
2728   return false;
2729 }
2730 
2731 Optional<StringRef>
2732 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2733   switch (RegKind) {
2734   case IS_VGPR:
2735     return StringRef(".amdgcn.next_free_vgpr");
2736   case IS_SGPR:
2737     return StringRef(".amdgcn.next_free_sgpr");
2738   default:
2739     return None;
2740   }
2741 }
2742 
2743 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2744   auto SymbolName = getGprCountSymbolName(RegKind);
2745   assert(SymbolName && "initializing invalid register kind");
2746   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2747   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2748 }
2749 
2750 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2751                                             unsigned DwordRegIndex,
2752                                             unsigned RegWidth) {
2753   // Symbols are only defined for GCN targets
2754   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2755     return true;
2756 
2757   auto SymbolName = getGprCountSymbolName(RegKind);
2758   if (!SymbolName)
2759     return true;
2760   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2761 
2762   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2763   int64_t OldCount;
2764 
2765   if (!Sym->isVariable())
2766     return !Error(getLoc(),
2767                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2768   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2769     return !Error(
2770         getLoc(),
2771         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2772 
2773   if (OldCount <= NewMax)
2774     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2775 
2776   return true;
2777 }
2778 
2779 std::unique_ptr<AMDGPUOperand>
2780 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2781   const auto &Tok = getToken();
2782   SMLoc StartLoc = Tok.getLoc();
2783   SMLoc EndLoc = Tok.getEndLoc();
2784   RegisterKind RegKind;
2785   unsigned Reg, RegNum, RegWidth;
2786 
2787   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2788     return nullptr;
2789   }
2790   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2791     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2792       return nullptr;
2793   } else
2794     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2795   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2796 }
2797 
2798 OperandMatchResultTy
2799 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2800   // TODO: add syntactic sugar for 1/(2*PI)
2801 
2802   assert(!isRegister());
2803   assert(!isModifier());
2804 
2805   const auto& Tok = getToken();
2806   const auto& NextTok = peekToken();
2807   bool IsReal = Tok.is(AsmToken::Real);
2808   SMLoc S = getLoc();
2809   bool Negate = false;
2810 
2811   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2812     lex();
2813     IsReal = true;
2814     Negate = true;
2815   }
2816 
2817   if (IsReal) {
2818     // Floating-point expressions are not supported.
2819     // Can only allow floating-point literals with an
2820     // optional sign.
2821 
2822     StringRef Num = getTokenStr();
2823     lex();
2824 
2825     APFloat RealVal(APFloat::IEEEdouble());
2826     auto roundMode = APFloat::rmNearestTiesToEven;
2827     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2828       return MatchOperand_ParseFail;
2829     }
2830     if (Negate)
2831       RealVal.changeSign();
2832 
2833     Operands.push_back(
2834       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2835                                AMDGPUOperand::ImmTyNone, true));
2836 
2837     return MatchOperand_Success;
2838 
2839   } else {
2840     int64_t IntVal;
2841     const MCExpr *Expr;
2842     SMLoc S = getLoc();
2843 
2844     if (HasSP3AbsModifier) {
2845       // This is a workaround for handling expressions
2846       // as arguments of SP3 'abs' modifier, for example:
2847       //     |1.0|
2848       //     |-1|
2849       //     |1+x|
2850       // This syntax is not compatible with syntax of standard
2851       // MC expressions (due to the trailing '|').
2852       SMLoc EndLoc;
2853       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2854         return MatchOperand_ParseFail;
2855     } else {
2856       if (Parser.parseExpression(Expr))
2857         return MatchOperand_ParseFail;
2858     }
2859 
2860     if (Expr->evaluateAsAbsolute(IntVal)) {
2861       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2862     } else {
2863       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2864     }
2865 
2866     return MatchOperand_Success;
2867   }
2868 
2869   return MatchOperand_NoMatch;
2870 }
2871 
2872 OperandMatchResultTy
2873 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2874   if (!isRegister())
2875     return MatchOperand_NoMatch;
2876 
2877   if (auto R = parseRegister()) {
2878     assert(R->isReg());
2879     Operands.push_back(std::move(R));
2880     return MatchOperand_Success;
2881   }
2882   return MatchOperand_ParseFail;
2883 }
2884 
2885 OperandMatchResultTy
2886 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2887   auto res = parseReg(Operands);
2888   if (res != MatchOperand_NoMatch) {
2889     return res;
2890   } else if (isModifier()) {
2891     return MatchOperand_NoMatch;
2892   } else {
2893     return parseImm(Operands, HasSP3AbsMod);
2894   }
2895 }
2896 
2897 bool
2898 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2899   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2900     const auto &str = Token.getString();
2901     return str == "abs" || str == "neg" || str == "sext";
2902   }
2903   return false;
2904 }
2905 
2906 bool
2907 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2908   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2909 }
2910 
2911 bool
2912 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2913   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2914 }
2915 
2916 bool
2917 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2918   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2919 }
2920 
2921 // Check if this is an operand modifier or an opcode modifier
2922 // which may look like an expression but it is not. We should
2923 // avoid parsing these modifiers as expressions. Currently
2924 // recognized sequences are:
2925 //   |...|
2926 //   abs(...)
2927 //   neg(...)
2928 //   sext(...)
2929 //   -reg
2930 //   -|...|
2931 //   -abs(...)
2932 //   name:...
2933 // Note that simple opcode modifiers like 'gds' may be parsed as
2934 // expressions; this is a special case. See getExpressionAsToken.
2935 //
2936 bool
2937 AMDGPUAsmParser::isModifier() {
2938 
2939   AsmToken Tok = getToken();
2940   AsmToken NextToken[2];
2941   peekTokens(NextToken);
2942 
2943   return isOperandModifier(Tok, NextToken[0]) ||
2944          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2945          isOpcodeModifierWithVal(Tok, NextToken[0]);
2946 }
2947 
2948 // Check if the current token is an SP3 'neg' modifier.
2949 // Currently this modifier is allowed in the following context:
2950 //
2951 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2952 // 2. Before an 'abs' modifier: -abs(...)
2953 // 3. Before an SP3 'abs' modifier: -|...|
2954 //
2955 // In all other cases "-" is handled as a part
2956 // of an expression that follows the sign.
2957 //
2958 // Note: When "-" is followed by an integer literal,
2959 // this is interpreted as integer negation rather
2960 // than a floating-point NEG modifier applied to N.
2961 // Beside being contr-intuitive, such use of floating-point
2962 // NEG modifier would have resulted in different meaning
2963 // of integer literals used with VOP1/2/C and VOP3,
2964 // for example:
2965 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2966 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2967 // Negative fp literals with preceding "-" are
2968 // handled likewise for uniformity
2969 //
2970 bool
2971 AMDGPUAsmParser::parseSP3NegModifier() {
2972 
2973   AsmToken NextToken[2];
2974   peekTokens(NextToken);
2975 
2976   if (isToken(AsmToken::Minus) &&
2977       (isRegister(NextToken[0], NextToken[1]) ||
2978        NextToken[0].is(AsmToken::Pipe) ||
2979        isId(NextToken[0], "abs"))) {
2980     lex();
2981     return true;
2982   }
2983 
2984   return false;
2985 }
2986 
2987 OperandMatchResultTy
2988 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2989                                               bool AllowImm) {
2990   bool Neg, SP3Neg;
2991   bool Abs, SP3Abs;
2992   SMLoc Loc;
2993 
2994   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2995   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2996     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2997     return MatchOperand_ParseFail;
2998   }
2999 
3000   SP3Neg = parseSP3NegModifier();
3001 
3002   Loc = getLoc();
3003   Neg = trySkipId("neg");
3004   if (Neg && SP3Neg) {
3005     Error(Loc, "expected register or immediate");
3006     return MatchOperand_ParseFail;
3007   }
3008   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3009     return MatchOperand_ParseFail;
3010 
3011   Abs = trySkipId("abs");
3012   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3013     return MatchOperand_ParseFail;
3014 
3015   Loc = getLoc();
3016   SP3Abs = trySkipToken(AsmToken::Pipe);
3017   if (Abs && SP3Abs) {
3018     Error(Loc, "expected register or immediate");
3019     return MatchOperand_ParseFail;
3020   }
3021 
3022   OperandMatchResultTy Res;
3023   if (AllowImm) {
3024     Res = parseRegOrImm(Operands, SP3Abs);
3025   } else {
3026     Res = parseReg(Operands);
3027   }
3028   if (Res != MatchOperand_Success) {
3029     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3030   }
3031 
3032   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3033     return MatchOperand_ParseFail;
3034   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3035     return MatchOperand_ParseFail;
3036   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3037     return MatchOperand_ParseFail;
3038 
3039   AMDGPUOperand::Modifiers Mods;
3040   Mods.Abs = Abs || SP3Abs;
3041   Mods.Neg = Neg || SP3Neg;
3042 
3043   if (Mods.hasFPModifiers()) {
3044     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3045     if (Op.isExpr()) {
3046       Error(Op.getStartLoc(), "expected an absolute expression");
3047       return MatchOperand_ParseFail;
3048     }
3049     Op.setModifiers(Mods);
3050   }
3051   return MatchOperand_Success;
3052 }
3053 
3054 OperandMatchResultTy
3055 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3056                                                bool AllowImm) {
3057   bool Sext = trySkipId("sext");
3058   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3059     return MatchOperand_ParseFail;
3060 
3061   OperandMatchResultTy Res;
3062   if (AllowImm) {
3063     Res = parseRegOrImm(Operands);
3064   } else {
3065     Res = parseReg(Operands);
3066   }
3067   if (Res != MatchOperand_Success) {
3068     return Sext? MatchOperand_ParseFail : Res;
3069   }
3070 
3071   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3072     return MatchOperand_ParseFail;
3073 
3074   AMDGPUOperand::Modifiers Mods;
3075   Mods.Sext = Sext;
3076 
3077   if (Mods.hasIntModifiers()) {
3078     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3079     if (Op.isExpr()) {
3080       Error(Op.getStartLoc(), "expected an absolute expression");
3081       return MatchOperand_ParseFail;
3082     }
3083     Op.setModifiers(Mods);
3084   }
3085 
3086   return MatchOperand_Success;
3087 }
3088 
3089 OperandMatchResultTy
3090 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3091   return parseRegOrImmWithFPInputMods(Operands, false);
3092 }
3093 
3094 OperandMatchResultTy
3095 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3096   return parseRegOrImmWithIntInputMods(Operands, false);
3097 }
3098 
3099 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3100   auto Loc = getLoc();
3101   if (trySkipId("off")) {
3102     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3103                                                 AMDGPUOperand::ImmTyOff, false));
3104     return MatchOperand_Success;
3105   }
3106 
3107   if (!isRegister())
3108     return MatchOperand_NoMatch;
3109 
3110   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3111   if (Reg) {
3112     Operands.push_back(std::move(Reg));
3113     return MatchOperand_Success;
3114   }
3115 
3116   return MatchOperand_ParseFail;
3117 
3118 }
3119 
3120 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3121   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3122 
3123   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3124       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3125       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3126       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3127     return Match_InvalidOperand;
3128 
3129   if ((TSFlags & SIInstrFlags::VOP3) &&
3130       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3131       getForcedEncodingSize() != 64)
3132     return Match_PreferE32;
3133 
3134   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3135       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3136     // v_mac_f32/16 allow only dst_sel == DWORD;
3137     auto OpNum =
3138         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3139     const auto &Op = Inst.getOperand(OpNum);
3140     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3141       return Match_InvalidOperand;
3142     }
3143   }
3144 
3145   return Match_Success;
3146 }
3147 
3148 static ArrayRef<unsigned> getAllVariants() {
3149   static const unsigned Variants[] = {
3150     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3151     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3152   };
3153 
3154   return makeArrayRef(Variants);
3155 }
3156 
3157 // What asm variants we should check
3158 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3159   if (getForcedEncodingSize() == 32) {
3160     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3161     return makeArrayRef(Variants);
3162   }
3163 
3164   if (isForcedVOP3()) {
3165     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3166     return makeArrayRef(Variants);
3167   }
3168 
3169   if (isForcedSDWA()) {
3170     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3171                                         AMDGPUAsmVariants::SDWA9};
3172     return makeArrayRef(Variants);
3173   }
3174 
3175   if (isForcedDPP()) {
3176     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3177     return makeArrayRef(Variants);
3178   }
3179 
3180   return getAllVariants();
3181 }
3182 
3183 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3184   if (getForcedEncodingSize() == 32)
3185     return "e32";
3186 
3187   if (isForcedVOP3())
3188     return "e64";
3189 
3190   if (isForcedSDWA())
3191     return "sdwa";
3192 
3193   if (isForcedDPP())
3194     return "dpp";
3195 
3196   return "";
3197 }
3198 
3199 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3200   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3201   const unsigned Num = Desc.getNumImplicitUses();
3202   for (unsigned i = 0; i < Num; ++i) {
3203     unsigned Reg = Desc.ImplicitUses[i];
3204     switch (Reg) {
3205     case AMDGPU::FLAT_SCR:
3206     case AMDGPU::VCC:
3207     case AMDGPU::VCC_LO:
3208     case AMDGPU::VCC_HI:
3209     case AMDGPU::M0:
3210       return Reg;
3211     default:
3212       break;
3213     }
3214   }
3215   return AMDGPU::NoRegister;
3216 }
3217 
3218 // NB: This code is correct only when used to check constant
3219 // bus limitations because GFX7 support no f16 inline constants.
3220 // Note that there are no cases when a GFX7 opcode violates
3221 // constant bus limitations due to the use of an f16 constant.
3222 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3223                                        unsigned OpIdx) const {
3224   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3225 
3226   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3227     return false;
3228   }
3229 
3230   const MCOperand &MO = Inst.getOperand(OpIdx);
3231 
3232   int64_t Val = MO.getImm();
3233   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3234 
3235   switch (OpSize) { // expected operand size
3236   case 8:
3237     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3238   case 4:
3239     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3240   case 2: {
3241     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3242     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3243         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3244         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3245       return AMDGPU::isInlinableIntLiteral(Val);
3246 
3247     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3248         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3249         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3250       return AMDGPU::isInlinableIntLiteralV216(Val);
3251 
3252     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3253         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3254         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3255       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3256 
3257     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3258   }
3259   default:
3260     llvm_unreachable("invalid operand size");
3261   }
3262 }
3263 
3264 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3265   if (!isGFX10Plus())
3266     return 1;
3267 
3268   switch (Opcode) {
3269   // 64-bit shift instructions can use only one scalar value input
3270   case AMDGPU::V_LSHLREV_B64_e64:
3271   case AMDGPU::V_LSHLREV_B64_gfx10:
3272   case AMDGPU::V_LSHRREV_B64_e64:
3273   case AMDGPU::V_LSHRREV_B64_gfx10:
3274   case AMDGPU::V_ASHRREV_I64_e64:
3275   case AMDGPU::V_ASHRREV_I64_gfx10:
3276   case AMDGPU::V_LSHL_B64_e64:
3277   case AMDGPU::V_LSHR_B64_e64:
3278   case AMDGPU::V_ASHR_I64_e64:
3279     return 1;
3280   default:
3281     return 2;
3282   }
3283 }
3284 
3285 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3286   const MCOperand &MO = Inst.getOperand(OpIdx);
3287   if (MO.isImm()) {
3288     return !isInlineConstant(Inst, OpIdx);
3289   } else if (MO.isReg()) {
3290     auto Reg = MO.getReg();
3291     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3292     auto PReg = mc2PseudoReg(Reg);
3293     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3294   } else {
3295     return true;
3296   }
3297 }
3298 
3299 bool
3300 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3301                                                 const OperandVector &Operands) {
3302   const unsigned Opcode = Inst.getOpcode();
3303   const MCInstrDesc &Desc = MII.get(Opcode);
3304   unsigned LastSGPR = AMDGPU::NoRegister;
3305   unsigned ConstantBusUseCount = 0;
3306   unsigned NumLiterals = 0;
3307   unsigned LiteralSize;
3308 
3309   if (Desc.TSFlags &
3310       (SIInstrFlags::VOPC |
3311        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3312        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3313        SIInstrFlags::SDWA)) {
3314     // Check special imm operands (used by madmk, etc)
3315     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3316       ++NumLiterals;
3317       LiteralSize = 4;
3318     }
3319 
3320     SmallDenseSet<unsigned> SGPRsUsed;
3321     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3322     if (SGPRUsed != AMDGPU::NoRegister) {
3323       SGPRsUsed.insert(SGPRUsed);
3324       ++ConstantBusUseCount;
3325     }
3326 
3327     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3328     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3329     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3330 
3331     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3332 
3333     for (int OpIdx : OpIndices) {
3334       if (OpIdx == -1) break;
3335 
3336       const MCOperand &MO = Inst.getOperand(OpIdx);
3337       if (usesConstantBus(Inst, OpIdx)) {
3338         if (MO.isReg()) {
3339           LastSGPR = mc2PseudoReg(MO.getReg());
3340           // Pairs of registers with a partial intersections like these
3341           //   s0, s[0:1]
3342           //   flat_scratch_lo, flat_scratch
3343           //   flat_scratch_lo, flat_scratch_hi
3344           // are theoretically valid but they are disabled anyway.
3345           // Note that this code mimics SIInstrInfo::verifyInstruction
3346           if (!SGPRsUsed.count(LastSGPR)) {
3347             SGPRsUsed.insert(LastSGPR);
3348             ++ConstantBusUseCount;
3349           }
3350         } else { // Expression or a literal
3351 
3352           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3353             continue; // special operand like VINTERP attr_chan
3354 
3355           // An instruction may use only one literal.
3356           // This has been validated on the previous step.
3357           // See validateVOPLiteral.
3358           // This literal may be used as more than one operand.
3359           // If all these operands are of the same size,
3360           // this literal counts as one scalar value.
3361           // Otherwise it counts as 2 scalar values.
3362           // See "GFX10 Shader Programming", section 3.6.2.3.
3363 
3364           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3365           if (Size < 4) Size = 4;
3366 
3367           if (NumLiterals == 0) {
3368             NumLiterals = 1;
3369             LiteralSize = Size;
3370           } else if (LiteralSize != Size) {
3371             NumLiterals = 2;
3372           }
3373         }
3374       }
3375     }
3376   }
3377   ConstantBusUseCount += NumLiterals;
3378 
3379   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3380     return true;
3381 
3382   SMLoc LitLoc = getLitLoc(Operands);
3383   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3384   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3385   Error(Loc, "invalid operand (violates constant bus restrictions)");
3386   return false;
3387 }
3388 
3389 bool
3390 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3391                                                  const OperandVector &Operands) {
3392   const unsigned Opcode = Inst.getOpcode();
3393   const MCInstrDesc &Desc = MII.get(Opcode);
3394 
3395   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3396   if (DstIdx == -1 ||
3397       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3398     return true;
3399   }
3400 
3401   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3402 
3403   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3404   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3405   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3406 
3407   assert(DstIdx != -1);
3408   const MCOperand &Dst = Inst.getOperand(DstIdx);
3409   assert(Dst.isReg());
3410 
3411   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3412 
3413   for (int SrcIdx : SrcIndices) {
3414     if (SrcIdx == -1) break;
3415     const MCOperand &Src = Inst.getOperand(SrcIdx);
3416     if (Src.isReg()) {
3417       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3418         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3419         Error(getRegLoc(SrcReg, Operands),
3420           "destination must be different than all sources");
3421         return false;
3422       }
3423     }
3424   }
3425 
3426   return true;
3427 }
3428 
3429 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3430 
3431   const unsigned Opc = Inst.getOpcode();
3432   const MCInstrDesc &Desc = MII.get(Opc);
3433 
3434   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3435     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3436     assert(ClampIdx != -1);
3437     return Inst.getOperand(ClampIdx).getImm() == 0;
3438   }
3439 
3440   return true;
3441 }
3442 
3443 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3444 
3445   const unsigned Opc = Inst.getOpcode();
3446   const MCInstrDesc &Desc = MII.get(Opc);
3447 
3448   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3449     return true;
3450 
3451   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3452   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3453   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3454 
3455   assert(VDataIdx != -1);
3456 
3457   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3458     return true;
3459 
3460   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3461   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3462   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3463   if (DMask == 0)
3464     DMask = 1;
3465 
3466   unsigned DataSize =
3467     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3468   if (hasPackedD16()) {
3469     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3470     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3471       DataSize = (DataSize + 1) / 2;
3472   }
3473 
3474   return (VDataSize / 4) == DataSize + TFESize;
3475 }
3476 
3477 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3478   const unsigned Opc = Inst.getOpcode();
3479   const MCInstrDesc &Desc = MII.get(Opc);
3480 
3481   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3482     return true;
3483 
3484   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3485 
3486   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3487       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3488   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3489   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3490   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3491   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3492 
3493   assert(VAddr0Idx != -1);
3494   assert(SrsrcIdx != -1);
3495   assert(SrsrcIdx > VAddr0Idx);
3496 
3497   if (DimIdx == -1)
3498     return true; // intersect_ray
3499 
3500   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3501   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3502   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3503   unsigned ActualAddrSize =
3504       IsNSA ? SrsrcIdx - VAddr0Idx
3505             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3506   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3507 
3508   unsigned ExpectedAddrSize =
3509       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3510 
3511   if (!IsNSA) {
3512     if (ExpectedAddrSize > 8)
3513       ExpectedAddrSize = 16;
3514 
3515     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3516     // This provides backward compatibility for assembly created
3517     // before 160b/192b/224b types were directly supported.
3518     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3519       return true;
3520   }
3521 
3522   return ActualAddrSize == ExpectedAddrSize;
3523 }
3524 
3525 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3526 
3527   const unsigned Opc = Inst.getOpcode();
3528   const MCInstrDesc &Desc = MII.get(Opc);
3529 
3530   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3531     return true;
3532   if (!Desc.mayLoad() || !Desc.mayStore())
3533     return true; // Not atomic
3534 
3535   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3536   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3537 
3538   // This is an incomplete check because image_atomic_cmpswap
3539   // may only use 0x3 and 0xf while other atomic operations
3540   // may use 0x1 and 0x3. However these limitations are
3541   // verified when we check that dmask matches dst size.
3542   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3543 }
3544 
3545 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3546 
3547   const unsigned Opc = Inst.getOpcode();
3548   const MCInstrDesc &Desc = MII.get(Opc);
3549 
3550   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3551     return true;
3552 
3553   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3554   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3555 
3556   // GATHER4 instructions use dmask in a different fashion compared to
3557   // other MIMG instructions. The only useful DMASK values are
3558   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3559   // (red,red,red,red) etc.) The ISA document doesn't mention
3560   // this.
3561   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3562 }
3563 
3564 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3565   const unsigned Opc = Inst.getOpcode();
3566   const MCInstrDesc &Desc = MII.get(Opc);
3567 
3568   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3569     return true;
3570 
3571   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3572   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3573       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3574 
3575   if (!BaseOpcode->MSAA)
3576     return true;
3577 
3578   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3579   assert(DimIdx != -1);
3580 
3581   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3582   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3583 
3584   return DimInfo->MSAA;
3585 }
3586 
3587 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3588 {
3589   switch (Opcode) {
3590   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3591   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3592   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3593     return true;
3594   default:
3595     return false;
3596   }
3597 }
3598 
3599 // movrels* opcodes should only allow VGPRS as src0.
3600 // This is specified in .td description for vop1/vop3,
3601 // but sdwa is handled differently. See isSDWAOperand.
3602 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3603                                       const OperandVector &Operands) {
3604 
3605   const unsigned Opc = Inst.getOpcode();
3606   const MCInstrDesc &Desc = MII.get(Opc);
3607 
3608   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3609     return true;
3610 
3611   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3612   assert(Src0Idx != -1);
3613 
3614   SMLoc ErrLoc;
3615   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3616   if (Src0.isReg()) {
3617     auto Reg = mc2PseudoReg(Src0.getReg());
3618     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3619     if (!isSGPR(Reg, TRI))
3620       return true;
3621     ErrLoc = getRegLoc(Reg, Operands);
3622   } else {
3623     ErrLoc = getConstLoc(Operands);
3624   }
3625 
3626   Error(ErrLoc, "source operand must be a VGPR");
3627   return false;
3628 }
3629 
3630 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3631                                           const OperandVector &Operands) {
3632 
3633   const unsigned Opc = Inst.getOpcode();
3634 
3635   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3636     return true;
3637 
3638   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3639   assert(Src0Idx != -1);
3640 
3641   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3642   if (!Src0.isReg())
3643     return true;
3644 
3645   auto Reg = mc2PseudoReg(Src0.getReg());
3646   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3647   if (isSGPR(Reg, TRI)) {
3648     Error(getRegLoc(Reg, Operands),
3649           "source operand must be either a VGPR or an inline constant");
3650     return false;
3651   }
3652 
3653   return true;
3654 }
3655 
3656 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3657                                    const OperandVector &Operands) {
3658   const unsigned Opc = Inst.getOpcode();
3659   const MCInstrDesc &Desc = MII.get(Opc);
3660 
3661   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3662     return true;
3663 
3664   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3665   if (Src2Idx == -1)
3666     return true;
3667 
3668   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3669   if (!Src2.isReg())
3670     return true;
3671 
3672   MCRegister Src2Reg = Src2.getReg();
3673   MCRegister DstReg = Inst.getOperand(0).getReg();
3674   if (Src2Reg == DstReg)
3675     return true;
3676 
3677   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3678   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3679     return true;
3680 
3681   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3682     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3683           "source 2 operand must not partially overlap with dst");
3684     return false;
3685   }
3686 
3687   return true;
3688 }
3689 
3690 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3691   switch (Inst.getOpcode()) {
3692   default:
3693     return true;
3694   case V_DIV_SCALE_F32_gfx6_gfx7:
3695   case V_DIV_SCALE_F32_vi:
3696   case V_DIV_SCALE_F32_gfx10:
3697   case V_DIV_SCALE_F64_gfx6_gfx7:
3698   case V_DIV_SCALE_F64_vi:
3699   case V_DIV_SCALE_F64_gfx10:
3700     break;
3701   }
3702 
3703   // TODO: Check that src0 = src1 or src2.
3704 
3705   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3706                     AMDGPU::OpName::src2_modifiers,
3707                     AMDGPU::OpName::src2_modifiers}) {
3708     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3709             .getImm() &
3710         SISrcMods::ABS) {
3711       return false;
3712     }
3713   }
3714 
3715   return true;
3716 }
3717 
3718 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3719 
3720   const unsigned Opc = Inst.getOpcode();
3721   const MCInstrDesc &Desc = MII.get(Opc);
3722 
3723   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3724     return true;
3725 
3726   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3727   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3728     if (isCI() || isSI())
3729       return false;
3730   }
3731 
3732   return true;
3733 }
3734 
3735 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3736   const unsigned Opc = Inst.getOpcode();
3737   const MCInstrDesc &Desc = MII.get(Opc);
3738 
3739   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3740     return true;
3741 
3742   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3743   if (DimIdx < 0)
3744     return true;
3745 
3746   long Imm = Inst.getOperand(DimIdx).getImm();
3747   if (Imm < 0 || Imm >= 8)
3748     return false;
3749 
3750   return true;
3751 }
3752 
3753 static bool IsRevOpcode(const unsigned Opcode)
3754 {
3755   switch (Opcode) {
3756   case AMDGPU::V_SUBREV_F32_e32:
3757   case AMDGPU::V_SUBREV_F32_e64:
3758   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3759   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3760   case AMDGPU::V_SUBREV_F32_e32_vi:
3761   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3762   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3763   case AMDGPU::V_SUBREV_F32_e64_vi:
3764 
3765   case AMDGPU::V_SUBREV_CO_U32_e32:
3766   case AMDGPU::V_SUBREV_CO_U32_e64:
3767   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3768   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3769 
3770   case AMDGPU::V_SUBBREV_U32_e32:
3771   case AMDGPU::V_SUBBREV_U32_e64:
3772   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3773   case AMDGPU::V_SUBBREV_U32_e32_vi:
3774   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3775   case AMDGPU::V_SUBBREV_U32_e64_vi:
3776 
3777   case AMDGPU::V_SUBREV_U32_e32:
3778   case AMDGPU::V_SUBREV_U32_e64:
3779   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3780   case AMDGPU::V_SUBREV_U32_e32_vi:
3781   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3782   case AMDGPU::V_SUBREV_U32_e64_vi:
3783 
3784   case AMDGPU::V_SUBREV_F16_e32:
3785   case AMDGPU::V_SUBREV_F16_e64:
3786   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3787   case AMDGPU::V_SUBREV_F16_e32_vi:
3788   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3789   case AMDGPU::V_SUBREV_F16_e64_vi:
3790 
3791   case AMDGPU::V_SUBREV_U16_e32:
3792   case AMDGPU::V_SUBREV_U16_e64:
3793   case AMDGPU::V_SUBREV_U16_e32_vi:
3794   case AMDGPU::V_SUBREV_U16_e64_vi:
3795 
3796   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3797   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3798   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3799 
3800   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3801   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3802 
3803   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3804   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3805 
3806   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3807   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3808 
3809   case AMDGPU::V_LSHRREV_B32_e32:
3810   case AMDGPU::V_LSHRREV_B32_e64:
3811   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3812   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3813   case AMDGPU::V_LSHRREV_B32_e32_vi:
3814   case AMDGPU::V_LSHRREV_B32_e64_vi:
3815   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3816   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3817 
3818   case AMDGPU::V_ASHRREV_I32_e32:
3819   case AMDGPU::V_ASHRREV_I32_e64:
3820   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3821   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3822   case AMDGPU::V_ASHRREV_I32_e32_vi:
3823   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3824   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3825   case AMDGPU::V_ASHRREV_I32_e64_vi:
3826 
3827   case AMDGPU::V_LSHLREV_B32_e32:
3828   case AMDGPU::V_LSHLREV_B32_e64:
3829   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3830   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3831   case AMDGPU::V_LSHLREV_B32_e32_vi:
3832   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3833   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3834   case AMDGPU::V_LSHLREV_B32_e64_vi:
3835 
3836   case AMDGPU::V_LSHLREV_B16_e32:
3837   case AMDGPU::V_LSHLREV_B16_e64:
3838   case AMDGPU::V_LSHLREV_B16_e32_vi:
3839   case AMDGPU::V_LSHLREV_B16_e64_vi:
3840   case AMDGPU::V_LSHLREV_B16_gfx10:
3841 
3842   case AMDGPU::V_LSHRREV_B16_e32:
3843   case AMDGPU::V_LSHRREV_B16_e64:
3844   case AMDGPU::V_LSHRREV_B16_e32_vi:
3845   case AMDGPU::V_LSHRREV_B16_e64_vi:
3846   case AMDGPU::V_LSHRREV_B16_gfx10:
3847 
3848   case AMDGPU::V_ASHRREV_I16_e32:
3849   case AMDGPU::V_ASHRREV_I16_e64:
3850   case AMDGPU::V_ASHRREV_I16_e32_vi:
3851   case AMDGPU::V_ASHRREV_I16_e64_vi:
3852   case AMDGPU::V_ASHRREV_I16_gfx10:
3853 
3854   case AMDGPU::V_LSHLREV_B64_e64:
3855   case AMDGPU::V_LSHLREV_B64_gfx10:
3856   case AMDGPU::V_LSHLREV_B64_vi:
3857 
3858   case AMDGPU::V_LSHRREV_B64_e64:
3859   case AMDGPU::V_LSHRREV_B64_gfx10:
3860   case AMDGPU::V_LSHRREV_B64_vi:
3861 
3862   case AMDGPU::V_ASHRREV_I64_e64:
3863   case AMDGPU::V_ASHRREV_I64_gfx10:
3864   case AMDGPU::V_ASHRREV_I64_vi:
3865 
3866   case AMDGPU::V_PK_LSHLREV_B16:
3867   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3868   case AMDGPU::V_PK_LSHLREV_B16_vi:
3869 
3870   case AMDGPU::V_PK_LSHRREV_B16:
3871   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3872   case AMDGPU::V_PK_LSHRREV_B16_vi:
3873   case AMDGPU::V_PK_ASHRREV_I16:
3874   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3875   case AMDGPU::V_PK_ASHRREV_I16_vi:
3876     return true;
3877   default:
3878     return false;
3879   }
3880 }
3881 
3882 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3883 
3884   using namespace SIInstrFlags;
3885   const unsigned Opcode = Inst.getOpcode();
3886   const MCInstrDesc &Desc = MII.get(Opcode);
3887 
3888   // lds_direct register is defined so that it can be used
3889   // with 9-bit operands only. Ignore encodings which do not accept these.
3890   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3891   if ((Desc.TSFlags & Enc) == 0)
3892     return None;
3893 
3894   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3895     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3896     if (SrcIdx == -1)
3897       break;
3898     const auto &Src = Inst.getOperand(SrcIdx);
3899     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3900 
3901       if (isGFX90A())
3902         return StringRef("lds_direct is not supported on this GPU");
3903 
3904       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3905         return StringRef("lds_direct cannot be used with this instruction");
3906 
3907       if (SrcName != OpName::src0)
3908         return StringRef("lds_direct may be used as src0 only");
3909     }
3910   }
3911 
3912   return None;
3913 }
3914 
3915 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3916   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3917     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3918     if (Op.isFlatOffset())
3919       return Op.getStartLoc();
3920   }
3921   return getLoc();
3922 }
3923 
3924 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3925                                          const OperandVector &Operands) {
3926   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3927   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3928     return true;
3929 
3930   auto Opcode = Inst.getOpcode();
3931   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3932   assert(OpNum != -1);
3933 
3934   const auto &Op = Inst.getOperand(OpNum);
3935   if (!hasFlatOffsets() && Op.getImm() != 0) {
3936     Error(getFlatOffsetLoc(Operands),
3937           "flat offset modifier is not supported on this GPU");
3938     return false;
3939   }
3940 
3941   // For FLAT segment the offset must be positive;
3942   // MSB is ignored and forced to zero.
3943   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3944     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3945     if (!isIntN(OffsetSize, Op.getImm())) {
3946       Error(getFlatOffsetLoc(Operands),
3947             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3948       return false;
3949     }
3950   } else {
3951     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3952     if (!isUIntN(OffsetSize, Op.getImm())) {
3953       Error(getFlatOffsetLoc(Operands),
3954             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3955       return false;
3956     }
3957   }
3958 
3959   return true;
3960 }
3961 
3962 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3963   // Start with second operand because SMEM Offset cannot be dst or src0.
3964   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3965     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3966     if (Op.isSMEMOffset())
3967       return Op.getStartLoc();
3968   }
3969   return getLoc();
3970 }
3971 
3972 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3973                                          const OperandVector &Operands) {
3974   if (isCI() || isSI())
3975     return true;
3976 
3977   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3978   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3979     return true;
3980 
3981   auto Opcode = Inst.getOpcode();
3982   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3983   if (OpNum == -1)
3984     return true;
3985 
3986   const auto &Op = Inst.getOperand(OpNum);
3987   if (!Op.isImm())
3988     return true;
3989 
3990   uint64_t Offset = Op.getImm();
3991   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3992   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3993       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3994     return true;
3995 
3996   Error(getSMEMOffsetLoc(Operands),
3997         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3998                                "expected a 21-bit signed offset");
3999 
4000   return false;
4001 }
4002 
4003 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4004   unsigned Opcode = Inst.getOpcode();
4005   const MCInstrDesc &Desc = MII.get(Opcode);
4006   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4007     return true;
4008 
4009   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4010   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4011 
4012   const int OpIndices[] = { Src0Idx, Src1Idx };
4013 
4014   unsigned NumExprs = 0;
4015   unsigned NumLiterals = 0;
4016   uint32_t LiteralValue;
4017 
4018   for (int OpIdx : OpIndices) {
4019     if (OpIdx == -1) break;
4020 
4021     const MCOperand &MO = Inst.getOperand(OpIdx);
4022     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4023     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4024       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4025         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4026         if (NumLiterals == 0 || LiteralValue != Value) {
4027           LiteralValue = Value;
4028           ++NumLiterals;
4029         }
4030       } else if (MO.isExpr()) {
4031         ++NumExprs;
4032       }
4033     }
4034   }
4035 
4036   return NumLiterals + NumExprs <= 1;
4037 }
4038 
4039 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4040   const unsigned Opc = Inst.getOpcode();
4041   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4042       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4043     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4044     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4045 
4046     if (OpSel & ~3)
4047       return false;
4048   }
4049   return true;
4050 }
4051 
4052 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4053                                   const OperandVector &Operands) {
4054   const unsigned Opc = Inst.getOpcode();
4055   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4056   if (DppCtrlIdx < 0)
4057     return true;
4058   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4059 
4060   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4061     // DPP64 is supported for row_newbcast only.
4062     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4063     if (Src0Idx >= 0 &&
4064         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4065       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4066       Error(S, "64 bit dpp only supports row_newbcast");
4067       return false;
4068     }
4069   }
4070 
4071   return true;
4072 }
4073 
4074 // Check if VCC register matches wavefront size
4075 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4076   auto FB = getFeatureBits();
4077   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4078     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4079 }
4080 
4081 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4082 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4083                                          const OperandVector &Operands) {
4084   unsigned Opcode = Inst.getOpcode();
4085   const MCInstrDesc &Desc = MII.get(Opcode);
4086   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4087   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4088       ImmIdx == -1)
4089     return true;
4090 
4091   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4092   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4093   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4094 
4095   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4096 
4097   unsigned NumExprs = 0;
4098   unsigned NumLiterals = 0;
4099   uint32_t LiteralValue;
4100 
4101   for (int OpIdx : OpIndices) {
4102     if (OpIdx == -1)
4103       continue;
4104 
4105     const MCOperand &MO = Inst.getOperand(OpIdx);
4106     if (!MO.isImm() && !MO.isExpr())
4107       continue;
4108     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4109       continue;
4110 
4111     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4112         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4113       Error(getConstLoc(Operands),
4114             "inline constants are not allowed for this operand");
4115       return false;
4116     }
4117 
4118     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4119       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4120       if (NumLiterals == 0 || LiteralValue != Value) {
4121         LiteralValue = Value;
4122         ++NumLiterals;
4123       }
4124     } else if (MO.isExpr()) {
4125       ++NumExprs;
4126     }
4127   }
4128   NumLiterals += NumExprs;
4129 
4130   if (!NumLiterals)
4131     return true;
4132 
4133   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4134     Error(getLitLoc(Operands), "literal operands are not supported");
4135     return false;
4136   }
4137 
4138   if (NumLiterals > 1) {
4139     Error(getLitLoc(Operands), "only one literal operand is allowed");
4140     return false;
4141   }
4142 
4143   return true;
4144 }
4145 
4146 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4147 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4148                          const MCRegisterInfo *MRI) {
4149   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4150   if (OpIdx < 0)
4151     return -1;
4152 
4153   const MCOperand &Op = Inst.getOperand(OpIdx);
4154   if (!Op.isReg())
4155     return -1;
4156 
4157   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4158   auto Reg = Sub ? Sub : Op.getReg();
4159   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4160   return AGPR32.contains(Reg) ? 1 : 0;
4161 }
4162 
4163 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4164   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4165   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4166                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4167                   SIInstrFlags::DS)) == 0)
4168     return true;
4169 
4170   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4171                                                       : AMDGPU::OpName::vdata;
4172 
4173   const MCRegisterInfo *MRI = getMRI();
4174   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4175   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4176 
4177   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4178     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4179     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4180       return false;
4181   }
4182 
4183   auto FB = getFeatureBits();
4184   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4185     if (DataAreg < 0 || DstAreg < 0)
4186       return true;
4187     return DstAreg == DataAreg;
4188   }
4189 
4190   return DstAreg < 1 && DataAreg < 1;
4191 }
4192 
4193 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4194   auto FB = getFeatureBits();
4195   if (!FB[AMDGPU::FeatureGFX90AInsts])
4196     return true;
4197 
4198   const MCRegisterInfo *MRI = getMRI();
4199   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4200   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4201   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4202     const MCOperand &Op = Inst.getOperand(I);
4203     if (!Op.isReg())
4204       continue;
4205 
4206     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4207     if (!Sub)
4208       continue;
4209 
4210     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4211       return false;
4212     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4213       return false;
4214   }
4215 
4216   return true;
4217 }
4218 
4219 // gfx90a has an undocumented limitation:
4220 // DS_GWS opcodes must use even aligned registers.
4221 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4222                                   const OperandVector &Operands) {
4223   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4224     return true;
4225 
4226   int Opc = Inst.getOpcode();
4227   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4228       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4229     return true;
4230 
4231   const MCRegisterInfo *MRI = getMRI();
4232   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4233   int Data0Pos =
4234       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4235   assert(Data0Pos != -1);
4236   auto Reg = Inst.getOperand(Data0Pos).getReg();
4237   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4238   if (RegIdx & 1) {
4239     SMLoc RegLoc = getRegLoc(Reg, Operands);
4240     Error(RegLoc, "vgpr must be even aligned");
4241     return false;
4242   }
4243 
4244   return true;
4245 }
4246 
4247 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4248                                             const OperandVector &Operands,
4249                                             const SMLoc &IDLoc) {
4250   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4251                                            AMDGPU::OpName::cpol);
4252   if (CPolPos == -1)
4253     return true;
4254 
4255   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4256 
4257   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4258   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4259       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4260     Error(IDLoc, "invalid cache policy for SMRD instruction");
4261     return false;
4262   }
4263 
4264   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4265     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4266     StringRef CStr(S.getPointer());
4267     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4268     Error(S, "scc is not supported on this GPU");
4269     return false;
4270   }
4271 
4272   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4273     return true;
4274 
4275   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4276     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4277       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4278                               : "instruction must use glc");
4279       return false;
4280     }
4281   } else {
4282     if (CPol & CPol::GLC) {
4283       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4284       StringRef CStr(S.getPointer());
4285       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4286       Error(S, isGFX940() ? "instruction must not use sc0"
4287                           : "instruction must not use glc");
4288       return false;
4289     }
4290   }
4291 
4292   return true;
4293 }
4294 
4295 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4296                                           const SMLoc &IDLoc,
4297                                           const OperandVector &Operands) {
4298   if (auto ErrMsg = validateLdsDirect(Inst)) {
4299     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4300     return false;
4301   }
4302   if (!validateSOPLiteral(Inst)) {
4303     Error(getLitLoc(Operands),
4304       "only one literal operand is allowed");
4305     return false;
4306   }
4307   if (!validateVOPLiteral(Inst, Operands)) {
4308     return false;
4309   }
4310   if (!validateConstantBusLimitations(Inst, Operands)) {
4311     return false;
4312   }
4313   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4314     return false;
4315   }
4316   if (!validateIntClampSupported(Inst)) {
4317     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4318       "integer clamping is not supported on this GPU");
4319     return false;
4320   }
4321   if (!validateOpSel(Inst)) {
4322     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4323       "invalid op_sel operand");
4324     return false;
4325   }
4326   if (!validateDPP(Inst, Operands)) {
4327     return false;
4328   }
4329   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4330   if (!validateMIMGD16(Inst)) {
4331     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4332       "d16 modifier is not supported on this GPU");
4333     return false;
4334   }
4335   if (!validateMIMGDim(Inst)) {
4336     Error(IDLoc, "dim modifier is required on this GPU");
4337     return false;
4338   }
4339   if (!validateMIMGMSAA(Inst)) {
4340     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4341           "invalid dim; must be MSAA type");
4342     return false;
4343   }
4344   if (!validateMIMGDataSize(Inst)) {
4345     Error(IDLoc,
4346       "image data size does not match dmask and tfe");
4347     return false;
4348   }
4349   if (!validateMIMGAddrSize(Inst)) {
4350     Error(IDLoc,
4351       "image address size does not match dim and a16");
4352     return false;
4353   }
4354   if (!validateMIMGAtomicDMask(Inst)) {
4355     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4356       "invalid atomic image dmask");
4357     return false;
4358   }
4359   if (!validateMIMGGatherDMask(Inst)) {
4360     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4361       "invalid image_gather dmask: only one bit must be set");
4362     return false;
4363   }
4364   if (!validateMovrels(Inst, Operands)) {
4365     return false;
4366   }
4367   if (!validateFlatOffset(Inst, Operands)) {
4368     return false;
4369   }
4370   if (!validateSMEMOffset(Inst, Operands)) {
4371     return false;
4372   }
4373   if (!validateMAIAccWrite(Inst, Operands)) {
4374     return false;
4375   }
4376   if (!validateMFMA(Inst, Operands)) {
4377     return false;
4378   }
4379   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4380     return false;
4381   }
4382 
4383   if (!validateAGPRLdSt(Inst)) {
4384     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4385     ? "invalid register class: data and dst should be all VGPR or AGPR"
4386     : "invalid register class: agpr loads and stores not supported on this GPU"
4387     );
4388     return false;
4389   }
4390   if (!validateVGPRAlign(Inst)) {
4391     Error(IDLoc,
4392       "invalid register class: vgpr tuples must be 64 bit aligned");
4393     return false;
4394   }
4395   if (!validateGWS(Inst, Operands)) {
4396     return false;
4397   }
4398 
4399   if (!validateDivScale(Inst)) {
4400     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4401     return false;
4402   }
4403   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4404     return false;
4405   }
4406 
4407   return true;
4408 }
4409 
4410 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4411                                             const FeatureBitset &FBS,
4412                                             unsigned VariantID = 0);
4413 
4414 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4415                                 const FeatureBitset &AvailableFeatures,
4416                                 unsigned VariantID);
4417 
4418 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4419                                        const FeatureBitset &FBS) {
4420   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4421 }
4422 
4423 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4424                                        const FeatureBitset &FBS,
4425                                        ArrayRef<unsigned> Variants) {
4426   for (auto Variant : Variants) {
4427     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4428       return true;
4429   }
4430 
4431   return false;
4432 }
4433 
4434 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4435                                                   const SMLoc &IDLoc) {
4436   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4437 
4438   // Check if requested instruction variant is supported.
4439   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4440     return false;
4441 
4442   // This instruction is not supported.
4443   // Clear any other pending errors because they are no longer relevant.
4444   getParser().clearPendingErrors();
4445 
4446   // Requested instruction variant is not supported.
4447   // Check if any other variants are supported.
4448   StringRef VariantName = getMatchedVariantName();
4449   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4450     return Error(IDLoc,
4451                  Twine(VariantName,
4452                        " variant of this instruction is not supported"));
4453   }
4454 
4455   // Finally check if this instruction is supported on any other GPU.
4456   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4457     return Error(IDLoc, "instruction not supported on this GPU");
4458   }
4459 
4460   // Instruction not supported on any GPU. Probably a typo.
4461   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4462   return Error(IDLoc, "invalid instruction" + Suggestion);
4463 }
4464 
4465 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4466                                               OperandVector &Operands,
4467                                               MCStreamer &Out,
4468                                               uint64_t &ErrorInfo,
4469                                               bool MatchingInlineAsm) {
4470   MCInst Inst;
4471   unsigned Result = Match_Success;
4472   for (auto Variant : getMatchedVariants()) {
4473     uint64_t EI;
4474     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4475                                   Variant);
4476     // We order match statuses from least to most specific. We use most specific
4477     // status as resulting
4478     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4479     if ((R == Match_Success) ||
4480         (R == Match_PreferE32) ||
4481         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4482         (R == Match_InvalidOperand && Result != Match_MissingFeature
4483                                    && Result != Match_PreferE32) ||
4484         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4485                                    && Result != Match_MissingFeature
4486                                    && Result != Match_PreferE32)) {
4487       Result = R;
4488       ErrorInfo = EI;
4489     }
4490     if (R == Match_Success)
4491       break;
4492   }
4493 
4494   if (Result == Match_Success) {
4495     if (!validateInstruction(Inst, IDLoc, Operands)) {
4496       return true;
4497     }
4498     Inst.setLoc(IDLoc);
4499     Out.emitInstruction(Inst, getSTI());
4500     return false;
4501   }
4502 
4503   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4504   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4505     return true;
4506   }
4507 
4508   switch (Result) {
4509   default: break;
4510   case Match_MissingFeature:
4511     // It has been verified that the specified instruction
4512     // mnemonic is valid. A match was found but it requires
4513     // features which are not supported on this GPU.
4514     return Error(IDLoc, "operands are not valid for this GPU or mode");
4515 
4516   case Match_InvalidOperand: {
4517     SMLoc ErrorLoc = IDLoc;
4518     if (ErrorInfo != ~0ULL) {
4519       if (ErrorInfo >= Operands.size()) {
4520         return Error(IDLoc, "too few operands for instruction");
4521       }
4522       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4523       if (ErrorLoc == SMLoc())
4524         ErrorLoc = IDLoc;
4525     }
4526     return Error(ErrorLoc, "invalid operand for instruction");
4527   }
4528 
4529   case Match_PreferE32:
4530     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4531                         "should be encoded as e32");
4532   case Match_MnemonicFail:
4533     llvm_unreachable("Invalid instructions should have been handled already");
4534   }
4535   llvm_unreachable("Implement any new match types added!");
4536 }
4537 
4538 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4539   int64_t Tmp = -1;
4540   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4541     return true;
4542   }
4543   if (getParser().parseAbsoluteExpression(Tmp)) {
4544     return true;
4545   }
4546   Ret = static_cast<uint32_t>(Tmp);
4547   return false;
4548 }
4549 
4550 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4551                                                uint32_t &Minor) {
4552   if (ParseAsAbsoluteExpression(Major))
4553     return TokError("invalid major version");
4554 
4555   if (!trySkipToken(AsmToken::Comma))
4556     return TokError("minor version number required, comma expected");
4557 
4558   if (ParseAsAbsoluteExpression(Minor))
4559     return TokError("invalid minor version");
4560 
4561   return false;
4562 }
4563 
4564 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4565   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4566     return TokError("directive only supported for amdgcn architecture");
4567 
4568   std::string TargetIDDirective;
4569   SMLoc TargetStart = getTok().getLoc();
4570   if (getParser().parseEscapedString(TargetIDDirective))
4571     return true;
4572 
4573   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4574   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4575     return getParser().Error(TargetRange.Start,
4576         (Twine(".amdgcn_target directive's target id ") +
4577          Twine(TargetIDDirective) +
4578          Twine(" does not match the specified target id ") +
4579          Twine(getTargetStreamer().getTargetID()->toString())).str());
4580 
4581   return false;
4582 }
4583 
4584 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4585   return Error(Range.Start, "value out of range", Range);
4586 }
4587 
4588 bool AMDGPUAsmParser::calculateGPRBlocks(
4589     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4590     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4591     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4592     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4593   // TODO(scott.linder): These calculations are duplicated from
4594   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4595   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4596 
4597   unsigned NumVGPRs = NextFreeVGPR;
4598   unsigned NumSGPRs = NextFreeSGPR;
4599 
4600   if (Version.Major >= 10)
4601     NumSGPRs = 0;
4602   else {
4603     unsigned MaxAddressableNumSGPRs =
4604         IsaInfo::getAddressableNumSGPRs(&getSTI());
4605 
4606     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4607         NumSGPRs > MaxAddressableNumSGPRs)
4608       return OutOfRangeError(SGPRRange);
4609 
4610     NumSGPRs +=
4611         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4612 
4613     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4614         NumSGPRs > MaxAddressableNumSGPRs)
4615       return OutOfRangeError(SGPRRange);
4616 
4617     if (Features.test(FeatureSGPRInitBug))
4618       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4619   }
4620 
4621   VGPRBlocks =
4622       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4623   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4624 
4625   return false;
4626 }
4627 
4628 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4629   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4630     return TokError("directive only supported for amdgcn architecture");
4631 
4632   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4633     return TokError("directive only supported for amdhsa OS");
4634 
4635   StringRef KernelName;
4636   if (getParser().parseIdentifier(KernelName))
4637     return true;
4638 
4639   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4640 
4641   StringSet<> Seen;
4642 
4643   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4644 
4645   SMRange VGPRRange;
4646   uint64_t NextFreeVGPR = 0;
4647   uint64_t AccumOffset = 0;
4648   SMRange SGPRRange;
4649   uint64_t NextFreeSGPR = 0;
4650 
4651   // Count the number of user SGPRs implied from the enabled feature bits.
4652   unsigned ImpliedUserSGPRCount = 0;
4653 
4654   // Track if the asm explicitly contains the directive for the user SGPR
4655   // count.
4656   Optional<unsigned> ExplicitUserSGPRCount;
4657   bool ReserveVCC = true;
4658   bool ReserveFlatScr = true;
4659   Optional<bool> EnableWavefrontSize32;
4660 
4661   while (true) {
4662     while (trySkipToken(AsmToken::EndOfStatement));
4663 
4664     StringRef ID;
4665     SMRange IDRange = getTok().getLocRange();
4666     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4667       return true;
4668 
4669     if (ID == ".end_amdhsa_kernel")
4670       break;
4671 
4672     if (Seen.find(ID) != Seen.end())
4673       return TokError(".amdhsa_ directives cannot be repeated");
4674     Seen.insert(ID);
4675 
4676     SMLoc ValStart = getLoc();
4677     int64_t IVal;
4678     if (getParser().parseAbsoluteExpression(IVal))
4679       return true;
4680     SMLoc ValEnd = getLoc();
4681     SMRange ValRange = SMRange(ValStart, ValEnd);
4682 
4683     if (IVal < 0)
4684       return OutOfRangeError(ValRange);
4685 
4686     uint64_t Val = IVal;
4687 
4688 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4689   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4690     return OutOfRangeError(RANGE);                                             \
4691   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4692 
4693     if (ID == ".amdhsa_group_segment_fixed_size") {
4694       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4695         return OutOfRangeError(ValRange);
4696       KD.group_segment_fixed_size = Val;
4697     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4698       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4699         return OutOfRangeError(ValRange);
4700       KD.private_segment_fixed_size = Val;
4701     } else if (ID == ".amdhsa_kernarg_size") {
4702       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4703         return OutOfRangeError(ValRange);
4704       KD.kernarg_size = Val;
4705     } else if (ID == ".amdhsa_user_sgpr_count") {
4706       ExplicitUserSGPRCount = Val;
4707     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4708       if (hasArchitectedFlatScratch())
4709         return Error(IDRange.Start,
4710                      "directive is not supported with architected flat scratch",
4711                      IDRange);
4712       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4713                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4714                        Val, ValRange);
4715       if (Val)
4716         ImpliedUserSGPRCount += 4;
4717     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4718       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4719                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4720                        ValRange);
4721       if (Val)
4722         ImpliedUserSGPRCount += 2;
4723     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4724       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4725                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4726                        ValRange);
4727       if (Val)
4728         ImpliedUserSGPRCount += 2;
4729     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4730       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4731                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4732                        Val, ValRange);
4733       if (Val)
4734         ImpliedUserSGPRCount += 2;
4735     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4736       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4737                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4738                        ValRange);
4739       if (Val)
4740         ImpliedUserSGPRCount += 2;
4741     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4742       if (hasArchitectedFlatScratch())
4743         return Error(IDRange.Start,
4744                      "directive is not supported with architected flat scratch",
4745                      IDRange);
4746       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4747                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4748                        ValRange);
4749       if (Val)
4750         ImpliedUserSGPRCount += 2;
4751     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4752       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4753                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4754                        Val, ValRange);
4755       if (Val)
4756         ImpliedUserSGPRCount += 1;
4757     } else if (ID == ".amdhsa_wavefront_size32") {
4758       if (IVersion.Major < 10)
4759         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4760       EnableWavefrontSize32 = Val;
4761       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4762                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4763                        Val, ValRange);
4764     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4765       if (hasArchitectedFlatScratch())
4766         return Error(IDRange.Start,
4767                      "directive is not supported with architected flat scratch",
4768                      IDRange);
4769       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4770                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4771     } else if (ID == ".amdhsa_enable_private_segment") {
4772       if (!hasArchitectedFlatScratch())
4773         return Error(
4774             IDRange.Start,
4775             "directive is not supported without architected flat scratch",
4776             IDRange);
4777       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4778                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4779     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4780       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4781                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4782                        ValRange);
4783     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4784       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4785                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4786                        ValRange);
4787     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4788       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4789                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4790                        ValRange);
4791     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4792       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4793                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4794                        ValRange);
4795     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4796       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4797                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4798                        ValRange);
4799     } else if (ID == ".amdhsa_next_free_vgpr") {
4800       VGPRRange = ValRange;
4801       NextFreeVGPR = Val;
4802     } else if (ID == ".amdhsa_next_free_sgpr") {
4803       SGPRRange = ValRange;
4804       NextFreeSGPR = Val;
4805     } else if (ID == ".amdhsa_accum_offset") {
4806       if (!isGFX90A())
4807         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4808       AccumOffset = Val;
4809     } else if (ID == ".amdhsa_reserve_vcc") {
4810       if (!isUInt<1>(Val))
4811         return OutOfRangeError(ValRange);
4812       ReserveVCC = Val;
4813     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4814       if (IVersion.Major < 7)
4815         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4816       if (hasArchitectedFlatScratch())
4817         return Error(IDRange.Start,
4818                      "directive is not supported with architected flat scratch",
4819                      IDRange);
4820       if (!isUInt<1>(Val))
4821         return OutOfRangeError(ValRange);
4822       ReserveFlatScr = Val;
4823     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4824       if (IVersion.Major < 8)
4825         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4826       if (!isUInt<1>(Val))
4827         return OutOfRangeError(ValRange);
4828       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4829         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4830                                  IDRange);
4831     } else if (ID == ".amdhsa_float_round_mode_32") {
4832       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4833                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4834     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4835       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4836                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4837     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4838       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4839                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4840     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4841       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4842                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4843                        ValRange);
4844     } else if (ID == ".amdhsa_dx10_clamp") {
4845       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4846                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4847     } else if (ID == ".amdhsa_ieee_mode") {
4848       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4849                        Val, ValRange);
4850     } else if (ID == ".amdhsa_fp16_overflow") {
4851       if (IVersion.Major < 9)
4852         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4853       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4854                        ValRange);
4855     } else if (ID == ".amdhsa_tg_split") {
4856       if (!isGFX90A())
4857         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4858       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4859                        ValRange);
4860     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4861       if (IVersion.Major < 10)
4862         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4863       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4864                        ValRange);
4865     } else if (ID == ".amdhsa_memory_ordered") {
4866       if (IVersion.Major < 10)
4867         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4868       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4869                        ValRange);
4870     } else if (ID == ".amdhsa_forward_progress") {
4871       if (IVersion.Major < 10)
4872         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4873       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4874                        ValRange);
4875     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4876       PARSE_BITS_ENTRY(
4877           KD.compute_pgm_rsrc2,
4878           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4879           ValRange);
4880     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4881       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4882                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4883                        Val, ValRange);
4884     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4885       PARSE_BITS_ENTRY(
4886           KD.compute_pgm_rsrc2,
4887           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4888           ValRange);
4889     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4890       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4891                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4892                        Val, ValRange);
4893     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4894       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4895                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4896                        Val, ValRange);
4897     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4898       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4899                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4900                        Val, ValRange);
4901     } else if (ID == ".amdhsa_exception_int_div_zero") {
4902       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4903                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4904                        Val, ValRange);
4905     } else {
4906       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4907     }
4908 
4909 #undef PARSE_BITS_ENTRY
4910   }
4911 
4912   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4913     return TokError(".amdhsa_next_free_vgpr directive is required");
4914 
4915   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4916     return TokError(".amdhsa_next_free_sgpr directive is required");
4917 
4918   unsigned VGPRBlocks;
4919   unsigned SGPRBlocks;
4920   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4921                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4922                          EnableWavefrontSize32, NextFreeVGPR,
4923                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4924                          SGPRBlocks))
4925     return true;
4926 
4927   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4928           VGPRBlocks))
4929     return OutOfRangeError(VGPRRange);
4930   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4931                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4932 
4933   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4934           SGPRBlocks))
4935     return OutOfRangeError(SGPRRange);
4936   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4937                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4938                   SGPRBlocks);
4939 
4940   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
4941     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
4942                     "enabled user SGPRs");
4943 
4944   unsigned UserSGPRCount =
4945       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
4946 
4947   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4948     return TokError("too many user SGPRs enabled");
4949   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4950                   UserSGPRCount);
4951 
4952   if (isGFX90A()) {
4953     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4954       return TokError(".amdhsa_accum_offset directive is required");
4955     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4956       return TokError("accum_offset should be in range [4..256] in "
4957                       "increments of 4");
4958     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4959       return TokError("accum_offset exceeds total VGPR allocation");
4960     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4961                     (AccumOffset / 4 - 1));
4962   }
4963 
4964   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4965       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4966       ReserveFlatScr);
4967   return false;
4968 }
4969 
4970 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4971   uint32_t Major;
4972   uint32_t Minor;
4973 
4974   if (ParseDirectiveMajorMinor(Major, Minor))
4975     return true;
4976 
4977   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4978   return false;
4979 }
4980 
4981 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4982   uint32_t Major;
4983   uint32_t Minor;
4984   uint32_t Stepping;
4985   StringRef VendorName;
4986   StringRef ArchName;
4987 
4988   // If this directive has no arguments, then use the ISA version for the
4989   // targeted GPU.
4990   if (isToken(AsmToken::EndOfStatement)) {
4991     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4992     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4993                                                         ISA.Stepping,
4994                                                         "AMD", "AMDGPU");
4995     return false;
4996   }
4997 
4998   if (ParseDirectiveMajorMinor(Major, Minor))
4999     return true;
5000 
5001   if (!trySkipToken(AsmToken::Comma))
5002     return TokError("stepping version number required, comma expected");
5003 
5004   if (ParseAsAbsoluteExpression(Stepping))
5005     return TokError("invalid stepping version");
5006 
5007   if (!trySkipToken(AsmToken::Comma))
5008     return TokError("vendor name required, comma expected");
5009 
5010   if (!parseString(VendorName, "invalid vendor name"))
5011     return true;
5012 
5013   if (!trySkipToken(AsmToken::Comma))
5014     return TokError("arch name required, comma expected");
5015 
5016   if (!parseString(ArchName, "invalid arch name"))
5017     return true;
5018 
5019   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5020                                                       VendorName, ArchName);
5021   return false;
5022 }
5023 
5024 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5025                                                amd_kernel_code_t &Header) {
5026   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5027   // assembly for backwards compatibility.
5028   if (ID == "max_scratch_backing_memory_byte_size") {
5029     Parser.eatToEndOfStatement();
5030     return false;
5031   }
5032 
5033   SmallString<40> ErrStr;
5034   raw_svector_ostream Err(ErrStr);
5035   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5036     return TokError(Err.str());
5037   }
5038   Lex();
5039 
5040   if (ID == "enable_wavefront_size32") {
5041     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5042       if (!isGFX10Plus())
5043         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5044       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5045         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5046     } else {
5047       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5048         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5049     }
5050   }
5051 
5052   if (ID == "wavefront_size") {
5053     if (Header.wavefront_size == 5) {
5054       if (!isGFX10Plus())
5055         return TokError("wavefront_size=5 is only allowed on GFX10+");
5056       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5057         return TokError("wavefront_size=5 requires +WavefrontSize32");
5058     } else if (Header.wavefront_size == 6) {
5059       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5060         return TokError("wavefront_size=6 requires +WavefrontSize64");
5061     }
5062   }
5063 
5064   if (ID == "enable_wgp_mode") {
5065     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5066         !isGFX10Plus())
5067       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5068   }
5069 
5070   if (ID == "enable_mem_ordered") {
5071     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5072         !isGFX10Plus())
5073       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5074   }
5075 
5076   if (ID == "enable_fwd_progress") {
5077     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5078         !isGFX10Plus())
5079       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5080   }
5081 
5082   return false;
5083 }
5084 
5085 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5086   amd_kernel_code_t Header;
5087   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5088 
5089   while (true) {
5090     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5091     // will set the current token to EndOfStatement.
5092     while(trySkipToken(AsmToken::EndOfStatement));
5093 
5094     StringRef ID;
5095     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5096       return true;
5097 
5098     if (ID == ".end_amd_kernel_code_t")
5099       break;
5100 
5101     if (ParseAMDKernelCodeTValue(ID, Header))
5102       return true;
5103   }
5104 
5105   getTargetStreamer().EmitAMDKernelCodeT(Header);
5106 
5107   return false;
5108 }
5109 
5110 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5111   StringRef KernelName;
5112   if (!parseId(KernelName, "expected symbol name"))
5113     return true;
5114 
5115   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5116                                            ELF::STT_AMDGPU_HSA_KERNEL);
5117 
5118   KernelScope.initialize(getContext());
5119   return false;
5120 }
5121 
5122 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5123   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5124     return Error(getLoc(),
5125                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5126                  "architectures");
5127   }
5128 
5129   auto TargetIDDirective = getLexer().getTok().getStringContents();
5130   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5131     return Error(getParser().getTok().getLoc(), "target id must match options");
5132 
5133   getTargetStreamer().EmitISAVersion();
5134   Lex();
5135 
5136   return false;
5137 }
5138 
5139 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5140   const char *AssemblerDirectiveBegin;
5141   const char *AssemblerDirectiveEnd;
5142   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5143       isHsaAbiVersion3AndAbove(&getSTI())
5144           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5145                             HSAMD::V3::AssemblerDirectiveEnd)
5146           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5147                             HSAMD::AssemblerDirectiveEnd);
5148 
5149   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5150     return Error(getLoc(),
5151                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5152                  "not available on non-amdhsa OSes")).str());
5153   }
5154 
5155   std::string HSAMetadataString;
5156   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5157                           HSAMetadataString))
5158     return true;
5159 
5160   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5161     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5162       return Error(getLoc(), "invalid HSA metadata");
5163   } else {
5164     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5165       return Error(getLoc(), "invalid HSA metadata");
5166   }
5167 
5168   return false;
5169 }
5170 
5171 /// Common code to parse out a block of text (typically YAML) between start and
5172 /// end directives.
5173 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5174                                           const char *AssemblerDirectiveEnd,
5175                                           std::string &CollectString) {
5176 
5177   raw_string_ostream CollectStream(CollectString);
5178 
5179   getLexer().setSkipSpace(false);
5180 
5181   bool FoundEnd = false;
5182   while (!isToken(AsmToken::Eof)) {
5183     while (isToken(AsmToken::Space)) {
5184       CollectStream << getTokenStr();
5185       Lex();
5186     }
5187 
5188     if (trySkipId(AssemblerDirectiveEnd)) {
5189       FoundEnd = true;
5190       break;
5191     }
5192 
5193     CollectStream << Parser.parseStringToEndOfStatement()
5194                   << getContext().getAsmInfo()->getSeparatorString();
5195 
5196     Parser.eatToEndOfStatement();
5197   }
5198 
5199   getLexer().setSkipSpace(true);
5200 
5201   if (isToken(AsmToken::Eof) && !FoundEnd) {
5202     return TokError(Twine("expected directive ") +
5203                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5204   }
5205 
5206   CollectStream.flush();
5207   return false;
5208 }
5209 
5210 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5211 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5212   std::string String;
5213   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5214                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5215     return true;
5216 
5217   auto PALMetadata = getTargetStreamer().getPALMetadata();
5218   if (!PALMetadata->setFromString(String))
5219     return Error(getLoc(), "invalid PAL metadata");
5220   return false;
5221 }
5222 
5223 /// Parse the assembler directive for old linear-format PAL metadata.
5224 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5225   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5226     return Error(getLoc(),
5227                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5228                  "not available on non-amdpal OSes")).str());
5229   }
5230 
5231   auto PALMetadata = getTargetStreamer().getPALMetadata();
5232   PALMetadata->setLegacy();
5233   for (;;) {
5234     uint32_t Key, Value;
5235     if (ParseAsAbsoluteExpression(Key)) {
5236       return TokError(Twine("invalid value in ") +
5237                       Twine(PALMD::AssemblerDirective));
5238     }
5239     if (!trySkipToken(AsmToken::Comma)) {
5240       return TokError(Twine("expected an even number of values in ") +
5241                       Twine(PALMD::AssemblerDirective));
5242     }
5243     if (ParseAsAbsoluteExpression(Value)) {
5244       return TokError(Twine("invalid value in ") +
5245                       Twine(PALMD::AssemblerDirective));
5246     }
5247     PALMetadata->setRegister(Key, Value);
5248     if (!trySkipToken(AsmToken::Comma))
5249       break;
5250   }
5251   return false;
5252 }
5253 
5254 /// ParseDirectiveAMDGPULDS
5255 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5256 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5257   if (getParser().checkForValidSection())
5258     return true;
5259 
5260   StringRef Name;
5261   SMLoc NameLoc = getLoc();
5262   if (getParser().parseIdentifier(Name))
5263     return TokError("expected identifier in directive");
5264 
5265   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5266   if (parseToken(AsmToken::Comma, "expected ','"))
5267     return true;
5268 
5269   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5270 
5271   int64_t Size;
5272   SMLoc SizeLoc = getLoc();
5273   if (getParser().parseAbsoluteExpression(Size))
5274     return true;
5275   if (Size < 0)
5276     return Error(SizeLoc, "size must be non-negative");
5277   if (Size > LocalMemorySize)
5278     return Error(SizeLoc, "size is too large");
5279 
5280   int64_t Alignment = 4;
5281   if (trySkipToken(AsmToken::Comma)) {
5282     SMLoc AlignLoc = getLoc();
5283     if (getParser().parseAbsoluteExpression(Alignment))
5284       return true;
5285     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5286       return Error(AlignLoc, "alignment must be a power of two");
5287 
5288     // Alignment larger than the size of LDS is possible in theory, as long
5289     // as the linker manages to place to symbol at address 0, but we do want
5290     // to make sure the alignment fits nicely into a 32-bit integer.
5291     if (Alignment >= 1u << 31)
5292       return Error(AlignLoc, "alignment is too large");
5293   }
5294 
5295   if (parseToken(AsmToken::EndOfStatement,
5296                  "unexpected token in '.amdgpu_lds' directive"))
5297     return true;
5298 
5299   Symbol->redefineIfPossible();
5300   if (!Symbol->isUndefined())
5301     return Error(NameLoc, "invalid symbol redefinition");
5302 
5303   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5304   return false;
5305 }
5306 
5307 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5308   StringRef IDVal = DirectiveID.getString();
5309 
5310   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5311     if (IDVal == ".amdhsa_kernel")
5312      return ParseDirectiveAMDHSAKernel();
5313 
5314     // TODO: Restructure/combine with PAL metadata directive.
5315     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5316       return ParseDirectiveHSAMetadata();
5317   } else {
5318     if (IDVal == ".hsa_code_object_version")
5319       return ParseDirectiveHSACodeObjectVersion();
5320 
5321     if (IDVal == ".hsa_code_object_isa")
5322       return ParseDirectiveHSACodeObjectISA();
5323 
5324     if (IDVal == ".amd_kernel_code_t")
5325       return ParseDirectiveAMDKernelCodeT();
5326 
5327     if (IDVal == ".amdgpu_hsa_kernel")
5328       return ParseDirectiveAMDGPUHsaKernel();
5329 
5330     if (IDVal == ".amd_amdgpu_isa")
5331       return ParseDirectiveISAVersion();
5332 
5333     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5334       return ParseDirectiveHSAMetadata();
5335   }
5336 
5337   if (IDVal == ".amdgcn_target")
5338     return ParseDirectiveAMDGCNTarget();
5339 
5340   if (IDVal == ".amdgpu_lds")
5341     return ParseDirectiveAMDGPULDS();
5342 
5343   if (IDVal == PALMD::AssemblerDirectiveBegin)
5344     return ParseDirectivePALMetadataBegin();
5345 
5346   if (IDVal == PALMD::AssemblerDirective)
5347     return ParseDirectivePALMetadata();
5348 
5349   return true;
5350 }
5351 
5352 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5353                                            unsigned RegNo) {
5354 
5355   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5356     return isGFX9Plus();
5357 
5358   // GFX10 has 2 more SGPRs 104 and 105.
5359   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5360     return hasSGPR104_SGPR105();
5361 
5362   switch (RegNo) {
5363   case AMDGPU::SRC_SHARED_BASE:
5364   case AMDGPU::SRC_SHARED_LIMIT:
5365   case AMDGPU::SRC_PRIVATE_BASE:
5366   case AMDGPU::SRC_PRIVATE_LIMIT:
5367   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5368     return isGFX9Plus();
5369   case AMDGPU::TBA:
5370   case AMDGPU::TBA_LO:
5371   case AMDGPU::TBA_HI:
5372   case AMDGPU::TMA:
5373   case AMDGPU::TMA_LO:
5374   case AMDGPU::TMA_HI:
5375     return !isGFX9Plus();
5376   case AMDGPU::XNACK_MASK:
5377   case AMDGPU::XNACK_MASK_LO:
5378   case AMDGPU::XNACK_MASK_HI:
5379     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5380   case AMDGPU::SGPR_NULL:
5381     return isGFX10Plus();
5382   default:
5383     break;
5384   }
5385 
5386   if (isCI())
5387     return true;
5388 
5389   if (isSI() || isGFX10Plus()) {
5390     // No flat_scr on SI.
5391     // On GFX10 flat scratch is not a valid register operand and can only be
5392     // accessed with s_setreg/s_getreg.
5393     switch (RegNo) {
5394     case AMDGPU::FLAT_SCR:
5395     case AMDGPU::FLAT_SCR_LO:
5396     case AMDGPU::FLAT_SCR_HI:
5397       return false;
5398     default:
5399       return true;
5400     }
5401   }
5402 
5403   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5404   // SI/CI have.
5405   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5406     return hasSGPR102_SGPR103();
5407 
5408   return true;
5409 }
5410 
5411 OperandMatchResultTy
5412 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5413                               OperandMode Mode) {
5414   // Try to parse with a custom parser
5415   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5416 
5417   // If we successfully parsed the operand or if there as an error parsing,
5418   // we are done.
5419   //
5420   // If we are parsing after we reach EndOfStatement then this means we
5421   // are appending default values to the Operands list.  This is only done
5422   // by custom parser, so we shouldn't continue on to the generic parsing.
5423   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5424       isToken(AsmToken::EndOfStatement))
5425     return ResTy;
5426 
5427   SMLoc RBraceLoc;
5428   SMLoc LBraceLoc = getLoc();
5429   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5430     unsigned Prefix = Operands.size();
5431 
5432     for (;;) {
5433       auto Loc = getLoc();
5434       ResTy = parseReg(Operands);
5435       if (ResTy == MatchOperand_NoMatch)
5436         Error(Loc, "expected a register");
5437       if (ResTy != MatchOperand_Success)
5438         return MatchOperand_ParseFail;
5439 
5440       RBraceLoc = getLoc();
5441       if (trySkipToken(AsmToken::RBrac))
5442         break;
5443 
5444       if (!skipToken(AsmToken::Comma,
5445                      "expected a comma or a closing square bracket")) {
5446         return MatchOperand_ParseFail;
5447       }
5448     }
5449 
5450     if (Operands.size() - Prefix > 1) {
5451       Operands.insert(Operands.begin() + Prefix,
5452                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5453       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5454     }
5455 
5456     return MatchOperand_Success;
5457   }
5458 
5459   return parseRegOrImm(Operands);
5460 }
5461 
5462 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5463   // Clear any forced encodings from the previous instruction.
5464   setForcedEncodingSize(0);
5465   setForcedDPP(false);
5466   setForcedSDWA(false);
5467 
5468   if (Name.endswith("_e64")) {
5469     setForcedEncodingSize(64);
5470     return Name.substr(0, Name.size() - 4);
5471   } else if (Name.endswith("_e32")) {
5472     setForcedEncodingSize(32);
5473     return Name.substr(0, Name.size() - 4);
5474   } else if (Name.endswith("_dpp")) {
5475     setForcedDPP(true);
5476     return Name.substr(0, Name.size() - 4);
5477   } else if (Name.endswith("_sdwa")) {
5478     setForcedSDWA(true);
5479     return Name.substr(0, Name.size() - 5);
5480   }
5481   return Name;
5482 }
5483 
5484 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5485                                        StringRef Name,
5486                                        SMLoc NameLoc, OperandVector &Operands) {
5487   // Add the instruction mnemonic
5488   Name = parseMnemonicSuffix(Name);
5489   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5490 
5491   bool IsMIMG = Name.startswith("image_");
5492 
5493   while (!trySkipToken(AsmToken::EndOfStatement)) {
5494     OperandMode Mode = OperandMode_Default;
5495     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5496       Mode = OperandMode_NSA;
5497     CPolSeen = 0;
5498     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5499 
5500     if (Res != MatchOperand_Success) {
5501       checkUnsupportedInstruction(Name, NameLoc);
5502       if (!Parser.hasPendingError()) {
5503         // FIXME: use real operand location rather than the current location.
5504         StringRef Msg =
5505           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5506                                             "not a valid operand.";
5507         Error(getLoc(), Msg);
5508       }
5509       while (!trySkipToken(AsmToken::EndOfStatement)) {
5510         lex();
5511       }
5512       return true;
5513     }
5514 
5515     // Eat the comma or space if there is one.
5516     trySkipToken(AsmToken::Comma);
5517   }
5518 
5519   return false;
5520 }
5521 
5522 //===----------------------------------------------------------------------===//
5523 // Utility functions
5524 //===----------------------------------------------------------------------===//
5525 
5526 OperandMatchResultTy
5527 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5528 
5529   if (!trySkipId(Prefix, AsmToken::Colon))
5530     return MatchOperand_NoMatch;
5531 
5532   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5533 }
5534 
5535 OperandMatchResultTy
5536 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5537                                     AMDGPUOperand::ImmTy ImmTy,
5538                                     bool (*ConvertResult)(int64_t&)) {
5539   SMLoc S = getLoc();
5540   int64_t Value = 0;
5541 
5542   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5543   if (Res != MatchOperand_Success)
5544     return Res;
5545 
5546   if (ConvertResult && !ConvertResult(Value)) {
5547     Error(S, "invalid " + StringRef(Prefix) + " value.");
5548   }
5549 
5550   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5551   return MatchOperand_Success;
5552 }
5553 
5554 OperandMatchResultTy
5555 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5556                                              OperandVector &Operands,
5557                                              AMDGPUOperand::ImmTy ImmTy,
5558                                              bool (*ConvertResult)(int64_t&)) {
5559   SMLoc S = getLoc();
5560   if (!trySkipId(Prefix, AsmToken::Colon))
5561     return MatchOperand_NoMatch;
5562 
5563   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5564     return MatchOperand_ParseFail;
5565 
5566   unsigned Val = 0;
5567   const unsigned MaxSize = 4;
5568 
5569   // FIXME: How to verify the number of elements matches the number of src
5570   // operands?
5571   for (int I = 0; ; ++I) {
5572     int64_t Op;
5573     SMLoc Loc = getLoc();
5574     if (!parseExpr(Op))
5575       return MatchOperand_ParseFail;
5576 
5577     if (Op != 0 && Op != 1) {
5578       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5579       return MatchOperand_ParseFail;
5580     }
5581 
5582     Val |= (Op << I);
5583 
5584     if (trySkipToken(AsmToken::RBrac))
5585       break;
5586 
5587     if (I + 1 == MaxSize) {
5588       Error(getLoc(), "expected a closing square bracket");
5589       return MatchOperand_ParseFail;
5590     }
5591 
5592     if (!skipToken(AsmToken::Comma, "expected a comma"))
5593       return MatchOperand_ParseFail;
5594   }
5595 
5596   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5597   return MatchOperand_Success;
5598 }
5599 
5600 OperandMatchResultTy
5601 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5602                                AMDGPUOperand::ImmTy ImmTy) {
5603   int64_t Bit;
5604   SMLoc S = getLoc();
5605 
5606   if (trySkipId(Name)) {
5607     Bit = 1;
5608   } else if (trySkipId("no", Name)) {
5609     Bit = 0;
5610   } else {
5611     return MatchOperand_NoMatch;
5612   }
5613 
5614   if (Name == "r128" && !hasMIMG_R128()) {
5615     Error(S, "r128 modifier is not supported on this GPU");
5616     return MatchOperand_ParseFail;
5617   }
5618   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5619     Error(S, "a16 modifier is not supported on this GPU");
5620     return MatchOperand_ParseFail;
5621   }
5622 
5623   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5624     ImmTy = AMDGPUOperand::ImmTyR128A16;
5625 
5626   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5627   return MatchOperand_Success;
5628 }
5629 
5630 OperandMatchResultTy
5631 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5632   unsigned CPolOn = 0;
5633   unsigned CPolOff = 0;
5634   SMLoc S = getLoc();
5635 
5636   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5637   if (isGFX940() && !Mnemo.startswith("s_")) {
5638     if (trySkipId("sc0"))
5639       CPolOn = AMDGPU::CPol::SC0;
5640     else if (trySkipId("nosc0"))
5641       CPolOff = AMDGPU::CPol::SC0;
5642     else if (trySkipId("nt"))
5643       CPolOn = AMDGPU::CPol::NT;
5644     else if (trySkipId("nont"))
5645       CPolOff = AMDGPU::CPol::NT;
5646     else if (trySkipId("sc1"))
5647       CPolOn = AMDGPU::CPol::SC1;
5648     else if (trySkipId("nosc1"))
5649       CPolOff = AMDGPU::CPol::SC1;
5650     else
5651       return MatchOperand_NoMatch;
5652   }
5653   else if (trySkipId("glc"))
5654     CPolOn = AMDGPU::CPol::GLC;
5655   else if (trySkipId("noglc"))
5656     CPolOff = AMDGPU::CPol::GLC;
5657   else if (trySkipId("slc"))
5658     CPolOn = AMDGPU::CPol::SLC;
5659   else if (trySkipId("noslc"))
5660     CPolOff = AMDGPU::CPol::SLC;
5661   else if (trySkipId("dlc"))
5662     CPolOn = AMDGPU::CPol::DLC;
5663   else if (trySkipId("nodlc"))
5664     CPolOff = AMDGPU::CPol::DLC;
5665   else if (trySkipId("scc"))
5666     CPolOn = AMDGPU::CPol::SCC;
5667   else if (trySkipId("noscc"))
5668     CPolOff = AMDGPU::CPol::SCC;
5669   else
5670     return MatchOperand_NoMatch;
5671 
5672   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5673     Error(S, "dlc modifier is not supported on this GPU");
5674     return MatchOperand_ParseFail;
5675   }
5676 
5677   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5678     Error(S, "scc modifier is not supported on this GPU");
5679     return MatchOperand_ParseFail;
5680   }
5681 
5682   if (CPolSeen & (CPolOn | CPolOff)) {
5683     Error(S, "duplicate cache policy modifier");
5684     return MatchOperand_ParseFail;
5685   }
5686 
5687   CPolSeen |= (CPolOn | CPolOff);
5688 
5689   for (unsigned I = 1; I != Operands.size(); ++I) {
5690     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5691     if (Op.isCPol()) {
5692       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5693       return MatchOperand_Success;
5694     }
5695   }
5696 
5697   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5698                                               AMDGPUOperand::ImmTyCPol));
5699 
5700   return MatchOperand_Success;
5701 }
5702 
5703 static void addOptionalImmOperand(
5704   MCInst& Inst, const OperandVector& Operands,
5705   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5706   AMDGPUOperand::ImmTy ImmT,
5707   int64_t Default = 0) {
5708   auto i = OptionalIdx.find(ImmT);
5709   if (i != OptionalIdx.end()) {
5710     unsigned Idx = i->second;
5711     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5712   } else {
5713     Inst.addOperand(MCOperand::createImm(Default));
5714   }
5715 }
5716 
5717 OperandMatchResultTy
5718 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5719                                        StringRef &Value,
5720                                        SMLoc &StringLoc) {
5721   if (!trySkipId(Prefix, AsmToken::Colon))
5722     return MatchOperand_NoMatch;
5723 
5724   StringLoc = getLoc();
5725   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5726                                                   : MatchOperand_ParseFail;
5727 }
5728 
5729 //===----------------------------------------------------------------------===//
5730 // MTBUF format
5731 //===----------------------------------------------------------------------===//
5732 
5733 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5734                                   int64_t MaxVal,
5735                                   int64_t &Fmt) {
5736   int64_t Val;
5737   SMLoc Loc = getLoc();
5738 
5739   auto Res = parseIntWithPrefix(Pref, Val);
5740   if (Res == MatchOperand_ParseFail)
5741     return false;
5742   if (Res == MatchOperand_NoMatch)
5743     return true;
5744 
5745   if (Val < 0 || Val > MaxVal) {
5746     Error(Loc, Twine("out of range ", StringRef(Pref)));
5747     return false;
5748   }
5749 
5750   Fmt = Val;
5751   return true;
5752 }
5753 
5754 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5755 // values to live in a joint format operand in the MCInst encoding.
5756 OperandMatchResultTy
5757 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5758   using namespace llvm::AMDGPU::MTBUFFormat;
5759 
5760   int64_t Dfmt = DFMT_UNDEF;
5761   int64_t Nfmt = NFMT_UNDEF;
5762 
5763   // dfmt and nfmt can appear in either order, and each is optional.
5764   for (int I = 0; I < 2; ++I) {
5765     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5766       return MatchOperand_ParseFail;
5767 
5768     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5769       return MatchOperand_ParseFail;
5770     }
5771     // Skip optional comma between dfmt/nfmt
5772     // but guard against 2 commas following each other.
5773     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5774         !peekToken().is(AsmToken::Comma)) {
5775       trySkipToken(AsmToken::Comma);
5776     }
5777   }
5778 
5779   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5780     return MatchOperand_NoMatch;
5781 
5782   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5783   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5784 
5785   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5786   return MatchOperand_Success;
5787 }
5788 
5789 OperandMatchResultTy
5790 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5791   using namespace llvm::AMDGPU::MTBUFFormat;
5792 
5793   int64_t Fmt = UFMT_UNDEF;
5794 
5795   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5796     return MatchOperand_ParseFail;
5797 
5798   if (Fmt == UFMT_UNDEF)
5799     return MatchOperand_NoMatch;
5800 
5801   Format = Fmt;
5802   return MatchOperand_Success;
5803 }
5804 
5805 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5806                                     int64_t &Nfmt,
5807                                     StringRef FormatStr,
5808                                     SMLoc Loc) {
5809   using namespace llvm::AMDGPU::MTBUFFormat;
5810   int64_t Format;
5811 
5812   Format = getDfmt(FormatStr);
5813   if (Format != DFMT_UNDEF) {
5814     Dfmt = Format;
5815     return true;
5816   }
5817 
5818   Format = getNfmt(FormatStr, getSTI());
5819   if (Format != NFMT_UNDEF) {
5820     Nfmt = Format;
5821     return true;
5822   }
5823 
5824   Error(Loc, "unsupported format");
5825   return false;
5826 }
5827 
5828 OperandMatchResultTy
5829 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5830                                           SMLoc FormatLoc,
5831                                           int64_t &Format) {
5832   using namespace llvm::AMDGPU::MTBUFFormat;
5833 
5834   int64_t Dfmt = DFMT_UNDEF;
5835   int64_t Nfmt = NFMT_UNDEF;
5836   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5837     return MatchOperand_ParseFail;
5838 
5839   if (trySkipToken(AsmToken::Comma)) {
5840     StringRef Str;
5841     SMLoc Loc = getLoc();
5842     if (!parseId(Str, "expected a format string") ||
5843         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5844       return MatchOperand_ParseFail;
5845     }
5846     if (Dfmt == DFMT_UNDEF) {
5847       Error(Loc, "duplicate numeric format");
5848       return MatchOperand_ParseFail;
5849     } else if (Nfmt == NFMT_UNDEF) {
5850       Error(Loc, "duplicate data format");
5851       return MatchOperand_ParseFail;
5852     }
5853   }
5854 
5855   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5856   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5857 
5858   if (isGFX10Plus()) {
5859     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5860     if (Ufmt == UFMT_UNDEF) {
5861       Error(FormatLoc, "unsupported format");
5862       return MatchOperand_ParseFail;
5863     }
5864     Format = Ufmt;
5865   } else {
5866     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5867   }
5868 
5869   return MatchOperand_Success;
5870 }
5871 
5872 OperandMatchResultTy
5873 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5874                                             SMLoc Loc,
5875                                             int64_t &Format) {
5876   using namespace llvm::AMDGPU::MTBUFFormat;
5877 
5878   auto Id = getUnifiedFormat(FormatStr);
5879   if (Id == UFMT_UNDEF)
5880     return MatchOperand_NoMatch;
5881 
5882   if (!isGFX10Plus()) {
5883     Error(Loc, "unified format is not supported on this GPU");
5884     return MatchOperand_ParseFail;
5885   }
5886 
5887   Format = Id;
5888   return MatchOperand_Success;
5889 }
5890 
5891 OperandMatchResultTy
5892 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5893   using namespace llvm::AMDGPU::MTBUFFormat;
5894   SMLoc Loc = getLoc();
5895 
5896   if (!parseExpr(Format))
5897     return MatchOperand_ParseFail;
5898   if (!isValidFormatEncoding(Format, getSTI())) {
5899     Error(Loc, "out of range format");
5900     return MatchOperand_ParseFail;
5901   }
5902 
5903   return MatchOperand_Success;
5904 }
5905 
5906 OperandMatchResultTy
5907 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5908   using namespace llvm::AMDGPU::MTBUFFormat;
5909 
5910   if (!trySkipId("format", AsmToken::Colon))
5911     return MatchOperand_NoMatch;
5912 
5913   if (trySkipToken(AsmToken::LBrac)) {
5914     StringRef FormatStr;
5915     SMLoc Loc = getLoc();
5916     if (!parseId(FormatStr, "expected a format string"))
5917       return MatchOperand_ParseFail;
5918 
5919     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5920     if (Res == MatchOperand_NoMatch)
5921       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5922     if (Res != MatchOperand_Success)
5923       return Res;
5924 
5925     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5926       return MatchOperand_ParseFail;
5927 
5928     return MatchOperand_Success;
5929   }
5930 
5931   return parseNumericFormat(Format);
5932 }
5933 
5934 OperandMatchResultTy
5935 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5936   using namespace llvm::AMDGPU::MTBUFFormat;
5937 
5938   int64_t Format = getDefaultFormatEncoding(getSTI());
5939   OperandMatchResultTy Res;
5940   SMLoc Loc = getLoc();
5941 
5942   // Parse legacy format syntax.
5943   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5944   if (Res == MatchOperand_ParseFail)
5945     return Res;
5946 
5947   bool FormatFound = (Res == MatchOperand_Success);
5948 
5949   Operands.push_back(
5950     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5951 
5952   if (FormatFound)
5953     trySkipToken(AsmToken::Comma);
5954 
5955   if (isToken(AsmToken::EndOfStatement)) {
5956     // We are expecting an soffset operand,
5957     // but let matcher handle the error.
5958     return MatchOperand_Success;
5959   }
5960 
5961   // Parse soffset.
5962   Res = parseRegOrImm(Operands);
5963   if (Res != MatchOperand_Success)
5964     return Res;
5965 
5966   trySkipToken(AsmToken::Comma);
5967 
5968   if (!FormatFound) {
5969     Res = parseSymbolicOrNumericFormat(Format);
5970     if (Res == MatchOperand_ParseFail)
5971       return Res;
5972     if (Res == MatchOperand_Success) {
5973       auto Size = Operands.size();
5974       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5975       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5976       Op.setImm(Format);
5977     }
5978     return MatchOperand_Success;
5979   }
5980 
5981   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5982     Error(getLoc(), "duplicate format");
5983     return MatchOperand_ParseFail;
5984   }
5985   return MatchOperand_Success;
5986 }
5987 
5988 //===----------------------------------------------------------------------===//
5989 // ds
5990 //===----------------------------------------------------------------------===//
5991 
5992 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5993                                     const OperandVector &Operands) {
5994   OptionalImmIndexMap OptionalIdx;
5995 
5996   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5997     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5998 
5999     // Add the register arguments
6000     if (Op.isReg()) {
6001       Op.addRegOperands(Inst, 1);
6002       continue;
6003     }
6004 
6005     // Handle optional arguments
6006     OptionalIdx[Op.getImmTy()] = i;
6007   }
6008 
6009   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6010   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6011   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6012 
6013   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6014 }
6015 
6016 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6017                                 bool IsGdsHardcoded) {
6018   OptionalImmIndexMap OptionalIdx;
6019 
6020   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6021     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6022 
6023     // Add the register arguments
6024     if (Op.isReg()) {
6025       Op.addRegOperands(Inst, 1);
6026       continue;
6027     }
6028 
6029     if (Op.isToken() && Op.getToken() == "gds") {
6030       IsGdsHardcoded = true;
6031       continue;
6032     }
6033 
6034     // Handle optional arguments
6035     OptionalIdx[Op.getImmTy()] = i;
6036   }
6037 
6038   AMDGPUOperand::ImmTy OffsetType =
6039     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6040      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6041      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6042                                                       AMDGPUOperand::ImmTyOffset;
6043 
6044   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6045 
6046   if (!IsGdsHardcoded) {
6047     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6048   }
6049   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6050 }
6051 
6052 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6053   OptionalImmIndexMap OptionalIdx;
6054 
6055   unsigned OperandIdx[4];
6056   unsigned EnMask = 0;
6057   int SrcIdx = 0;
6058 
6059   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6060     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6061 
6062     // Add the register arguments
6063     if (Op.isReg()) {
6064       assert(SrcIdx < 4);
6065       OperandIdx[SrcIdx] = Inst.size();
6066       Op.addRegOperands(Inst, 1);
6067       ++SrcIdx;
6068       continue;
6069     }
6070 
6071     if (Op.isOff()) {
6072       assert(SrcIdx < 4);
6073       OperandIdx[SrcIdx] = Inst.size();
6074       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6075       ++SrcIdx;
6076       continue;
6077     }
6078 
6079     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6080       Op.addImmOperands(Inst, 1);
6081       continue;
6082     }
6083 
6084     if (Op.isToken() && Op.getToken() == "done")
6085       continue;
6086 
6087     // Handle optional arguments
6088     OptionalIdx[Op.getImmTy()] = i;
6089   }
6090 
6091   assert(SrcIdx == 4);
6092 
6093   bool Compr = false;
6094   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6095     Compr = true;
6096     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6097     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6098     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6099   }
6100 
6101   for (auto i = 0; i < SrcIdx; ++i) {
6102     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6103       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6104     }
6105   }
6106 
6107   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6108   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6109 
6110   Inst.addOperand(MCOperand::createImm(EnMask));
6111 }
6112 
6113 //===----------------------------------------------------------------------===//
6114 // s_waitcnt
6115 //===----------------------------------------------------------------------===//
6116 
6117 static bool
6118 encodeCnt(
6119   const AMDGPU::IsaVersion ISA,
6120   int64_t &IntVal,
6121   int64_t CntVal,
6122   bool Saturate,
6123   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6124   unsigned (*decode)(const IsaVersion &Version, unsigned))
6125 {
6126   bool Failed = false;
6127 
6128   IntVal = encode(ISA, IntVal, CntVal);
6129   if (CntVal != decode(ISA, IntVal)) {
6130     if (Saturate) {
6131       IntVal = encode(ISA, IntVal, -1);
6132     } else {
6133       Failed = true;
6134     }
6135   }
6136   return Failed;
6137 }
6138 
6139 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6140 
6141   SMLoc CntLoc = getLoc();
6142   StringRef CntName = getTokenStr();
6143 
6144   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6145       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6146     return false;
6147 
6148   int64_t CntVal;
6149   SMLoc ValLoc = getLoc();
6150   if (!parseExpr(CntVal))
6151     return false;
6152 
6153   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6154 
6155   bool Failed = true;
6156   bool Sat = CntName.endswith("_sat");
6157 
6158   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6159     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6160   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6161     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6162   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6163     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6164   } else {
6165     Error(CntLoc, "invalid counter name " + CntName);
6166     return false;
6167   }
6168 
6169   if (Failed) {
6170     Error(ValLoc, "too large value for " + CntName);
6171     return false;
6172   }
6173 
6174   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6175     return false;
6176 
6177   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6178     if (isToken(AsmToken::EndOfStatement)) {
6179       Error(getLoc(), "expected a counter name");
6180       return false;
6181     }
6182   }
6183 
6184   return true;
6185 }
6186 
6187 OperandMatchResultTy
6188 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6189   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6190   int64_t Waitcnt = getWaitcntBitMask(ISA);
6191   SMLoc S = getLoc();
6192 
6193   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6194     while (!isToken(AsmToken::EndOfStatement)) {
6195       if (!parseCnt(Waitcnt))
6196         return MatchOperand_ParseFail;
6197     }
6198   } else {
6199     if (!parseExpr(Waitcnt))
6200       return MatchOperand_ParseFail;
6201   }
6202 
6203   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6204   return MatchOperand_Success;
6205 }
6206 
6207 bool
6208 AMDGPUOperand::isSWaitCnt() const {
6209   return isImm();
6210 }
6211 
6212 //===----------------------------------------------------------------------===//
6213 // hwreg
6214 //===----------------------------------------------------------------------===//
6215 
6216 bool
6217 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6218                                 OperandInfoTy &Offset,
6219                                 OperandInfoTy &Width) {
6220   using namespace llvm::AMDGPU::Hwreg;
6221 
6222   // The register may be specified by name or using a numeric code
6223   HwReg.Loc = getLoc();
6224   if (isToken(AsmToken::Identifier) &&
6225       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) >= 0) {
6226     HwReg.IsSymbolic = true;
6227     lex(); // skip register name
6228   } else if (!parseExpr(HwReg.Id, "a register name")) {
6229     return false;
6230   }
6231 
6232   if (trySkipToken(AsmToken::RParen))
6233     return true;
6234 
6235   // parse optional params
6236   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6237     return false;
6238 
6239   Offset.Loc = getLoc();
6240   if (!parseExpr(Offset.Id))
6241     return false;
6242 
6243   if (!skipToken(AsmToken::Comma, "expected a comma"))
6244     return false;
6245 
6246   Width.Loc = getLoc();
6247   return parseExpr(Width.Id) &&
6248          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6249 }
6250 
6251 bool
6252 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6253                                const OperandInfoTy &Offset,
6254                                const OperandInfoTy &Width) {
6255 
6256   using namespace llvm::AMDGPU::Hwreg;
6257 
6258   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6259     Error(HwReg.Loc,
6260           "specified hardware register is not supported on this GPU");
6261     return false;
6262   }
6263   if (!isValidHwreg(HwReg.Id)) {
6264     Error(HwReg.Loc,
6265           "invalid code of hardware register: only 6-bit values are legal");
6266     return false;
6267   }
6268   if (!isValidHwregOffset(Offset.Id)) {
6269     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6270     return false;
6271   }
6272   if (!isValidHwregWidth(Width.Id)) {
6273     Error(Width.Loc,
6274           "invalid bitfield width: only values from 1 to 32 are legal");
6275     return false;
6276   }
6277   return true;
6278 }
6279 
6280 OperandMatchResultTy
6281 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6282   using namespace llvm::AMDGPU::Hwreg;
6283 
6284   int64_t ImmVal = 0;
6285   SMLoc Loc = getLoc();
6286 
6287   if (trySkipId("hwreg", AsmToken::LParen)) {
6288     OperandInfoTy HwReg(ID_UNKNOWN_);
6289     OperandInfoTy Offset(OFFSET_DEFAULT_);
6290     OperandInfoTy Width(WIDTH_DEFAULT_);
6291     if (parseHwregBody(HwReg, Offset, Width) &&
6292         validateHwreg(HwReg, Offset, Width)) {
6293       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6294     } else {
6295       return MatchOperand_ParseFail;
6296     }
6297   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6298     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6299       Error(Loc, "invalid immediate: only 16-bit values are legal");
6300       return MatchOperand_ParseFail;
6301     }
6302   } else {
6303     return MatchOperand_ParseFail;
6304   }
6305 
6306   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6307   return MatchOperand_Success;
6308 }
6309 
6310 bool AMDGPUOperand::isHwreg() const {
6311   return isImmTy(ImmTyHwreg);
6312 }
6313 
6314 //===----------------------------------------------------------------------===//
6315 // sendmsg
6316 //===----------------------------------------------------------------------===//
6317 
6318 bool
6319 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6320                                   OperandInfoTy &Op,
6321                                   OperandInfoTy &Stream) {
6322   using namespace llvm::AMDGPU::SendMsg;
6323 
6324   Msg.Loc = getLoc();
6325   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6326     Msg.IsSymbolic = true;
6327     lex(); // skip message name
6328   } else if (!parseExpr(Msg.Id, "a message name")) {
6329     return false;
6330   }
6331 
6332   if (trySkipToken(AsmToken::Comma)) {
6333     Op.IsDefined = true;
6334     Op.Loc = getLoc();
6335     if (isToken(AsmToken::Identifier) &&
6336         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6337       lex(); // skip operation name
6338     } else if (!parseExpr(Op.Id, "an operation name")) {
6339       return false;
6340     }
6341 
6342     if (trySkipToken(AsmToken::Comma)) {
6343       Stream.IsDefined = true;
6344       Stream.Loc = getLoc();
6345       if (!parseExpr(Stream.Id))
6346         return false;
6347     }
6348   }
6349 
6350   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6351 }
6352 
6353 bool
6354 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6355                                  const OperandInfoTy &Op,
6356                                  const OperandInfoTy &Stream) {
6357   using namespace llvm::AMDGPU::SendMsg;
6358 
6359   // Validation strictness depends on whether message is specified
6360   // in a symbolic or in a numeric form. In the latter case
6361   // only encoding possibility is checked.
6362   bool Strict = Msg.IsSymbolic;
6363 
6364   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6365     Error(Msg.Loc, "invalid message id");
6366     return false;
6367   }
6368   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6369     if (Op.IsDefined) {
6370       Error(Op.Loc, "message does not support operations");
6371     } else {
6372       Error(Msg.Loc, "missing message operation");
6373     }
6374     return false;
6375   }
6376   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6377     Error(Op.Loc, "invalid operation id");
6378     return false;
6379   }
6380   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6381     Error(Stream.Loc, "message operation does not support streams");
6382     return false;
6383   }
6384   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6385     Error(Stream.Loc, "invalid message stream id");
6386     return false;
6387   }
6388   return true;
6389 }
6390 
6391 OperandMatchResultTy
6392 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6393   using namespace llvm::AMDGPU::SendMsg;
6394 
6395   int64_t ImmVal = 0;
6396   SMLoc Loc = getLoc();
6397 
6398   if (trySkipId("sendmsg", AsmToken::LParen)) {
6399     OperandInfoTy Msg(ID_UNKNOWN_);
6400     OperandInfoTy Op(OP_NONE_);
6401     OperandInfoTy Stream(STREAM_ID_NONE_);
6402     if (parseSendMsgBody(Msg, Op, Stream) &&
6403         validateSendMsg(Msg, Op, Stream)) {
6404       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6405     } else {
6406       return MatchOperand_ParseFail;
6407     }
6408   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6409     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6410       Error(Loc, "invalid immediate: only 16-bit values are legal");
6411       return MatchOperand_ParseFail;
6412     }
6413   } else {
6414     return MatchOperand_ParseFail;
6415   }
6416 
6417   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6418   return MatchOperand_Success;
6419 }
6420 
6421 bool AMDGPUOperand::isSendMsg() const {
6422   return isImmTy(ImmTySendMsg);
6423 }
6424 
6425 //===----------------------------------------------------------------------===//
6426 // v_interp
6427 //===----------------------------------------------------------------------===//
6428 
6429 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6430   StringRef Str;
6431   SMLoc S = getLoc();
6432 
6433   if (!parseId(Str))
6434     return MatchOperand_NoMatch;
6435 
6436   int Slot = StringSwitch<int>(Str)
6437     .Case("p10", 0)
6438     .Case("p20", 1)
6439     .Case("p0", 2)
6440     .Default(-1);
6441 
6442   if (Slot == -1) {
6443     Error(S, "invalid interpolation slot");
6444     return MatchOperand_ParseFail;
6445   }
6446 
6447   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6448                                               AMDGPUOperand::ImmTyInterpSlot));
6449   return MatchOperand_Success;
6450 }
6451 
6452 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6453   StringRef Str;
6454   SMLoc S = getLoc();
6455 
6456   if (!parseId(Str))
6457     return MatchOperand_NoMatch;
6458 
6459   if (!Str.startswith("attr")) {
6460     Error(S, "invalid interpolation attribute");
6461     return MatchOperand_ParseFail;
6462   }
6463 
6464   StringRef Chan = Str.take_back(2);
6465   int AttrChan = StringSwitch<int>(Chan)
6466     .Case(".x", 0)
6467     .Case(".y", 1)
6468     .Case(".z", 2)
6469     .Case(".w", 3)
6470     .Default(-1);
6471   if (AttrChan == -1) {
6472     Error(S, "invalid or missing interpolation attribute channel");
6473     return MatchOperand_ParseFail;
6474   }
6475 
6476   Str = Str.drop_back(2).drop_front(4);
6477 
6478   uint8_t Attr;
6479   if (Str.getAsInteger(10, Attr)) {
6480     Error(S, "invalid or missing interpolation attribute number");
6481     return MatchOperand_ParseFail;
6482   }
6483 
6484   if (Attr > 63) {
6485     Error(S, "out of bounds interpolation attribute number");
6486     return MatchOperand_ParseFail;
6487   }
6488 
6489   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6490 
6491   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6492                                               AMDGPUOperand::ImmTyInterpAttr));
6493   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6494                                               AMDGPUOperand::ImmTyAttrChan));
6495   return MatchOperand_Success;
6496 }
6497 
6498 //===----------------------------------------------------------------------===//
6499 // exp
6500 //===----------------------------------------------------------------------===//
6501 
6502 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6503   using namespace llvm::AMDGPU::Exp;
6504 
6505   StringRef Str;
6506   SMLoc S = getLoc();
6507 
6508   if (!parseId(Str))
6509     return MatchOperand_NoMatch;
6510 
6511   unsigned Id = getTgtId(Str);
6512   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6513     Error(S, (Id == ET_INVALID) ?
6514                 "invalid exp target" :
6515                 "exp target is not supported on this GPU");
6516     return MatchOperand_ParseFail;
6517   }
6518 
6519   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6520                                               AMDGPUOperand::ImmTyExpTgt));
6521   return MatchOperand_Success;
6522 }
6523 
6524 //===----------------------------------------------------------------------===//
6525 // parser helpers
6526 //===----------------------------------------------------------------------===//
6527 
6528 bool
6529 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6530   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6531 }
6532 
6533 bool
6534 AMDGPUAsmParser::isId(const StringRef Id) const {
6535   return isId(getToken(), Id);
6536 }
6537 
6538 bool
6539 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6540   return getTokenKind() == Kind;
6541 }
6542 
6543 bool
6544 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6545   if (isId(Id)) {
6546     lex();
6547     return true;
6548   }
6549   return false;
6550 }
6551 
6552 bool
6553 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6554   if (isToken(AsmToken::Identifier)) {
6555     StringRef Tok = getTokenStr();
6556     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6557       lex();
6558       return true;
6559     }
6560   }
6561   return false;
6562 }
6563 
6564 bool
6565 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6566   if (isId(Id) && peekToken().is(Kind)) {
6567     lex();
6568     lex();
6569     return true;
6570   }
6571   return false;
6572 }
6573 
6574 bool
6575 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6576   if (isToken(Kind)) {
6577     lex();
6578     return true;
6579   }
6580   return false;
6581 }
6582 
6583 bool
6584 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6585                            const StringRef ErrMsg) {
6586   if (!trySkipToken(Kind)) {
6587     Error(getLoc(), ErrMsg);
6588     return false;
6589   }
6590   return true;
6591 }
6592 
6593 bool
6594 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6595   SMLoc S = getLoc();
6596 
6597   const MCExpr *Expr;
6598   if (Parser.parseExpression(Expr))
6599     return false;
6600 
6601   if (Expr->evaluateAsAbsolute(Imm))
6602     return true;
6603 
6604   if (Expected.empty()) {
6605     Error(S, "expected absolute expression");
6606   } else {
6607     Error(S, Twine("expected ", Expected) +
6608              Twine(" or an absolute expression"));
6609   }
6610   return false;
6611 }
6612 
6613 bool
6614 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6615   SMLoc S = getLoc();
6616 
6617   const MCExpr *Expr;
6618   if (Parser.parseExpression(Expr))
6619     return false;
6620 
6621   int64_t IntVal;
6622   if (Expr->evaluateAsAbsolute(IntVal)) {
6623     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6624   } else {
6625     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6626   }
6627   return true;
6628 }
6629 
6630 bool
6631 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6632   if (isToken(AsmToken::String)) {
6633     Val = getToken().getStringContents();
6634     lex();
6635     return true;
6636   } else {
6637     Error(getLoc(), ErrMsg);
6638     return false;
6639   }
6640 }
6641 
6642 bool
6643 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6644   if (isToken(AsmToken::Identifier)) {
6645     Val = getTokenStr();
6646     lex();
6647     return true;
6648   } else {
6649     if (!ErrMsg.empty())
6650       Error(getLoc(), ErrMsg);
6651     return false;
6652   }
6653 }
6654 
6655 AsmToken
6656 AMDGPUAsmParser::getToken() const {
6657   return Parser.getTok();
6658 }
6659 
6660 AsmToken
6661 AMDGPUAsmParser::peekToken() {
6662   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6663 }
6664 
6665 void
6666 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6667   auto TokCount = getLexer().peekTokens(Tokens);
6668 
6669   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6670     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6671 }
6672 
6673 AsmToken::TokenKind
6674 AMDGPUAsmParser::getTokenKind() const {
6675   return getLexer().getKind();
6676 }
6677 
6678 SMLoc
6679 AMDGPUAsmParser::getLoc() const {
6680   return getToken().getLoc();
6681 }
6682 
6683 StringRef
6684 AMDGPUAsmParser::getTokenStr() const {
6685   return getToken().getString();
6686 }
6687 
6688 void
6689 AMDGPUAsmParser::lex() {
6690   Parser.Lex();
6691 }
6692 
6693 SMLoc
6694 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6695                                const OperandVector &Operands) const {
6696   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6697     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6698     if (Test(Op))
6699       return Op.getStartLoc();
6700   }
6701   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6702 }
6703 
6704 SMLoc
6705 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6706                            const OperandVector &Operands) const {
6707   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6708   return getOperandLoc(Test, Operands);
6709 }
6710 
6711 SMLoc
6712 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6713                            const OperandVector &Operands) const {
6714   auto Test = [=](const AMDGPUOperand& Op) {
6715     return Op.isRegKind() && Op.getReg() == Reg;
6716   };
6717   return getOperandLoc(Test, Operands);
6718 }
6719 
6720 SMLoc
6721 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6722   auto Test = [](const AMDGPUOperand& Op) {
6723     return Op.IsImmKindLiteral() || Op.isExpr();
6724   };
6725   return getOperandLoc(Test, Operands);
6726 }
6727 
6728 SMLoc
6729 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6730   auto Test = [](const AMDGPUOperand& Op) {
6731     return Op.isImmKindConst();
6732   };
6733   return getOperandLoc(Test, Operands);
6734 }
6735 
6736 //===----------------------------------------------------------------------===//
6737 // swizzle
6738 //===----------------------------------------------------------------------===//
6739 
6740 LLVM_READNONE
6741 static unsigned
6742 encodeBitmaskPerm(const unsigned AndMask,
6743                   const unsigned OrMask,
6744                   const unsigned XorMask) {
6745   using namespace llvm::AMDGPU::Swizzle;
6746 
6747   return BITMASK_PERM_ENC |
6748          (AndMask << BITMASK_AND_SHIFT) |
6749          (OrMask  << BITMASK_OR_SHIFT)  |
6750          (XorMask << BITMASK_XOR_SHIFT);
6751 }
6752 
6753 bool
6754 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6755                                      const unsigned MinVal,
6756                                      const unsigned MaxVal,
6757                                      const StringRef ErrMsg,
6758                                      SMLoc &Loc) {
6759   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6760     return false;
6761   }
6762   Loc = getLoc();
6763   if (!parseExpr(Op)) {
6764     return false;
6765   }
6766   if (Op < MinVal || Op > MaxVal) {
6767     Error(Loc, ErrMsg);
6768     return false;
6769   }
6770 
6771   return true;
6772 }
6773 
6774 bool
6775 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6776                                       const unsigned MinVal,
6777                                       const unsigned MaxVal,
6778                                       const StringRef ErrMsg) {
6779   SMLoc Loc;
6780   for (unsigned i = 0; i < OpNum; ++i) {
6781     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6782       return false;
6783   }
6784 
6785   return true;
6786 }
6787 
6788 bool
6789 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6790   using namespace llvm::AMDGPU::Swizzle;
6791 
6792   int64_t Lane[LANE_NUM];
6793   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6794                            "expected a 2-bit lane id")) {
6795     Imm = QUAD_PERM_ENC;
6796     for (unsigned I = 0; I < LANE_NUM; ++I) {
6797       Imm |= Lane[I] << (LANE_SHIFT * I);
6798     }
6799     return true;
6800   }
6801   return false;
6802 }
6803 
6804 bool
6805 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6806   using namespace llvm::AMDGPU::Swizzle;
6807 
6808   SMLoc Loc;
6809   int64_t GroupSize;
6810   int64_t LaneIdx;
6811 
6812   if (!parseSwizzleOperand(GroupSize,
6813                            2, 32,
6814                            "group size must be in the interval [2,32]",
6815                            Loc)) {
6816     return false;
6817   }
6818   if (!isPowerOf2_64(GroupSize)) {
6819     Error(Loc, "group size must be a power of two");
6820     return false;
6821   }
6822   if (parseSwizzleOperand(LaneIdx,
6823                           0, GroupSize - 1,
6824                           "lane id must be in the interval [0,group size - 1]",
6825                           Loc)) {
6826     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6827     return true;
6828   }
6829   return false;
6830 }
6831 
6832 bool
6833 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6834   using namespace llvm::AMDGPU::Swizzle;
6835 
6836   SMLoc Loc;
6837   int64_t GroupSize;
6838 
6839   if (!parseSwizzleOperand(GroupSize,
6840                            2, 32,
6841                            "group size must be in the interval [2,32]",
6842                            Loc)) {
6843     return false;
6844   }
6845   if (!isPowerOf2_64(GroupSize)) {
6846     Error(Loc, "group size must be a power of two");
6847     return false;
6848   }
6849 
6850   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6851   return true;
6852 }
6853 
6854 bool
6855 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6856   using namespace llvm::AMDGPU::Swizzle;
6857 
6858   SMLoc Loc;
6859   int64_t GroupSize;
6860 
6861   if (!parseSwizzleOperand(GroupSize,
6862                            1, 16,
6863                            "group size must be in the interval [1,16]",
6864                            Loc)) {
6865     return false;
6866   }
6867   if (!isPowerOf2_64(GroupSize)) {
6868     Error(Loc, "group size must be a power of two");
6869     return false;
6870   }
6871 
6872   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6873   return true;
6874 }
6875 
6876 bool
6877 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6878   using namespace llvm::AMDGPU::Swizzle;
6879 
6880   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6881     return false;
6882   }
6883 
6884   StringRef Ctl;
6885   SMLoc StrLoc = getLoc();
6886   if (!parseString(Ctl)) {
6887     return false;
6888   }
6889   if (Ctl.size() != BITMASK_WIDTH) {
6890     Error(StrLoc, "expected a 5-character mask");
6891     return false;
6892   }
6893 
6894   unsigned AndMask = 0;
6895   unsigned OrMask = 0;
6896   unsigned XorMask = 0;
6897 
6898   for (size_t i = 0; i < Ctl.size(); ++i) {
6899     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6900     switch(Ctl[i]) {
6901     default:
6902       Error(StrLoc, "invalid mask");
6903       return false;
6904     case '0':
6905       break;
6906     case '1':
6907       OrMask |= Mask;
6908       break;
6909     case 'p':
6910       AndMask |= Mask;
6911       break;
6912     case 'i':
6913       AndMask |= Mask;
6914       XorMask |= Mask;
6915       break;
6916     }
6917   }
6918 
6919   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6920   return true;
6921 }
6922 
6923 bool
6924 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6925 
6926   SMLoc OffsetLoc = getLoc();
6927 
6928   if (!parseExpr(Imm, "a swizzle macro")) {
6929     return false;
6930   }
6931   if (!isUInt<16>(Imm)) {
6932     Error(OffsetLoc, "expected a 16-bit offset");
6933     return false;
6934   }
6935   return true;
6936 }
6937 
6938 bool
6939 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6940   using namespace llvm::AMDGPU::Swizzle;
6941 
6942   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6943 
6944     SMLoc ModeLoc = getLoc();
6945     bool Ok = false;
6946 
6947     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6948       Ok = parseSwizzleQuadPerm(Imm);
6949     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6950       Ok = parseSwizzleBitmaskPerm(Imm);
6951     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6952       Ok = parseSwizzleBroadcast(Imm);
6953     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6954       Ok = parseSwizzleSwap(Imm);
6955     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6956       Ok = parseSwizzleReverse(Imm);
6957     } else {
6958       Error(ModeLoc, "expected a swizzle mode");
6959     }
6960 
6961     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
6962   }
6963 
6964   return false;
6965 }
6966 
6967 OperandMatchResultTy
6968 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6969   SMLoc S = getLoc();
6970   int64_t Imm = 0;
6971 
6972   if (trySkipId("offset")) {
6973 
6974     bool Ok = false;
6975     if (skipToken(AsmToken::Colon, "expected a colon")) {
6976       if (trySkipId("swizzle")) {
6977         Ok = parseSwizzleMacro(Imm);
6978       } else {
6979         Ok = parseSwizzleOffset(Imm);
6980       }
6981     }
6982 
6983     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6984 
6985     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6986   } else {
6987     // Swizzle "offset" operand is optional.
6988     // If it is omitted, try parsing other optional operands.
6989     return parseOptionalOpr(Operands);
6990   }
6991 }
6992 
6993 bool
6994 AMDGPUOperand::isSwizzle() const {
6995   return isImmTy(ImmTySwizzle);
6996 }
6997 
6998 //===----------------------------------------------------------------------===//
6999 // VGPR Index Mode
7000 //===----------------------------------------------------------------------===//
7001 
7002 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7003 
7004   using namespace llvm::AMDGPU::VGPRIndexMode;
7005 
7006   if (trySkipToken(AsmToken::RParen)) {
7007     return OFF;
7008   }
7009 
7010   int64_t Imm = 0;
7011 
7012   while (true) {
7013     unsigned Mode = 0;
7014     SMLoc S = getLoc();
7015 
7016     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7017       if (trySkipId(IdSymbolic[ModeId])) {
7018         Mode = 1 << ModeId;
7019         break;
7020       }
7021     }
7022 
7023     if (Mode == 0) {
7024       Error(S, (Imm == 0)?
7025                "expected a VGPR index mode or a closing parenthesis" :
7026                "expected a VGPR index mode");
7027       return UNDEF;
7028     }
7029 
7030     if (Imm & Mode) {
7031       Error(S, "duplicate VGPR index mode");
7032       return UNDEF;
7033     }
7034     Imm |= Mode;
7035 
7036     if (trySkipToken(AsmToken::RParen))
7037       break;
7038     if (!skipToken(AsmToken::Comma,
7039                    "expected a comma or a closing parenthesis"))
7040       return UNDEF;
7041   }
7042 
7043   return Imm;
7044 }
7045 
7046 OperandMatchResultTy
7047 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7048 
7049   using namespace llvm::AMDGPU::VGPRIndexMode;
7050 
7051   int64_t Imm = 0;
7052   SMLoc S = getLoc();
7053 
7054   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7055     Imm = parseGPRIdxMacro();
7056     if (Imm == UNDEF)
7057       return MatchOperand_ParseFail;
7058   } else {
7059     if (getParser().parseAbsoluteExpression(Imm))
7060       return MatchOperand_ParseFail;
7061     if (Imm < 0 || !isUInt<4>(Imm)) {
7062       Error(S, "invalid immediate: only 4-bit values are legal");
7063       return MatchOperand_ParseFail;
7064     }
7065   }
7066 
7067   Operands.push_back(
7068       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7069   return MatchOperand_Success;
7070 }
7071 
7072 bool AMDGPUOperand::isGPRIdxMode() const {
7073   return isImmTy(ImmTyGprIdxMode);
7074 }
7075 
7076 //===----------------------------------------------------------------------===//
7077 // sopp branch targets
7078 //===----------------------------------------------------------------------===//
7079 
7080 OperandMatchResultTy
7081 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7082 
7083   // Make sure we are not parsing something
7084   // that looks like a label or an expression but is not.
7085   // This will improve error messages.
7086   if (isRegister() || isModifier())
7087     return MatchOperand_NoMatch;
7088 
7089   if (!parseExpr(Operands))
7090     return MatchOperand_ParseFail;
7091 
7092   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7093   assert(Opr.isImm() || Opr.isExpr());
7094   SMLoc Loc = Opr.getStartLoc();
7095 
7096   // Currently we do not support arbitrary expressions as branch targets.
7097   // Only labels and absolute expressions are accepted.
7098   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7099     Error(Loc, "expected an absolute expression or a label");
7100   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7101     Error(Loc, "expected a 16-bit signed jump offset");
7102   }
7103 
7104   return MatchOperand_Success;
7105 }
7106 
7107 //===----------------------------------------------------------------------===//
7108 // Boolean holding registers
7109 //===----------------------------------------------------------------------===//
7110 
7111 OperandMatchResultTy
7112 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7113   return parseReg(Operands);
7114 }
7115 
7116 //===----------------------------------------------------------------------===//
7117 // mubuf
7118 //===----------------------------------------------------------------------===//
7119 
7120 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7121   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7122 }
7123 
7124 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7125                                    const OperandVector &Operands,
7126                                    bool IsAtomic,
7127                                    bool IsLds) {
7128   bool IsLdsOpcode = IsLds;
7129   bool HasLdsModifier = false;
7130   OptionalImmIndexMap OptionalIdx;
7131   unsigned FirstOperandIdx = 1;
7132   bool IsAtomicReturn = false;
7133 
7134   if (IsAtomic) {
7135     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7136       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7137       if (!Op.isCPol())
7138         continue;
7139       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7140       break;
7141     }
7142 
7143     if (!IsAtomicReturn) {
7144       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7145       if (NewOpc != -1)
7146         Inst.setOpcode(NewOpc);
7147     }
7148 
7149     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7150                       SIInstrFlags::IsAtomicRet;
7151   }
7152 
7153   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7154     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7155 
7156     // Add the register arguments
7157     if (Op.isReg()) {
7158       Op.addRegOperands(Inst, 1);
7159       // Insert a tied src for atomic return dst.
7160       // This cannot be postponed as subsequent calls to
7161       // addImmOperands rely on correct number of MC operands.
7162       if (IsAtomicReturn && i == FirstOperandIdx)
7163         Op.addRegOperands(Inst, 1);
7164       continue;
7165     }
7166 
7167     // Handle the case where soffset is an immediate
7168     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7169       Op.addImmOperands(Inst, 1);
7170       continue;
7171     }
7172 
7173     HasLdsModifier |= Op.isLDS();
7174 
7175     // Handle tokens like 'offen' which are sometimes hard-coded into the
7176     // asm string.  There are no MCInst operands for these.
7177     if (Op.isToken()) {
7178       continue;
7179     }
7180     assert(Op.isImm());
7181 
7182     // Handle optional arguments
7183     OptionalIdx[Op.getImmTy()] = i;
7184   }
7185 
7186   // This is a workaround for an llvm quirk which may result in an
7187   // incorrect instruction selection. Lds and non-lds versions of
7188   // MUBUF instructions are identical except that lds versions
7189   // have mandatory 'lds' modifier. However this modifier follows
7190   // optional modifiers and llvm asm matcher regards this 'lds'
7191   // modifier as an optional one. As a result, an lds version
7192   // of opcode may be selected even if it has no 'lds' modifier.
7193   if (IsLdsOpcode && !HasLdsModifier) {
7194     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7195     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7196       Inst.setOpcode(NoLdsOpcode);
7197       IsLdsOpcode = false;
7198     }
7199   }
7200 
7201   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7202   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7203 
7204   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7205     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7206   }
7207   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7208 }
7209 
7210 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7211   OptionalImmIndexMap OptionalIdx;
7212 
7213   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7214     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7215 
7216     // Add the register arguments
7217     if (Op.isReg()) {
7218       Op.addRegOperands(Inst, 1);
7219       continue;
7220     }
7221 
7222     // Handle the case where soffset is an immediate
7223     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7224       Op.addImmOperands(Inst, 1);
7225       continue;
7226     }
7227 
7228     // Handle tokens like 'offen' which are sometimes hard-coded into the
7229     // asm string.  There are no MCInst operands for these.
7230     if (Op.isToken()) {
7231       continue;
7232     }
7233     assert(Op.isImm());
7234 
7235     // Handle optional arguments
7236     OptionalIdx[Op.getImmTy()] = i;
7237   }
7238 
7239   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7240                         AMDGPUOperand::ImmTyOffset);
7241   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7242   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7243   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7244   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7245 }
7246 
7247 //===----------------------------------------------------------------------===//
7248 // mimg
7249 //===----------------------------------------------------------------------===//
7250 
7251 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7252                               bool IsAtomic) {
7253   unsigned I = 1;
7254   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7255   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7256     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7257   }
7258 
7259   if (IsAtomic) {
7260     // Add src, same as dst
7261     assert(Desc.getNumDefs() == 1);
7262     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7263   }
7264 
7265   OptionalImmIndexMap OptionalIdx;
7266 
7267   for (unsigned E = Operands.size(); I != E; ++I) {
7268     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7269 
7270     // Add the register arguments
7271     if (Op.isReg()) {
7272       Op.addRegOperands(Inst, 1);
7273     } else if (Op.isImmModifier()) {
7274       OptionalIdx[Op.getImmTy()] = I;
7275     } else if (!Op.isToken()) {
7276       llvm_unreachable("unexpected operand type");
7277     }
7278   }
7279 
7280   bool IsGFX10Plus = isGFX10Plus();
7281 
7282   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7283   if (IsGFX10Plus)
7284     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7285   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7286   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7287   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7288   if (IsGFX10Plus)
7289     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7290   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7291     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7292   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7293   if (!IsGFX10Plus)
7294     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7295   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7296 }
7297 
7298 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7299   cvtMIMG(Inst, Operands, true);
7300 }
7301 
7302 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7303   OptionalImmIndexMap OptionalIdx;
7304   bool IsAtomicReturn = false;
7305 
7306   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7307     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7308     if (!Op.isCPol())
7309       continue;
7310     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7311     break;
7312   }
7313 
7314   if (!IsAtomicReturn) {
7315     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7316     if (NewOpc != -1)
7317       Inst.setOpcode(NewOpc);
7318   }
7319 
7320   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7321                     SIInstrFlags::IsAtomicRet;
7322 
7323   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7324     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7325 
7326     // Add the register arguments
7327     if (Op.isReg()) {
7328       Op.addRegOperands(Inst, 1);
7329       if (IsAtomicReturn && i == 1)
7330         Op.addRegOperands(Inst, 1);
7331       continue;
7332     }
7333 
7334     // Handle the case where soffset is an immediate
7335     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7336       Op.addImmOperands(Inst, 1);
7337       continue;
7338     }
7339 
7340     // Handle tokens like 'offen' which are sometimes hard-coded into the
7341     // asm string.  There are no MCInst operands for these.
7342     if (Op.isToken()) {
7343       continue;
7344     }
7345     assert(Op.isImm());
7346 
7347     // Handle optional arguments
7348     OptionalIdx[Op.getImmTy()] = i;
7349   }
7350 
7351   if ((int)Inst.getNumOperands() <=
7352       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7353     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7354   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7355 }
7356 
7357 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7358                                       const OperandVector &Operands) {
7359   for (unsigned I = 1; I < Operands.size(); ++I) {
7360     auto &Operand = (AMDGPUOperand &)*Operands[I];
7361     if (Operand.isReg())
7362       Operand.addRegOperands(Inst, 1);
7363   }
7364 
7365   Inst.addOperand(MCOperand::createImm(1)); // a16
7366 }
7367 
7368 //===----------------------------------------------------------------------===//
7369 // smrd
7370 //===----------------------------------------------------------------------===//
7371 
7372 bool AMDGPUOperand::isSMRDOffset8() const {
7373   return isImm() && isUInt<8>(getImm());
7374 }
7375 
7376 bool AMDGPUOperand::isSMEMOffset() const {
7377   return isImm(); // Offset range is checked later by validator.
7378 }
7379 
7380 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7381   // 32-bit literals are only supported on CI and we only want to use them
7382   // when the offset is > 8-bits.
7383   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7384 }
7385 
7386 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7387   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7388 }
7389 
7390 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7391   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7392 }
7393 
7394 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7395   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7396 }
7397 
7398 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7399   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7400 }
7401 
7402 //===----------------------------------------------------------------------===//
7403 // vop3
7404 //===----------------------------------------------------------------------===//
7405 
7406 static bool ConvertOmodMul(int64_t &Mul) {
7407   if (Mul != 1 && Mul != 2 && Mul != 4)
7408     return false;
7409 
7410   Mul >>= 1;
7411   return true;
7412 }
7413 
7414 static bool ConvertOmodDiv(int64_t &Div) {
7415   if (Div == 1) {
7416     Div = 0;
7417     return true;
7418   }
7419 
7420   if (Div == 2) {
7421     Div = 3;
7422     return true;
7423   }
7424 
7425   return false;
7426 }
7427 
7428 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7429 // This is intentional and ensures compatibility with sp3.
7430 // See bug 35397 for details.
7431 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7432   if (BoundCtrl == 0 || BoundCtrl == 1) {
7433     BoundCtrl = 1;
7434     return true;
7435   }
7436   return false;
7437 }
7438 
7439 // Note: the order in this table matches the order of operands in AsmString.
7440 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7441   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7442   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7443   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7444   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7445   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7446   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7447   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7448   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7449   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7450   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7451   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7452   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7453   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7454   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7455   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7456   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7457   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7458   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7459   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7460   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7461   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7462   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7463   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7464   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7465   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7466   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7467   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7468   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7469   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7470   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7471   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7472   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7473   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7474   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7475   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7476   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7477   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7478   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7479   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7480   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7481   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7482 };
7483 
7484 void AMDGPUAsmParser::onBeginOfFile() {
7485   if (!getParser().getStreamer().getTargetStreamer() ||
7486       getSTI().getTargetTriple().getArch() == Triple::r600)
7487     return;
7488 
7489   if (!getTargetStreamer().getTargetID())
7490     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7491 
7492   if (isHsaAbiVersion3AndAbove(&getSTI()))
7493     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7494 }
7495 
7496 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7497 
7498   OperandMatchResultTy res = parseOptionalOpr(Operands);
7499 
7500   // This is a hack to enable hardcoded mandatory operands which follow
7501   // optional operands.
7502   //
7503   // Current design assumes that all operands after the first optional operand
7504   // are also optional. However implementation of some instructions violates
7505   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
7506   //
7507   // To alleviate this problem, we have to (implicitly) parse extra operands
7508   // to make sure autogenerated parser of custom operands never hit hardcoded
7509   // mandatory operands.
7510 
7511   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7512     if (res != MatchOperand_Success ||
7513         isToken(AsmToken::EndOfStatement))
7514       break;
7515 
7516     trySkipToken(AsmToken::Comma);
7517     res = parseOptionalOpr(Operands);
7518   }
7519 
7520   return res;
7521 }
7522 
7523 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7524   OperandMatchResultTy res;
7525   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7526     // try to parse any optional operand here
7527     if (Op.IsBit) {
7528       res = parseNamedBit(Op.Name, Operands, Op.Type);
7529     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7530       res = parseOModOperand(Operands);
7531     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7532                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7533                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7534       res = parseSDWASel(Operands, Op.Name, Op.Type);
7535     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7536       res = parseSDWADstUnused(Operands);
7537     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7538                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7539                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7540                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7541       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7542                                         Op.ConvertResult);
7543     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7544       res = parseDim(Operands);
7545     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7546       res = parseCPol(Operands);
7547     } else {
7548       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7549     }
7550     if (res != MatchOperand_NoMatch) {
7551       return res;
7552     }
7553   }
7554   return MatchOperand_NoMatch;
7555 }
7556 
7557 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7558   StringRef Name = getTokenStr();
7559   if (Name == "mul") {
7560     return parseIntWithPrefix("mul", Operands,
7561                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7562   }
7563 
7564   if (Name == "div") {
7565     return parseIntWithPrefix("div", Operands,
7566                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7567   }
7568 
7569   return MatchOperand_NoMatch;
7570 }
7571 
7572 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7573   cvtVOP3P(Inst, Operands);
7574 
7575   int Opc = Inst.getOpcode();
7576 
7577   int SrcNum;
7578   const int Ops[] = { AMDGPU::OpName::src0,
7579                       AMDGPU::OpName::src1,
7580                       AMDGPU::OpName::src2 };
7581   for (SrcNum = 0;
7582        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7583        ++SrcNum);
7584   assert(SrcNum > 0);
7585 
7586   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7587   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7588 
7589   if ((OpSel & (1 << SrcNum)) != 0) {
7590     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7591     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7592     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7593   }
7594 }
7595 
7596 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7597       // 1. This operand is input modifiers
7598   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7599       // 2. This is not last operand
7600       && Desc.NumOperands > (OpNum + 1)
7601       // 3. Next operand is register class
7602       && Desc.OpInfo[OpNum + 1].RegClass != -1
7603       // 4. Next register is not tied to any other operand
7604       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7605 }
7606 
7607 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7608 {
7609   OptionalImmIndexMap OptionalIdx;
7610   unsigned Opc = Inst.getOpcode();
7611 
7612   unsigned I = 1;
7613   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7614   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7615     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7616   }
7617 
7618   for (unsigned E = Operands.size(); I != E; ++I) {
7619     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7620     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7621       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7622     } else if (Op.isInterpSlot() ||
7623                Op.isInterpAttr() ||
7624                Op.isAttrChan()) {
7625       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7626     } else if (Op.isImmModifier()) {
7627       OptionalIdx[Op.getImmTy()] = I;
7628     } else {
7629       llvm_unreachable("unhandled operand type");
7630     }
7631   }
7632 
7633   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7634     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7635   }
7636 
7637   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7638     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7639   }
7640 
7641   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7642     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7643   }
7644 }
7645 
7646 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7647                               OptionalImmIndexMap &OptionalIdx) {
7648   unsigned Opc = Inst.getOpcode();
7649 
7650   unsigned I = 1;
7651   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7652   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7653     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7654   }
7655 
7656   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7657     // This instruction has src modifiers
7658     for (unsigned E = Operands.size(); I != E; ++I) {
7659       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7660       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7661         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7662       } else if (Op.isImmModifier()) {
7663         OptionalIdx[Op.getImmTy()] = I;
7664       } else if (Op.isRegOrImm()) {
7665         Op.addRegOrImmOperands(Inst, 1);
7666       } else {
7667         llvm_unreachable("unhandled operand type");
7668       }
7669     }
7670   } else {
7671     // No src modifiers
7672     for (unsigned E = Operands.size(); I != E; ++I) {
7673       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7674       if (Op.isMod()) {
7675         OptionalIdx[Op.getImmTy()] = I;
7676       } else {
7677         Op.addRegOrImmOperands(Inst, 1);
7678       }
7679     }
7680   }
7681 
7682   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7683     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7684   }
7685 
7686   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7687     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7688   }
7689 
7690   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7691   // it has src2 register operand that is tied to dst operand
7692   // we don't allow modifiers for this operand in assembler so src2_modifiers
7693   // should be 0.
7694   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7695       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7696       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7697       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7698       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7699       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7700       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7701       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7702       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7703       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7704       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7705     auto it = Inst.begin();
7706     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7707     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7708     ++it;
7709     // Copy the operand to ensure it's not invalidated when Inst grows.
7710     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7711   }
7712 }
7713 
7714 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7715   OptionalImmIndexMap OptionalIdx;
7716   cvtVOP3(Inst, Operands, OptionalIdx);
7717 }
7718 
7719 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7720                                OptionalImmIndexMap &OptIdx) {
7721   const int Opc = Inst.getOpcode();
7722   const MCInstrDesc &Desc = MII.get(Opc);
7723 
7724   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7725 
7726   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7727     assert(!IsPacked);
7728     Inst.addOperand(Inst.getOperand(0));
7729   }
7730 
7731   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
7732   // instruction, and then figure out where to actually put the modifiers
7733 
7734   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7735   if (OpSelIdx != -1) {
7736     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7737   }
7738 
7739   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7740   if (OpSelHiIdx != -1) {
7741     int DefaultVal = IsPacked ? -1 : 0;
7742     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7743                           DefaultVal);
7744   }
7745 
7746   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7747   if (NegLoIdx != -1) {
7748     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7749     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7750   }
7751 
7752   const int Ops[] = { AMDGPU::OpName::src0,
7753                       AMDGPU::OpName::src1,
7754                       AMDGPU::OpName::src2 };
7755   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7756                          AMDGPU::OpName::src1_modifiers,
7757                          AMDGPU::OpName::src2_modifiers };
7758 
7759   unsigned OpSel = 0;
7760   unsigned OpSelHi = 0;
7761   unsigned NegLo = 0;
7762   unsigned NegHi = 0;
7763 
7764   if (OpSelIdx != -1)
7765     OpSel = Inst.getOperand(OpSelIdx).getImm();
7766 
7767   if (OpSelHiIdx != -1)
7768     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7769 
7770   if (NegLoIdx != -1) {
7771     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7772     NegLo = Inst.getOperand(NegLoIdx).getImm();
7773     NegHi = Inst.getOperand(NegHiIdx).getImm();
7774   }
7775 
7776   for (int J = 0; J < 3; ++J) {
7777     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7778     if (OpIdx == -1)
7779       break;
7780 
7781     uint32_t ModVal = 0;
7782 
7783     if ((OpSel & (1 << J)) != 0)
7784       ModVal |= SISrcMods::OP_SEL_0;
7785 
7786     if ((OpSelHi & (1 << J)) != 0)
7787       ModVal |= SISrcMods::OP_SEL_1;
7788 
7789     if ((NegLo & (1 << J)) != 0)
7790       ModVal |= SISrcMods::NEG;
7791 
7792     if ((NegHi & (1 << J)) != 0)
7793       ModVal |= SISrcMods::NEG_HI;
7794 
7795     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7796 
7797     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7798   }
7799 }
7800 
7801 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7802   OptionalImmIndexMap OptIdx;
7803   cvtVOP3(Inst, Operands, OptIdx);
7804   cvtVOP3P(Inst, Operands, OptIdx);
7805 }
7806 
7807 //===----------------------------------------------------------------------===//
7808 // dpp
7809 //===----------------------------------------------------------------------===//
7810 
7811 bool AMDGPUOperand::isDPP8() const {
7812   return isImmTy(ImmTyDPP8);
7813 }
7814 
7815 bool AMDGPUOperand::isDPPCtrl() const {
7816   using namespace AMDGPU::DPP;
7817 
7818   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7819   if (result) {
7820     int64_t Imm = getImm();
7821     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7822            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7823            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7824            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7825            (Imm == DppCtrl::WAVE_SHL1) ||
7826            (Imm == DppCtrl::WAVE_ROL1) ||
7827            (Imm == DppCtrl::WAVE_SHR1) ||
7828            (Imm == DppCtrl::WAVE_ROR1) ||
7829            (Imm == DppCtrl::ROW_MIRROR) ||
7830            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7831            (Imm == DppCtrl::BCAST15) ||
7832            (Imm == DppCtrl::BCAST31) ||
7833            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7834            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7835   }
7836   return false;
7837 }
7838 
7839 //===----------------------------------------------------------------------===//
7840 // mAI
7841 //===----------------------------------------------------------------------===//
7842 
7843 bool AMDGPUOperand::isBLGP() const {
7844   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7845 }
7846 
7847 bool AMDGPUOperand::isCBSZ() const {
7848   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7849 }
7850 
7851 bool AMDGPUOperand::isABID() const {
7852   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7853 }
7854 
7855 bool AMDGPUOperand::isS16Imm() const {
7856   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7857 }
7858 
7859 bool AMDGPUOperand::isU16Imm() const {
7860   return isImm() && isUInt<16>(getImm());
7861 }
7862 
7863 //===----------------------------------------------------------------------===//
7864 // dim
7865 //===----------------------------------------------------------------------===//
7866 
7867 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7868   // We want to allow "dim:1D" etc.,
7869   // but the initial 1 is tokenized as an integer.
7870   std::string Token;
7871   if (isToken(AsmToken::Integer)) {
7872     SMLoc Loc = getToken().getEndLoc();
7873     Token = std::string(getTokenStr());
7874     lex();
7875     if (getLoc() != Loc)
7876       return false;
7877   }
7878 
7879   StringRef Suffix;
7880   if (!parseId(Suffix))
7881     return false;
7882   Token += Suffix;
7883 
7884   StringRef DimId = Token;
7885   if (DimId.startswith("SQ_RSRC_IMG_"))
7886     DimId = DimId.drop_front(12);
7887 
7888   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7889   if (!DimInfo)
7890     return false;
7891 
7892   Encoding = DimInfo->Encoding;
7893   return true;
7894 }
7895 
7896 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7897   if (!isGFX10Plus())
7898     return MatchOperand_NoMatch;
7899 
7900   SMLoc S = getLoc();
7901 
7902   if (!trySkipId("dim", AsmToken::Colon))
7903     return MatchOperand_NoMatch;
7904 
7905   unsigned Encoding;
7906   SMLoc Loc = getLoc();
7907   if (!parseDimId(Encoding)) {
7908     Error(Loc, "invalid dim value");
7909     return MatchOperand_ParseFail;
7910   }
7911 
7912   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7913                                               AMDGPUOperand::ImmTyDim));
7914   return MatchOperand_Success;
7915 }
7916 
7917 //===----------------------------------------------------------------------===//
7918 // dpp
7919 //===----------------------------------------------------------------------===//
7920 
7921 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7922   SMLoc S = getLoc();
7923 
7924   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7925     return MatchOperand_NoMatch;
7926 
7927   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7928 
7929   int64_t Sels[8];
7930 
7931   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7932     return MatchOperand_ParseFail;
7933 
7934   for (size_t i = 0; i < 8; ++i) {
7935     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7936       return MatchOperand_ParseFail;
7937 
7938     SMLoc Loc = getLoc();
7939     if (getParser().parseAbsoluteExpression(Sels[i]))
7940       return MatchOperand_ParseFail;
7941     if (0 > Sels[i] || 7 < Sels[i]) {
7942       Error(Loc, "expected a 3-bit value");
7943       return MatchOperand_ParseFail;
7944     }
7945   }
7946 
7947   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7948     return MatchOperand_ParseFail;
7949 
7950   unsigned DPP8 = 0;
7951   for (size_t i = 0; i < 8; ++i)
7952     DPP8 |= (Sels[i] << (i * 3));
7953 
7954   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7955   return MatchOperand_Success;
7956 }
7957 
7958 bool
7959 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7960                                     const OperandVector &Operands) {
7961   if (Ctrl == "row_newbcast")
7962     return isGFX90A();
7963 
7964   if (Ctrl == "row_share" ||
7965       Ctrl == "row_xmask")
7966     return isGFX10Plus();
7967 
7968   if (Ctrl == "wave_shl" ||
7969       Ctrl == "wave_shr" ||
7970       Ctrl == "wave_rol" ||
7971       Ctrl == "wave_ror" ||
7972       Ctrl == "row_bcast")
7973     return isVI() || isGFX9();
7974 
7975   return Ctrl == "row_mirror" ||
7976          Ctrl == "row_half_mirror" ||
7977          Ctrl == "quad_perm" ||
7978          Ctrl == "row_shl" ||
7979          Ctrl == "row_shr" ||
7980          Ctrl == "row_ror";
7981 }
7982 
7983 int64_t
7984 AMDGPUAsmParser::parseDPPCtrlPerm() {
7985   // quad_perm:[%d,%d,%d,%d]
7986 
7987   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7988     return -1;
7989 
7990   int64_t Val = 0;
7991   for (int i = 0; i < 4; ++i) {
7992     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7993       return -1;
7994 
7995     int64_t Temp;
7996     SMLoc Loc = getLoc();
7997     if (getParser().parseAbsoluteExpression(Temp))
7998       return -1;
7999     if (Temp < 0 || Temp > 3) {
8000       Error(Loc, "expected a 2-bit value");
8001       return -1;
8002     }
8003 
8004     Val += (Temp << i * 2);
8005   }
8006 
8007   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8008     return -1;
8009 
8010   return Val;
8011 }
8012 
8013 int64_t
8014 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8015   using namespace AMDGPU::DPP;
8016 
8017   // sel:%d
8018 
8019   int64_t Val;
8020   SMLoc Loc = getLoc();
8021 
8022   if (getParser().parseAbsoluteExpression(Val))
8023     return -1;
8024 
8025   struct DppCtrlCheck {
8026     int64_t Ctrl;
8027     int Lo;
8028     int Hi;
8029   };
8030 
8031   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8032     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8033     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8034     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8035     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8036     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8037     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8038     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8039     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8040     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8041     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8042     .Default({-1, 0, 0});
8043 
8044   bool Valid;
8045   if (Check.Ctrl == -1) {
8046     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8047     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8048   } else {
8049     Valid = Check.Lo <= Val && Val <= Check.Hi;
8050     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8051   }
8052 
8053   if (!Valid) {
8054     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8055     return -1;
8056   }
8057 
8058   return Val;
8059 }
8060 
8061 OperandMatchResultTy
8062 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8063   using namespace AMDGPU::DPP;
8064 
8065   if (!isToken(AsmToken::Identifier) ||
8066       !isSupportedDPPCtrl(getTokenStr(), Operands))
8067     return MatchOperand_NoMatch;
8068 
8069   SMLoc S = getLoc();
8070   int64_t Val = -1;
8071   StringRef Ctrl;
8072 
8073   parseId(Ctrl);
8074 
8075   if (Ctrl == "row_mirror") {
8076     Val = DppCtrl::ROW_MIRROR;
8077   } else if (Ctrl == "row_half_mirror") {
8078     Val = DppCtrl::ROW_HALF_MIRROR;
8079   } else {
8080     if (skipToken(AsmToken::Colon, "expected a colon")) {
8081       if (Ctrl == "quad_perm") {
8082         Val = parseDPPCtrlPerm();
8083       } else {
8084         Val = parseDPPCtrlSel(Ctrl);
8085       }
8086     }
8087   }
8088 
8089   if (Val == -1)
8090     return MatchOperand_ParseFail;
8091 
8092   Operands.push_back(
8093     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8094   return MatchOperand_Success;
8095 }
8096 
8097 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8098   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8099 }
8100 
8101 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8102   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8103 }
8104 
8105 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8106   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8107 }
8108 
8109 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8110   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8111 }
8112 
8113 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8114   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8115 }
8116 
8117 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8118   OptionalImmIndexMap OptionalIdx;
8119 
8120   unsigned Opc = Inst.getOpcode();
8121   bool HasModifiers =
8122       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8123   unsigned I = 1;
8124   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8125   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8126     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8127   }
8128 
8129   int Fi = 0;
8130   for (unsigned E = Operands.size(); I != E; ++I) {
8131     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8132                                             MCOI::TIED_TO);
8133     if (TiedTo != -1) {
8134       assert((unsigned)TiedTo < Inst.getNumOperands());
8135       // handle tied old or src2 for MAC instructions
8136       Inst.addOperand(Inst.getOperand(TiedTo));
8137     }
8138     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8139     // Add the register arguments
8140     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8141       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
8142       // Skip it.
8143       continue;
8144     }
8145 
8146     if (IsDPP8) {
8147       if (Op.isDPP8()) {
8148         Op.addImmOperands(Inst, 1);
8149       } else if (HasModifiers &&
8150                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8151         Op.addRegWithFPInputModsOperands(Inst, 2);
8152       } else if (Op.isFI()) {
8153         Fi = Op.getImm();
8154       } else if (Op.isReg()) {
8155         Op.addRegOperands(Inst, 1);
8156       } else {
8157         llvm_unreachable("Invalid operand type");
8158       }
8159     } else {
8160       if (HasModifiers &&
8161           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8162         Op.addRegWithFPInputModsOperands(Inst, 2);
8163       } else if (Op.isReg()) {
8164         Op.addRegOperands(Inst, 1);
8165       } else if (Op.isDPPCtrl()) {
8166         Op.addImmOperands(Inst, 1);
8167       } else if (Op.isImm()) {
8168         // Handle optional arguments
8169         OptionalIdx[Op.getImmTy()] = I;
8170       } else {
8171         llvm_unreachable("Invalid operand type");
8172       }
8173     }
8174   }
8175 
8176   if (IsDPP8) {
8177     using namespace llvm::AMDGPU::DPP;
8178     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8179   } else {
8180     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8181     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8182     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8183     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8184       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8185     }
8186   }
8187 }
8188 
8189 //===----------------------------------------------------------------------===//
8190 // sdwa
8191 //===----------------------------------------------------------------------===//
8192 
8193 OperandMatchResultTy
8194 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8195                               AMDGPUOperand::ImmTy Type) {
8196   using namespace llvm::AMDGPU::SDWA;
8197 
8198   SMLoc S = getLoc();
8199   StringRef Value;
8200   OperandMatchResultTy res;
8201 
8202   SMLoc StringLoc;
8203   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8204   if (res != MatchOperand_Success) {
8205     return res;
8206   }
8207 
8208   int64_t Int;
8209   Int = StringSwitch<int64_t>(Value)
8210         .Case("BYTE_0", SdwaSel::BYTE_0)
8211         .Case("BYTE_1", SdwaSel::BYTE_1)
8212         .Case("BYTE_2", SdwaSel::BYTE_2)
8213         .Case("BYTE_3", SdwaSel::BYTE_3)
8214         .Case("WORD_0", SdwaSel::WORD_0)
8215         .Case("WORD_1", SdwaSel::WORD_1)
8216         .Case("DWORD", SdwaSel::DWORD)
8217         .Default(0xffffffff);
8218 
8219   if (Int == 0xffffffff) {
8220     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8221     return MatchOperand_ParseFail;
8222   }
8223 
8224   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8225   return MatchOperand_Success;
8226 }
8227 
8228 OperandMatchResultTy
8229 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8230   using namespace llvm::AMDGPU::SDWA;
8231 
8232   SMLoc S = getLoc();
8233   StringRef Value;
8234   OperandMatchResultTy res;
8235 
8236   SMLoc StringLoc;
8237   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8238   if (res != MatchOperand_Success) {
8239     return res;
8240   }
8241 
8242   int64_t Int;
8243   Int = StringSwitch<int64_t>(Value)
8244         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8245         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8246         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8247         .Default(0xffffffff);
8248 
8249   if (Int == 0xffffffff) {
8250     Error(StringLoc, "invalid dst_unused value");
8251     return MatchOperand_ParseFail;
8252   }
8253 
8254   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8255   return MatchOperand_Success;
8256 }
8257 
8258 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8259   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8260 }
8261 
8262 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8263   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8264 }
8265 
8266 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8267   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8268 }
8269 
8270 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8271   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8272 }
8273 
8274 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8275   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8276 }
8277 
8278 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8279                               uint64_t BasicInstType,
8280                               bool SkipDstVcc,
8281                               bool SkipSrcVcc) {
8282   using namespace llvm::AMDGPU::SDWA;
8283 
8284   OptionalImmIndexMap OptionalIdx;
8285   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8286   bool SkippedVcc = false;
8287 
8288   unsigned I = 1;
8289   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8290   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8291     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8292   }
8293 
8294   for (unsigned E = Operands.size(); I != E; ++I) {
8295     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8296     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8297         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8298       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8299       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8300       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8301       // Skip VCC only if we didn't skip it on previous iteration.
8302       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8303       if (BasicInstType == SIInstrFlags::VOP2 &&
8304           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8305            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8306         SkippedVcc = true;
8307         continue;
8308       } else if (BasicInstType == SIInstrFlags::VOPC &&
8309                  Inst.getNumOperands() == 0) {
8310         SkippedVcc = true;
8311         continue;
8312       }
8313     }
8314     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8315       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8316     } else if (Op.isImm()) {
8317       // Handle optional arguments
8318       OptionalIdx[Op.getImmTy()] = I;
8319     } else {
8320       llvm_unreachable("Invalid operand type");
8321     }
8322     SkippedVcc = false;
8323   }
8324 
8325   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8326       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8327       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8328     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
8329     switch (BasicInstType) {
8330     case SIInstrFlags::VOP1:
8331       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8332       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8333         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8334       }
8335       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8336       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8337       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8338       break;
8339 
8340     case SIInstrFlags::VOP2:
8341       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8342       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8343         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8344       }
8345       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8346       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8347       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8348       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8349       break;
8350 
8351     case SIInstrFlags::VOPC:
8352       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8353         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8354       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8355       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8356       break;
8357 
8358     default:
8359       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8360     }
8361   }
8362 
8363   // special case v_mac_{f16, f32}:
8364   // it has src2 register operand that is tied to dst operand
8365   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8366       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8367     auto it = Inst.begin();
8368     std::advance(
8369       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8370     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8371   }
8372 }
8373 
8374 //===----------------------------------------------------------------------===//
8375 // mAI
8376 //===----------------------------------------------------------------------===//
8377 
8378 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8379   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8380 }
8381 
8382 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8383   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8384 }
8385 
8386 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8387   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8388 }
8389 
8390 /// Force static initialization.
8391 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8392   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8393   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8394 }
8395 
8396 #define GET_REGISTER_MATCHER
8397 #define GET_MATCHER_IMPLEMENTATION
8398 #define GET_MNEMONIC_SPELL_CHECKER
8399 #define GET_MNEMONIC_CHECKER
8400 #include "AMDGPUGenAsmMatcher.inc"
8401 
8402 // This function should be defined after auto-generated include so that we have
8403 // MatchClassKind enum defined
8404 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8405                                                      unsigned Kind) {
8406   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8407   // But MatchInstructionImpl() expects to meet token and fails to validate
8408   // operand. This method checks if we are given immediate operand but expect to
8409   // get corresponding token.
8410   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8411   switch (Kind) {
8412   case MCK_addr64:
8413     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8414   case MCK_gds:
8415     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8416   case MCK_lds:
8417     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8418   case MCK_idxen:
8419     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8420   case MCK_offen:
8421     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8422   case MCK_SSrcB32:
8423     // When operands have expression values, they will return true for isToken,
8424     // because it is not possible to distinguish between a token and an
8425     // expression at parse time. MatchInstructionImpl() will always try to
8426     // match an operand as a token, when isToken returns true, and when the
8427     // name of the expression is not a valid token, the match will fail,
8428     // so we need to handle it here.
8429     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8430   case MCK_SSrcF32:
8431     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8432   case MCK_SoppBrTarget:
8433     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8434   case MCK_VReg32OrOff:
8435     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8436   case MCK_InterpSlot:
8437     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8438   case MCK_Attr:
8439     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8440   case MCK_AttrChan:
8441     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8442   case MCK_ImmSMEMOffset:
8443     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8444   case MCK_SReg_64:
8445   case MCK_SReg_64_XEXEC:
8446     // Null is defined as a 32-bit register but
8447     // it should also be enabled with 64-bit operands.
8448     // The following code enables it for SReg_64 operands
8449     // used as source and destination. Remaining source
8450     // operands are handled in isInlinableImm.
8451     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8452   default:
8453     return Match_InvalidOperand;
8454   }
8455 }
8456 
8457 //===----------------------------------------------------------------------===//
8458 // endpgm
8459 //===----------------------------------------------------------------------===//
8460 
8461 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8462   SMLoc S = getLoc();
8463   int64_t Imm = 0;
8464 
8465   if (!parseExpr(Imm)) {
8466     // The operand is optional, if not present default to 0
8467     Imm = 0;
8468   }
8469 
8470   if (!isUInt<16>(Imm)) {
8471     Error(S, "expected a 16-bit value");
8472     return MatchOperand_ParseFail;
8473   }
8474 
8475   Operands.push_back(
8476       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8477   return MatchOperand_Success;
8478 }
8479 
8480 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8481