1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/TargetParser.h"
39 
40 using namespace llvm;
41 using namespace llvm::AMDGPU;
42 using namespace llvm::amdhsa;
43 
44 namespace {
45 
46 class AMDGPUAsmParser;
47 
48 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
49 
50 //===----------------------------------------------------------------------===//
51 // Operand
52 //===----------------------------------------------------------------------===//
53 
54 class AMDGPUOperand : public MCParsedAsmOperand {
55   enum KindTy {
56     Token,
57     Immediate,
58     Register,
59     Expression
60   } Kind;
61 
62   SMLoc StartLoc, EndLoc;
63   const AMDGPUAsmParser *AsmParser;
64 
65 public:
66   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
67       : Kind(Kind_), AsmParser(AsmParser_) {}
68 
69   using Ptr = std::unique_ptr<AMDGPUOperand>;
70 
71   struct Modifiers {
72     bool Abs = false;
73     bool Neg = false;
74     bool Sext = false;
75 
76     bool hasFPModifiers() const { return Abs || Neg; }
77     bool hasIntModifiers() const { return Sext; }
78     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
79 
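    // Illustrative example: a source operand written as "-|v0|" has both Neg
    // and Abs set, so getFPModifiersOperand() below yields
    // SISrcMods::NEG | SISrcMods::ABS.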
80     int64_t getFPModifiersOperand() const {
81       int64_t Operand = 0;
82       Operand |= Abs ? SISrcMods::ABS : 0u;
83       Operand |= Neg ? SISrcMods::NEG : 0u;
84       return Operand;
85     }
86 
87     int64_t getIntModifiersOperand() const {
88       int64_t Operand = 0;
89       Operand |= Sext ? SISrcMods::SEXT : 0u;
90       return Operand;
91     }
92 
93     int64_t getModifiersOperand() const {
94       assert(!(hasFPModifiers() && hasIntModifiers())
95            && "fp and int modifiers should not be used simultaneously");
96       if (hasFPModifiers()) {
97         return getFPModifiersOperand();
98       } else if (hasIntModifiers()) {
99         return getIntModifiersOperand();
100       } else {
101         return 0;
102       }
103     }
104 
105     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
106   };
107 
108   enum ImmTy {
109     ImmTyNone,
110     ImmTyGDS,
111     ImmTyLDS,
112     ImmTyOffen,
113     ImmTyIdxen,
114     ImmTyAddr64,
115     ImmTyOffset,
116     ImmTyInstOffset,
117     ImmTyOffset0,
118     ImmTyOffset1,
119     ImmTyCPol,
120     ImmTySWZ,
121     ImmTyTFE,
122     ImmTyD16,
123     ImmTyClampSI,
124     ImmTyOModSI,
125     ImmTyDPP8,
126     ImmTyDppCtrl,
127     ImmTyDppRowMask,
128     ImmTyDppBankMask,
129     ImmTyDppBoundCtrl,
130     ImmTyDppFi,
131     ImmTySdwaDstSel,
132     ImmTySdwaSrc0Sel,
133     ImmTySdwaSrc1Sel,
134     ImmTySdwaDstUnused,
135     ImmTyDMask,
136     ImmTyDim,
137     ImmTyUNorm,
138     ImmTyDA,
139     ImmTyR128A16,
140     ImmTyA16,
141     ImmTyLWE,
142     ImmTyExpTgt,
143     ImmTyExpCompr,
144     ImmTyExpVM,
145     ImmTyFORMAT,
146     ImmTyHwreg,
147     ImmTyOff,
148     ImmTySendMsg,
149     ImmTyInterpSlot,
150     ImmTyInterpAttr,
151     ImmTyAttrChan,
152     ImmTyOpSel,
153     ImmTyOpSelHi,
154     ImmTyNegLo,
155     ImmTyNegHi,
156     ImmTySwizzle,
157     ImmTyGprIdxMode,
158     ImmTyHigh,
159     ImmTyBLGP,
160     ImmTyCBSZ,
161     ImmTyABID,
162     ImmTyEndpgm,
163   };
164 
165   enum ImmKindTy {
166     ImmKindTyNone,
167     ImmKindTyLiteral,
168     ImmKindTyConst,
169   };
170 
171 private:
172   struct TokOp {
173     const char *Data;
174     unsigned Length;
175   };
176 
177   struct ImmOp {
178     int64_t Val;
179     ImmTy Type;
180     bool IsFPImm;
181     mutable ImmKindTy Kind;
182     Modifiers Mods;
183   };
184 
185   struct RegOp {
186     unsigned RegNo;
187     Modifiers Mods;
188   };
189 
190   union {
191     TokOp Tok;
192     ImmOp Imm;
193     RegOp Reg;
194     const MCExpr *Expr;
195   };
196 
197 public:
198   bool isToken() const override {
199     if (Kind == Token)
200       return true;
201 
202     // When parsing operands, we can't always tell if something was meant to be
203     // a token, like 'gds', or an expression that references a global variable.
204     // In this case, we assume the string is an expression, and if we need to
205     // interpret it as a token, then we treat the symbol name as the token.
206     return isSymbolRefExpr();
207   }
208 
209   bool isSymbolRefExpr() const {
210     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
211   }
212 
213   bool isImm() const override {
214     return Kind == Immediate;
215   }
216 
217   void setImmKindNone() const {
218     assert(isImm());
219     Imm.Kind = ImmKindTyNone;
220   }
221 
222   void setImmKindLiteral() const {
223     assert(isImm());
224     Imm.Kind = ImmKindTyLiteral;
225   }
226 
227   void setImmKindConst() const {
228     assert(isImm());
229     Imm.Kind = ImmKindTyConst;
230   }
231 
232   bool IsImmKindLiteral() const {
233     return isImm() && Imm.Kind == ImmKindTyLiteral;
234   }
235 
236   bool isImmKindConst() const {
237     return isImm() && Imm.Kind == ImmKindTyConst;
238   }
239 
240   bool isInlinableImm(MVT type) const;
241   bool isLiteralImm(MVT type) const;
242 
243   bool isRegKind() const {
244     return Kind == Register;
245   }
246 
247   bool isReg() const override {
248     return isRegKind() && !hasModifiers();
249   }
250 
251   bool isRegOrInline(unsigned RCID, MVT type) const {
252     return isRegClass(RCID) || isInlinableImm(type);
253   }
254 
255   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
256     return isRegOrInline(RCID, type) || isLiteralImm(type);
257   }
258 
259   bool isRegOrImmWithInt16InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
261   }
262 
263   bool isRegOrImmWithInt32InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
265   }
266 
267   bool isRegOrImmWithInt64InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
269   }
270 
271   bool isRegOrImmWithFP16InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
273   }
274 
275   bool isRegOrImmWithFP32InputMods() const {
276     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
277   }
278 
279   bool isRegOrImmWithFP64InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
281   }
282 
283   bool isVReg() const {
284     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
285            isRegClass(AMDGPU::VReg_64RegClassID) ||
286            isRegClass(AMDGPU::VReg_96RegClassID) ||
287            isRegClass(AMDGPU::VReg_128RegClassID) ||
288            isRegClass(AMDGPU::VReg_160RegClassID) ||
289            isRegClass(AMDGPU::VReg_192RegClassID) ||
290            isRegClass(AMDGPU::VReg_256RegClassID) ||
291            isRegClass(AMDGPU::VReg_512RegClassID) ||
292            isRegClass(AMDGPU::VReg_1024RegClassID);
293   }
294 
295   bool isVReg32() const {
296     return isRegClass(AMDGPU::VGPR_32RegClassID);
297   }
298 
299   bool isVReg32OrOff() const {
300     return isOff() || isVReg32();
301   }
302 
303   bool isNull() const {
304     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
305   }
306 
307   bool isVRegWithInputMods() const;
308 
309   bool isSDWAOperand(MVT type) const;
310   bool isSDWAFP16Operand() const;
311   bool isSDWAFP32Operand() const;
312   bool isSDWAInt16Operand() const;
313   bool isSDWAInt32Operand() const;
314 
315   bool isImmTy(ImmTy ImmT) const {
316     return isImm() && Imm.Type == ImmT;
317   }
318 
319   bool isImmModifier() const {
320     return isImm() && Imm.Type != ImmTyNone;
321   }
322 
323   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
324   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
325   bool isDMask() const { return isImmTy(ImmTyDMask); }
326   bool isDim() const { return isImmTy(ImmTyDim); }
327   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
328   bool isDA() const { return isImmTy(ImmTyDA); }
329   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
330   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
331   bool isLWE() const { return isImmTy(ImmTyLWE); }
332   bool isOff() const { return isImmTy(ImmTyOff); }
333   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
334   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
335   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
336   bool isOffen() const { return isImmTy(ImmTyOffen); }
337   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
338   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
339   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
340   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
341   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
342 
343   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
344   bool isGDS() const { return isImmTy(ImmTyGDS); }
345   bool isLDS() const { return isImmTy(ImmTyLDS); }
346   bool isCPol() const { return isImmTy(ImmTyCPol); }
347   bool isSWZ() const { return isImmTy(ImmTySWZ); }
348   bool isTFE() const { return isImmTy(ImmTyTFE); }
349   bool isD16() const { return isImmTy(ImmTyD16); }
350   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
351   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
352   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
353   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
354   bool isFI() const { return isImmTy(ImmTyDppFi); }
355   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
356   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
357   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
358   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
359   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
360   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
361   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
362   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
363   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
364   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
365   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
366   bool isHigh() const { return isImmTy(ImmTyHigh); }
367 
368   bool isMod() const {
369     return isClampSI() || isOModSI();
370   }
371 
372   bool isRegOrImm() const {
373     return isReg() || isImm();
374   }
375 
376   bool isRegClass(unsigned RCID) const;
377 
378   bool isInlineValue() const;
379 
380   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
381     return isRegOrInline(RCID, type) && !hasModifiers();
382   }
383 
384   bool isSCSrcB16() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
386   }
387 
388   bool isSCSrcV2B16() const {
389     return isSCSrcB16();
390   }
391 
392   bool isSCSrcB32() const {
393     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
394   }
395 
396   bool isSCSrcB64() const {
397     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
398   }
399 
400   bool isBoolReg() const;
401 
402   bool isSCSrcF16() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
404   }
405 
406   bool isSCSrcV2F16() const {
407     return isSCSrcF16();
408   }
409 
410   bool isSCSrcF32() const {
411     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
412   }
413 
414   bool isSCSrcF64() const {
415     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
416   }
417 
418   bool isSSrcB32() const {
419     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
420   }
421 
422   bool isSSrcB16() const {
423     return isSCSrcB16() || isLiteralImm(MVT::i16);
424   }
425 
426   bool isSSrcV2B16() const {
427     llvm_unreachable("cannot happen");
428     return isSSrcB16();
429   }
430 
431   bool isSSrcB64() const {
432     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
433     // See isVSrc64().
434     return isSCSrcB64() || isLiteralImm(MVT::i64);
435   }
436 
437   bool isSSrcF32() const {
438     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
439   }
440 
441   bool isSSrcF64() const {
442     return isSCSrcB64() || isLiteralImm(MVT::f64);
443   }
444 
445   bool isSSrcF16() const {
446     return isSCSrcB16() || isLiteralImm(MVT::f16);
447   }
448 
449   bool isSSrcV2F16() const {
450     llvm_unreachable("cannot happen");
451     return isSSrcF16();
452   }
453 
454   bool isSSrcV2FP32() const {
455     llvm_unreachable("cannot happen");
456     return isSSrcF32();
457   }
458 
459   bool isSCSrcV2FP32() const {
460     llvm_unreachable("cannot happen");
461     return isSCSrcF32();
462   }
463 
464   bool isSSrcV2INT32() const {
465     llvm_unreachable("cannot happen");
466     return isSSrcB32();
467   }
468 
469   bool isSCSrcV2INT32() const {
470     llvm_unreachable("cannot happen");
471     return isSCSrcB32();
472   }
473 
474   bool isSSrcOrLdsB32() const {
475     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
476            isLiteralImm(MVT::i32) || isExpr();
477   }
478 
479   bool isVCSrcB32() const {
480     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
481   }
482 
483   bool isVCSrcB64() const {
484     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
485   }
486 
487   bool isVCSrcB16() const {
488     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
489   }
490 
491   bool isVCSrcV2B16() const {
492     return isVCSrcB16();
493   }
494 
495   bool isVCSrcF32() const {
496     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
497   }
498 
499   bool isVCSrcF64() const {
500     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
501   }
502 
503   bool isVCSrcF16() const {
504     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
505   }
506 
507   bool isVCSrcV2F16() const {
508     return isVCSrcF16();
509   }
510 
511   bool isVSrcB32() const {
512     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
513   }
514 
515   bool isVSrcB64() const {
516     return isVCSrcF64() || isLiteralImm(MVT::i64);
517   }
518 
519   bool isVSrcB16() const {
520     return isVCSrcB16() || isLiteralImm(MVT::i16);
521   }
522 
523   bool isVSrcV2B16() const {
524     return isVSrcB16() || isLiteralImm(MVT::v2i16);
525   }
526 
527   bool isVCSrcV2FP32() const {
528     return isVCSrcF64();
529   }
530 
531   bool isVSrcV2FP32() const {
532     return isVSrcF64() || isLiteralImm(MVT::v2f32);
533   }
534 
535   bool isVCSrcV2INT32() const {
536     return isVCSrcB64();
537   }
538 
539   bool isVSrcV2INT32() const {
540     return isVSrcB64() || isLiteralImm(MVT::v2i32);
541   }
542 
543   bool isVSrcF32() const {
544     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
545   }
546 
547   bool isVSrcF64() const {
548     return isVCSrcF64() || isLiteralImm(MVT::f64);
549   }
550 
551   bool isVSrcF16() const {
552     return isVCSrcF16() || isLiteralImm(MVT::f16);
553   }
554 
555   bool isVSrcV2F16() const {
556     return isVSrcF16() || isLiteralImm(MVT::v2f16);
557   }
558 
559   bool isVISrcB32() const {
560     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
561   }
562 
563   bool isVISrcB16() const {
564     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
565   }
566 
567   bool isVISrcV2B16() const {
568     return isVISrcB16();
569   }
570 
571   bool isVISrcF32() const {
572     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
573   }
574 
575   bool isVISrcF16() const {
576     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
577   }
578 
579   bool isVISrcV2F16() const {
580     return isVISrcF16() || isVISrcB32();
581   }
582 
583   bool isVISrc_64B64() const {
584     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
585   }
586 
587   bool isVISrc_64F64() const {
588     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
589   }
590 
591   bool isVISrc_64V2FP32() const {
592     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
593   }
594 
595   bool isVISrc_64V2INT32() const {
596     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
597   }
598 
599   bool isVISrc_256B64() const {
600     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
601   }
602 
603   bool isVISrc_256F64() const {
604     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
605   }
606 
607   bool isVISrc_128B16() const {
608     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
609   }
610 
611   bool isVISrc_128V2B16() const {
612     return isVISrc_128B16();
613   }
614 
615   bool isVISrc_128B32() const {
616     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
617   }
618 
619   bool isVISrc_128F32() const {
620     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
621   }
622 
623   bool isVISrc_256V2FP32() const {
624     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
625   }
626 
627   bool isVISrc_256V2INT32() const {
628     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
629   }
630 
631   bool isVISrc_512B32() const {
632     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
633   }
634 
635   bool isVISrc_512B16() const {
636     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
637   }
638 
639   bool isVISrc_512V2B16() const {
640     return isVISrc_512B16();
641   }
642 
643   bool isVISrc_512F32() const {
644     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
645   }
646 
647   bool isVISrc_512F16() const {
648     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
649   }
650 
651   bool isVISrc_512V2F16() const {
652     return isVISrc_512F16() || isVISrc_512B32();
653   }
654 
655   bool isVISrc_1024B32() const {
656     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
657   }
658 
659   bool isVISrc_1024B16() const {
660     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
661   }
662 
663   bool isVISrc_1024V2B16() const {
664     return isVISrc_1024B16();
665   }
666 
667   bool isVISrc_1024F32() const {
668     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
669   }
670 
671   bool isVISrc_1024F16() const {
672     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
673   }
674 
675   bool isVISrc_1024V2F16() const {
676     return isVISrc_1024F16() || isVISrc_1024B32();
677   }
678 
679   bool isAISrcB32() const {
680     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
681   }
682 
683   bool isAISrcB16() const {
684     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
685   }
686 
687   bool isAISrcV2B16() const {
688     return isAISrcB16();
689   }
690 
691   bool isAISrcF32() const {
692     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
693   }
694 
695   bool isAISrcF16() const {
696     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
697   }
698 
699   bool isAISrcV2F16() const {
700     return isAISrcF16() || isAISrcB32();
701   }
702 
703   bool isAISrc_64B64() const {
704     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
705   }
706 
707   bool isAISrc_64F64() const {
708     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
709   }
710 
711   bool isAISrc_128B32() const {
712     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
713   }
714 
715   bool isAISrc_128B16() const {
716     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
717   }
718 
719   bool isAISrc_128V2B16() const {
720     return isAISrc_128B16();
721   }
722 
723   bool isAISrc_128F32() const {
724     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
725   }
726 
727   bool isAISrc_128F16() const {
728     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
729   }
730 
731   bool isAISrc_128V2F16() const {
732     return isAISrc_128F16() || isAISrc_128B32();
733   }
734 
735   bool isVISrc_128F16() const {
736     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
737   }
738 
739   bool isVISrc_128V2F16() const {
740     return isVISrc_128F16() || isVISrc_128B32();
741   }
742 
743   bool isAISrc_256B64() const {
744     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
745   }
746 
747   bool isAISrc_256F64() const {
748     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
749   }
750 
751   bool isAISrc_512B32() const {
752     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
753   }
754 
755   bool isAISrc_512B16() const {
756     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
757   }
758 
759   bool isAISrc_512V2B16() const {
760     return isAISrc_512B16();
761   }
762 
763   bool isAISrc_512F32() const {
764     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
765   }
766 
767   bool isAISrc_512F16() const {
768     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
769   }
770 
771   bool isAISrc_512V2F16() const {
772     return isAISrc_512F16() || isAISrc_512B32();
773   }
774 
775   bool isAISrc_1024B32() const {
776     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
777   }
778 
779   bool isAISrc_1024B16() const {
780     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
781   }
782 
783   bool isAISrc_1024V2B16() const {
784     return isAISrc_1024B16();
785   }
786 
787   bool isAISrc_1024F32() const {
788     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
789   }
790 
791   bool isAISrc_1024F16() const {
792     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
793   }
794 
795   bool isAISrc_1024V2F16() const {
796     return isAISrc_1024F16() || isAISrc_1024B32();
797   }
798 
799   bool isKImmFP32() const {
800     return isLiteralImm(MVT::f32);
801   }
802 
803   bool isKImmFP16() const {
804     return isLiteralImm(MVT::f16);
805   }
806 
807   bool isMem() const override {
808     return false;
809   }
810 
811   bool isExpr() const {
812     return Kind == Expression;
813   }
814 
815   bool isSoppBrTarget() const {
816     return isExpr() || isImm();
817   }
818 
819   bool isSWaitCnt() const;
820   bool isHwreg() const;
821   bool isSendMsg() const;
822   bool isSwizzle() const;
823   bool isSMRDOffset8() const;
824   bool isSMEMOffset() const;
825   bool isSMRDLiteralOffset() const;
826   bool isDPP8() const;
827   bool isDPPCtrl() const;
828   bool isBLGP() const;
829   bool isCBSZ() const;
830   bool isABID() const;
831   bool isGPRIdxMode() const;
832   bool isS16Imm() const;
833   bool isU16Imm() const;
834   bool isEndpgm() const;
835 
836   StringRef getExpressionAsToken() const {
837     assert(isExpr());
838     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
839     return S->getSymbol().getName();
840   }
841 
842   StringRef getToken() const {
843     assert(isToken());
844 
845     if (Kind == Expression)
846       return getExpressionAsToken();
847 
848     return StringRef(Tok.Data, Tok.Length);
849   }
850 
851   int64_t getImm() const {
852     assert(isImm());
853     return Imm.Val;
854   }
855 
856   void setImm(int64_t Val) {
857     assert(isImm());
858     Imm.Val = Val;
859   }
860 
861   ImmTy getImmTy() const {
862     assert(isImm());
863     return Imm.Type;
864   }
865 
866   unsigned getReg() const override {
867     assert(isRegKind());
868     return Reg.RegNo;
869   }
870 
871   SMLoc getStartLoc() const override {
872     return StartLoc;
873   }
874 
875   SMLoc getEndLoc() const override {
876     return EndLoc;
877   }
878 
879   SMRange getLocRange() const {
880     return SMRange(StartLoc, EndLoc);
881   }
882 
883   Modifiers getModifiers() const {
884     assert(isRegKind() || isImmTy(ImmTyNone));
885     return isRegKind() ? Reg.Mods : Imm.Mods;
886   }
887 
888   void setModifiers(Modifiers Mods) {
889     assert(isRegKind() || isImmTy(ImmTyNone));
890     if (isRegKind())
891       Reg.Mods = Mods;
892     else
893       Imm.Mods = Mods;
894   }
895 
896   bool hasModifiers() const {
897     return getModifiers().hasModifiers();
898   }
899 
900   bool hasFPModifiers() const {
901     return getModifiers().hasFPModifiers();
902   }
903 
904   bool hasIntModifiers() const {
905     return getModifiers().hasIntModifiers();
906   }
907 
908   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
909 
910   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
911 
912   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
913 
914   template <unsigned Bitwidth>
915   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
916 
917   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
918     addKImmFPOperands<16>(Inst, N);
919   }
920 
921   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
922     addKImmFPOperands<32>(Inst, N);
923   }
924 
925   void addRegOperands(MCInst &Inst, unsigned N) const;
926 
927   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
928     addRegOperands(Inst, N);
929   }
930 
931   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
932     if (isRegKind())
933       addRegOperands(Inst, N);
934     else if (isExpr())
935       Inst.addOperand(MCOperand::createExpr(Expr));
936     else
937       addImmOperands(Inst, N);
938   }
939 
940   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
941     Modifiers Mods = getModifiers();
942     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
943     if (isRegKind()) {
944       addRegOperands(Inst, N);
945     } else {
946       addImmOperands(Inst, N, false);
947     }
948   }
949 
950   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
951     assert(!hasIntModifiers());
952     addRegOrImmWithInputModsOperands(Inst, N);
953   }
954 
955   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
956     assert(!hasFPModifiers());
957     addRegOrImmWithInputModsOperands(Inst, N);
958   }
959 
960   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
961     Modifiers Mods = getModifiers();
962     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
963     assert(isRegKind());
964     addRegOperands(Inst, N);
965   }
966 
967   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
968     assert(!hasIntModifiers());
969     addRegWithInputModsOperands(Inst, N);
970   }
971 
972   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
973     assert(!hasFPModifiers());
974     addRegWithInputModsOperands(Inst, N);
975   }
976 
977   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
978     if (isImm())
979       addImmOperands(Inst, N);
980     else {
981       assert(isExpr());
982       Inst.addOperand(MCOperand::createExpr(Expr));
983     }
984   }
985 
986   static void printImmTy(raw_ostream& OS, ImmTy Type) {
987     switch (Type) {
988     case ImmTyNone: OS << "None"; break;
989     case ImmTyGDS: OS << "GDS"; break;
990     case ImmTyLDS: OS << "LDS"; break;
991     case ImmTyOffen: OS << "Offen"; break;
992     case ImmTyIdxen: OS << "Idxen"; break;
993     case ImmTyAddr64: OS << "Addr64"; break;
994     case ImmTyOffset: OS << "Offset"; break;
995     case ImmTyInstOffset: OS << "InstOffset"; break;
996     case ImmTyOffset0: OS << "Offset0"; break;
997     case ImmTyOffset1: OS << "Offset1"; break;
998     case ImmTyCPol: OS << "CPol"; break;
999     case ImmTySWZ: OS << "SWZ"; break;
1000     case ImmTyTFE: OS << "TFE"; break;
1001     case ImmTyD16: OS << "D16"; break;
1002     case ImmTyFORMAT: OS << "FORMAT"; break;
1003     case ImmTyClampSI: OS << "ClampSI"; break;
1004     case ImmTyOModSI: OS << "OModSI"; break;
1005     case ImmTyDPP8: OS << "DPP8"; break;
1006     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1007     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1008     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1009     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1010     case ImmTyDppFi: OS << "FI"; break;
1011     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1012     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1013     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1014     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1015     case ImmTyDMask: OS << "DMask"; break;
1016     case ImmTyDim: OS << "Dim"; break;
1017     case ImmTyUNorm: OS << "UNorm"; break;
1018     case ImmTyDA: OS << "DA"; break;
1019     case ImmTyR128A16: OS << "R128A16"; break;
1020     case ImmTyA16: OS << "A16"; break;
1021     case ImmTyLWE: OS << "LWE"; break;
1022     case ImmTyOff: OS << "Off"; break;
1023     case ImmTyExpTgt: OS << "ExpTgt"; break;
1024     case ImmTyExpCompr: OS << "ExpCompr"; break;
1025     case ImmTyExpVM: OS << "ExpVM"; break;
1026     case ImmTyHwreg: OS << "Hwreg"; break;
1027     case ImmTySendMsg: OS << "SendMsg"; break;
1028     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1029     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1030     case ImmTyAttrChan: OS << "AttrChan"; break;
1031     case ImmTyOpSel: OS << "OpSel"; break;
1032     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1033     case ImmTyNegLo: OS << "NegLo"; break;
1034     case ImmTyNegHi: OS << "NegHi"; break;
1035     case ImmTySwizzle: OS << "Swizzle"; break;
1036     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1037     case ImmTyHigh: OS << "High"; break;
1038     case ImmTyBLGP: OS << "BLGP"; break;
1039     case ImmTyCBSZ: OS << "CBSZ"; break;
1040     case ImmTyABID: OS << "ABID"; break;
1041     case ImmTyEndpgm: OS << "Endpgm"; break;
1042     }
1043   }
1044 
1045   void print(raw_ostream &OS) const override {
1046     switch (Kind) {
1047     case Register:
1048       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1049       break;
1050     case Immediate:
1051       OS << '<' << getImm();
1052       if (getImmTy() != ImmTyNone) {
1053         OS << " type: "; printImmTy(OS, getImmTy());
1054       }
1055       OS << " mods: " << Imm.Mods << '>';
1056       break;
1057     case Token:
1058       OS << '\'' << getToken() << '\'';
1059       break;
1060     case Expression:
1061       OS << "<expr " << *Expr << '>';
1062       break;
1063     }
1064   }
1065 
1066   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1067                                       int64_t Val, SMLoc Loc,
1068                                       ImmTy Type = ImmTyNone,
1069                                       bool IsFPImm = false) {
1070     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1071     Op->Imm.Val = Val;
1072     Op->Imm.IsFPImm = IsFPImm;
1073     Op->Imm.Kind = ImmKindTyNone;
1074     Op->Imm.Type = Type;
1075     Op->Imm.Mods = Modifiers();
1076     Op->StartLoc = Loc;
1077     Op->EndLoc = Loc;
1078     return Op;
1079   }
1080 
1081   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1082                                         StringRef Str, SMLoc Loc,
1083                                         bool HasExplicitEncodingSize = true) {
1084     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1085     Res->Tok.Data = Str.data();
1086     Res->Tok.Length = Str.size();
1087     Res->StartLoc = Loc;
1088     Res->EndLoc = Loc;
1089     return Res;
1090   }
1091 
1092   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1093                                       unsigned RegNo, SMLoc S,
1094                                       SMLoc E) {
1095     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1096     Op->Reg.RegNo = RegNo;
1097     Op->Reg.Mods = Modifiers();
1098     Op->StartLoc = S;
1099     Op->EndLoc = E;
1100     return Op;
1101   }
1102 
1103   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1104                                        const class MCExpr *Expr, SMLoc S) {
1105     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1106     Op->Expr = Expr;
1107     Op->StartLoc = S;
1108     Op->EndLoc = S;
1109     return Op;
1110   }
1111 };
1112 
1113 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1114   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1115   return OS;
1116 }
1117 
1118 //===----------------------------------------------------------------------===//
1119 // AsmParser
1120 //===----------------------------------------------------------------------===//
1121 
1122 // Holds info related to the current kernel, e.g. the count of SGPRs used.
1123 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
1124 // next .amdgpu_hsa_kernel directive or at EOF.
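// The running counts are exposed to the source being assembled through the
// .kernel.sgpr_count, .kernel.vgpr_count and .kernel.agpr_count symbols
// defined below; a kernel may, for example, reference them with
// ".set my_sgprs, .kernel.sgpr_count" (illustrative use only).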
1125 class KernelScopeInfo {
1126   int SgprIndexUnusedMin = -1;
1127   int VgprIndexUnusedMin = -1;
1128   int AgprIndexUnusedMin = -1;
1129   MCContext *Ctx = nullptr;
1130   MCSubtargetInfo const *MSTI = nullptr;
1131 
1132   void usesSgprAt(int i) {
1133     if (i >= SgprIndexUnusedMin) {
1134       SgprIndexUnusedMin = ++i;
1135       if (Ctx) {
1136         MCSymbol* const Sym =
1137           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1138         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1139       }
1140     }
1141   }
1142 
1143   void usesVgprAt(int i) {
1144     if (i >= VgprIndexUnusedMin) {
1145       VgprIndexUnusedMin = ++i;
1146       if (Ctx) {
1147         MCSymbol* const Sym =
1148           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1149         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1150                                          VgprIndexUnusedMin);
1151         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1152       }
1153     }
1154   }
1155 
1156   void usesAgprAt(int i) {
1157     // Instructions using AGPRs are rejected in AMDGPUAsmParser::MatchAndEmitInstruction.
1158     if (!hasMAIInsts(*MSTI))
1159       return;
1160 
1161     if (i >= AgprIndexUnusedMin) {
1162       AgprIndexUnusedMin = ++i;
1163       if (Ctx) {
1164         MCSymbol* const Sym =
1165           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1166         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1167 
1168         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1169         MCSymbol* const vSym =
1170           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1171         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1172                                          VgprIndexUnusedMin);
1173         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1174       }
1175     }
1176   }
1177 
1178 public:
1179   KernelScopeInfo() = default;
1180 
1181   void initialize(MCContext &Context) {
1182     Ctx = &Context;
1183     MSTI = Ctx->getSubtargetInfo();
1184 
1185     usesSgprAt(SgprIndexUnusedMin = -1);
1186     usesVgprAt(VgprIndexUnusedMin = -1);
1187     if (hasMAIInsts(*MSTI)) {
1188       usesAgprAt(AgprIndexUnusedMin = -1);
1189     }
1190   }
1191 
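  // Illustrative trace: parsing "s[4:5]" leads to
  // usesRegister(IS_SGPR, /*DwordRegIndex=*/4, /*RegWidth=*/2), which records
  // SGPR index 5 as used and thus a count of 6.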
1192   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1193     switch (RegKind) {
1194       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1195       case IS_AGPR: usesAgprAt(DwordRegIndex + RegWidth - 1); break;
1196       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1197       default: break;
1198     }
1199   }
1200 };
1201 
1202 class AMDGPUAsmParser : public MCTargetAsmParser {
1203   MCAsmParser &Parser;
1204 
1205   // Number of extra operands parsed after the first optional operand.
1206   // This may be necessary to skip hardcoded mandatory operands.
1207   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1208 
1209   unsigned ForcedEncodingSize = 0;
1210   bool ForcedDPP = false;
1211   bool ForcedSDWA = false;
1212   KernelScopeInfo KernelScope;
1213   unsigned CPolSeen;
1214 
1215   /// @name Auto-generated Match Functions
1216   /// {
1217 
1218 #define GET_ASSEMBLER_HEADER
1219 #include "AMDGPUGenAsmMatcher.inc"
1220 
1221   /// }
1222 
1223 private:
1224   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1225   bool OutOfRangeError(SMRange Range);
1226   /// Calculate the VGPR/SGPR blocks required for the given target, reserved
1227   /// registers, and user-specified NextFreeXGPR values.
1228   ///
1229   /// \param Features [in] Target features, used for bug corrections.
1230   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1231   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1232   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1233   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1234   /// descriptor field, if valid.
1235   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1236   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1237   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1238   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1239   /// \param VGPRBlocks [out] Result VGPR block count.
1240   /// \param SGPRBlocks [out] Result SGPR block count.
1241   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1242                           bool FlatScrUsed, bool XNACKUsed,
1243                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1244                           SMRange VGPRRange, unsigned NextFreeSGPR,
1245                           SMRange SGPRRange, unsigned &VGPRBlocks,
1246                           unsigned &SGPRBlocks);
1247   bool ParseDirectiveAMDGCNTarget();
1248   bool ParseDirectiveAMDHSAKernel();
1249   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1250   bool ParseDirectiveHSACodeObjectVersion();
1251   bool ParseDirectiveHSACodeObjectISA();
1252   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1253   bool ParseDirectiveAMDKernelCodeT();
1254   // TODO: Possibly make subtargetHasRegister const.
1255   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1256   bool ParseDirectiveAMDGPUHsaKernel();
1257 
1258   bool ParseDirectiveISAVersion();
1259   bool ParseDirectiveHSAMetadata();
1260   bool ParseDirectivePALMetadataBegin();
1261   bool ParseDirectivePALMetadata();
1262   bool ParseDirectiveAMDGPULDS();
1263 
1264   /// Common code to parse out a block of text (typically YAML) between start and
1265   /// end directives.
1266   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1267                            const char *AssemblerDirectiveEnd,
1268                            std::string &CollectString);
1269 
1270   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1271                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1272   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1273                            unsigned &RegNum, unsigned &RegWidth,
1274                            bool RestoreOnFailure = false);
1275   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1276                            unsigned &RegNum, unsigned &RegWidth,
1277                            SmallVectorImpl<AsmToken> &Tokens);
1278   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1279                            unsigned &RegWidth,
1280                            SmallVectorImpl<AsmToken> &Tokens);
1281   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1282                            unsigned &RegWidth,
1283                            SmallVectorImpl<AsmToken> &Tokens);
1284   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1285                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1286   bool ParseRegRange(unsigned& Num, unsigned& Width);
1287   unsigned getRegularReg(RegisterKind RegKind,
1288                          unsigned RegNum,
1289                          unsigned RegWidth,
1290                          SMLoc Loc);
1291 
1292   bool isRegister();
1293   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1294   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1295   void initializeGprCountSymbol(RegisterKind RegKind);
1296   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1297                              unsigned RegWidth);
1298   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1299                     bool IsAtomic, bool IsLds = false);
1300   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1301                  bool IsGdsHardcoded);
1302 
1303 public:
1304   enum AMDGPUMatchResultTy {
1305     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1306   };
1307   enum OperandMode {
1308     OperandMode_Default,
1309     OperandMode_NSA,
1310   };
1311 
1312   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1313 
1314   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1315                const MCInstrInfo &MII,
1316                const MCTargetOptions &Options)
1317       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1318     MCAsmParserExtension::Initialize(Parser);
1319 
1320     if (getFeatureBits().none()) {
1321       // Set default features.
1322       copySTI().ToggleFeature("southern-islands");
1323     }
1324 
1325     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1326 
1327     {
1328       // TODO: make these pre-defined variables read-only.
1329       // Currently there is no suitable machinery in core llvm-mc for this.
1330       // MCSymbol::isRedefinable is intended for another purpose, and
1331       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
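      // Once defined, these behave like ordinary symbols; assembly code can,
      // for example, test ".if .option.machine_version_major >= 7"
      // (illustrative use only).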
1332       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1333       MCContext &Ctx = getContext();
1334       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1335         MCSymbol *Sym =
1336             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1337         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1338         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1339         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1340         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1341         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1342       } else {
1343         MCSymbol *Sym =
1344             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1345         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1346         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1347         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1348         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1349         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1350       }
1351       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1352         initializeGprCountSymbol(IS_VGPR);
1353         initializeGprCountSymbol(IS_SGPR);
1354       } else
1355         KernelScope.initialize(getContext());
1356     }
1357   }
1358 
1359   bool hasMIMG_R128() const {
1360     return AMDGPU::hasMIMG_R128(getSTI());
1361   }
1362 
1363   bool hasPackedD16() const {
1364     return AMDGPU::hasPackedD16(getSTI());
1365   }
1366 
1367   bool hasGFX10A16() const {
1368     return AMDGPU::hasGFX10A16(getSTI());
1369   }
1370 
1371   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1372 
1373   bool isSI() const {
1374     return AMDGPU::isSI(getSTI());
1375   }
1376 
1377   bool isCI() const {
1378     return AMDGPU::isCI(getSTI());
1379   }
1380 
1381   bool isVI() const {
1382     return AMDGPU::isVI(getSTI());
1383   }
1384 
1385   bool isGFX9() const {
1386     return AMDGPU::isGFX9(getSTI());
1387   }
1388 
1389   // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1390   bool isGFX90A() const {
1391     return AMDGPU::isGFX90A(getSTI());
1392   }
1393 
1394   bool isGFX940() const {
1395     return AMDGPU::isGFX940(getSTI());
1396   }
1397 
1398   bool isGFX9Plus() const {
1399     return AMDGPU::isGFX9Plus(getSTI());
1400   }
1401 
1402   bool isGFX10() const {
1403     return AMDGPU::isGFX10(getSTI());
1404   }
1405 
1406   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1407 
1408   bool isGFX10_BEncoding() const {
1409     return AMDGPU::isGFX10_BEncoding(getSTI());
1410   }
1411 
1412   bool hasInv2PiInlineImm() const {
1413     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1414   }
1415 
1416   bool hasFlatOffsets() const {
1417     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1418   }
1419 
1420   bool hasArchitectedFlatScratch() const {
1421     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1422   }
1423 
1424   bool hasSGPR102_SGPR103() const {
1425     return !isVI() && !isGFX9();
1426   }
1427 
1428   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1429 
1430   bool hasIntClamp() const {
1431     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1432   }
1433 
1434   AMDGPUTargetStreamer &getTargetStreamer() {
1435     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1436     return static_cast<AMDGPUTargetStreamer &>(TS);
1437   }
1438 
1439   const MCRegisterInfo *getMRI() const {
1440     // We need this const_cast because for some reason getContext() is not const
1441     // in MCAsmParser.
1442     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1443   }
1444 
1445   const MCInstrInfo *getMII() const {
1446     return &MII;
1447   }
1448 
1449   const FeatureBitset &getFeatureBits() const {
1450     return getSTI().getFeatureBits();
1451   }
1452 
1453   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1454   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1455   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1456 
1457   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1458   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1459   bool isForcedDPP() const { return ForcedDPP; }
1460   bool isForcedSDWA() const { return ForcedSDWA; }
1461   ArrayRef<unsigned> getMatchedVariants() const;
1462   StringRef getMatchedVariantName() const;
1463 
1464   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1465   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1466                      bool RestoreOnFailure);
1467   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1468   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1469                                         SMLoc &EndLoc) override;
1470   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1471   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1472                                       unsigned Kind) override;
1473   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1474                                OperandVector &Operands, MCStreamer &Out,
1475                                uint64_t &ErrorInfo,
1476                                bool MatchingInlineAsm) override;
1477   bool ParseDirective(AsmToken DirectiveID) override;
1478   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1479                                     OperandMode Mode = OperandMode_Default);
1480   StringRef parseMnemonicSuffix(StringRef Name);
1481   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1482                         SMLoc NameLoc, OperandVector &Operands) override;
1483   //bool ProcessInstruction(MCInst &Inst);
1484 
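  // These parse named integer operands of the form "<prefix>:<value>", e.g.
  // "offset:4095", and array forms such as "op_sel:[0,1]" (example syntax
  // only).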
1485   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1486 
1487   OperandMatchResultTy
1488   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1489                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1490                      bool (*ConvertResult)(int64_t &) = nullptr);
1491 
1492   OperandMatchResultTy
1493   parseOperandArrayWithPrefix(const char *Prefix,
1494                               OperandVector &Operands,
1495                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1496                               bool (*ConvertResult)(int64_t&) = nullptr);
1497 
1498   OperandMatchResultTy
1499   parseNamedBit(StringRef Name, OperandVector &Operands,
1500                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1501   OperandMatchResultTy parseCPol(OperandVector &Operands);
1502   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1503                                              StringRef &Value,
1504                                              SMLoc &StringLoc);
1505 
1506   bool isModifier();
1507   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1508   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1509   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1510   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1511   bool parseSP3NegModifier();
1512   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1513   OperandMatchResultTy parseReg(OperandVector &Operands);
1514   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1515   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1516   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1517   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1518   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1519   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1520   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1521   OperandMatchResultTy parseUfmt(int64_t &Format);
1522   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1523   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1524   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1525   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1526   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1527   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1528   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1529 
1530   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1531   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1532   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1533   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1534 
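  // parseSWaitCntOps accepts either a raw immediate or the symbolic s_waitcnt
  // form, e.g. "vmcnt(0) expcnt(0) lgkmcnt(0)" (example syntax only).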
1535   bool parseCnt(int64_t &IntVal);
1536   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1537   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1538 
1539 private:
1540   struct OperandInfoTy {
1541     SMLoc Loc;
1542     int64_t Id;
1543     bool IsSymbolic = false;
1544     bool IsDefined = false;
1545 
1546     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1547   };
1548 
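  // These handle s_sendmsg operands given either as a raw immediate or
  // symbolically, e.g. "sendmsg(MSG_GS, GS_OP_EMIT, 0)" (example syntax only).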
1549   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1550   bool validateSendMsg(const OperandInfoTy &Msg,
1551                        const OperandInfoTy &Op,
1552                        const OperandInfoTy &Stream);
1553 
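  // Hwreg operands may be written as a raw immediate or symbolically, e.g.
  // "hwreg(HW_REG_MODE, 0, 32)"; parseHwregBody accepts both forms (example
  // syntax only).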
1554   bool parseHwregBody(OperandInfoTy &HwReg,
1555                       OperandInfoTy &Offset,
1556                       OperandInfoTy &Width);
1557   bool validateHwreg(const OperandInfoTy &HwReg,
1558                      const OperandInfoTy &Offset,
1559                      const OperandInfoTy &Width);
1560 
1561   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1562   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1563 
1564   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1565                       const OperandVector &Operands) const;
1566   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1567   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1568   SMLoc getLitLoc(const OperandVector &Operands) const;
1569   SMLoc getConstLoc(const OperandVector &Operands) const;
1570 
1571   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1572   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1573   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1574   bool validateSOPLiteral(const MCInst &Inst) const;
1575   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1576   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1577   bool validateIntClampSupported(const MCInst &Inst);
1578   bool validateMIMGAtomicDMask(const MCInst &Inst);
1579   bool validateMIMGGatherDMask(const MCInst &Inst);
1580   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1581   bool validateMIMGDataSize(const MCInst &Inst);
1582   bool validateMIMGAddrSize(const MCInst &Inst);
1583   bool validateMIMGD16(const MCInst &Inst);
1584   bool validateMIMGDim(const MCInst &Inst);
1585   bool validateMIMGMSAA(const MCInst &Inst);
1586   bool validateOpSel(const MCInst &Inst);
1587   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1588   bool validateVccOperand(unsigned Reg) const;
1589   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1590   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1591   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1592   bool validateAGPRLdSt(const MCInst &Inst) const;
1593   bool validateVGPRAlign(const MCInst &Inst) const;
1594   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1595   bool validateDivScale(const MCInst &Inst);
1596   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1597                              const SMLoc &IDLoc);
1598   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1599   unsigned getConstantBusLimit(unsigned Opcode) const;
1600   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1601   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1602   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1603 
1604   bool isSupportedMnemo(StringRef Mnemo,
1605                         const FeatureBitset &FBS);
1606   bool isSupportedMnemo(StringRef Mnemo,
1607                         const FeatureBitset &FBS,
1608                         ArrayRef<unsigned> Variants);
1609   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1610 
1611   bool isId(const StringRef Id) const;
1612   bool isId(const AsmToken &Token, const StringRef Id) const;
1613   bool isToken(const AsmToken::TokenKind Kind) const;
1614   bool trySkipId(const StringRef Id);
1615   bool trySkipId(const StringRef Pref, const StringRef Id);
1616   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1617   bool trySkipToken(const AsmToken::TokenKind Kind);
1618   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1619   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1620   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1621 
1622   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1623   AsmToken::TokenKind getTokenKind() const;
1624   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1625   bool parseExpr(OperandVector &Operands);
1626   StringRef getTokenStr() const;
1627   AsmToken peekToken();
1628   AsmToken getToken() const;
1629   SMLoc getLoc() const;
1630   void lex();
1631 
1632 public:
1633   void onBeginOfFile() override;
1634 
1635   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1636   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1637 
1638   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1639   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1640   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1641   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1642   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1643   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1644 
1645   bool parseSwizzleOperand(int64_t &Op,
1646                            const unsigned MinVal,
1647                            const unsigned MaxVal,
1648                            const StringRef ErrMsg,
1649                            SMLoc &Loc);
1650   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1651                             const unsigned MinVal,
1652                             const unsigned MaxVal,
1653                             const StringRef ErrMsg);
1654   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1655   bool parseSwizzleOffset(int64_t &Imm);
1656   bool parseSwizzleMacro(int64_t &Imm);
1657   bool parseSwizzleQuadPerm(int64_t &Imm);
1658   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1659   bool parseSwizzleBroadcast(int64_t &Imm);
1660   bool parseSwizzleSwap(int64_t &Imm);
1661   bool parseSwizzleReverse(int64_t &Imm);
1662 
1663   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1664   int64_t parseGPRIdxMacro();
1665 
1666   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1667   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1668   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1669   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1670 
1671   AMDGPUOperand::Ptr defaultCPol() const;
1672 
1673   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1674   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1675   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1676   AMDGPUOperand::Ptr defaultFlatOffset() const;
1677 
1678   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1679 
1680   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1681                OptionalImmIndexMap &OptionalIdx);
1682   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1683   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1684   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1685   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1686                 OptionalImmIndexMap &OptionalIdx);
1687 
1688   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1689 
1690   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1691                bool IsAtomic = false);
1692   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1693   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1694 
1695   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1696 
1697   bool parseDimId(unsigned &Encoding);
1698   OperandMatchResultTy parseDim(OperandVector &Operands);
1699   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1700   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1701   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1702   int64_t parseDPPCtrlSel(StringRef Ctrl);
1703   int64_t parseDPPCtrlPerm();
1704   AMDGPUOperand::Ptr defaultRowMask() const;
1705   AMDGPUOperand::Ptr defaultBankMask() const;
1706   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1707   AMDGPUOperand::Ptr defaultFI() const;
1708   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1709   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1710 
1711   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1712                                     AMDGPUOperand::ImmTy Type);
1713   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1714   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1715   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1716   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1717   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1718   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1719   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1720                uint64_t BasicInstType,
1721                bool SkipDstVcc = false,
1722                bool SkipSrcVcc = false);
1723 
1724   AMDGPUOperand::Ptr defaultBLGP() const;
1725   AMDGPUOperand::Ptr defaultCBSZ() const;
1726   AMDGPUOperand::Ptr defaultABID() const;
1727 
1728   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1729   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1730 };
1731 
1732 struct OptionalOperand {
1733   const char *Name;
1734   AMDGPUOperand::ImmTy Type;
1735   bool IsBit;
1736   bool (*ConvertResult)(int64_t&);
1737 };
1738 
1739 } // end anonymous namespace
1740 
1741 // May be called with an integer type of equivalent bitwidth.
1742 static const fltSemantics *getFltSemantics(unsigned Size) {
1743   switch (Size) {
1744   case 4:
1745     return &APFloat::IEEEsingle();
1746   case 8:
1747     return &APFloat::IEEEdouble();
1748   case 2:
1749     return &APFloat::IEEEhalf();
1750   default:
1751     llvm_unreachable("unsupported fp type");
1752   }
1753 }
1754 
1755 static const fltSemantics *getFltSemantics(MVT VT) {
1756   return getFltSemantics(VT.getSizeInBits() / 8);
1757 }
1758 
1759 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1760   switch (OperandType) {
1761   case AMDGPU::OPERAND_REG_IMM_INT32:
1762   case AMDGPU::OPERAND_REG_IMM_FP32:
1763   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1764   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1765   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1766   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1767   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1768   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1769   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1770   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1771   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1772   case AMDGPU::OPERAND_KIMM32:
1773     return &APFloat::IEEEsingle();
1774   case AMDGPU::OPERAND_REG_IMM_INT64:
1775   case AMDGPU::OPERAND_REG_IMM_FP64:
1776   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1777   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1778   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1779     return &APFloat::IEEEdouble();
1780   case AMDGPU::OPERAND_REG_IMM_INT16:
1781   case AMDGPU::OPERAND_REG_IMM_FP16:
1782   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1783   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1784   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1785   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1786   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1787   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1788   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1789   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1790   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1791   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1792   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1793   case AMDGPU::OPERAND_KIMM16:
1794     return &APFloat::IEEEhalf();
1795   default:
1796     llvm_unreachable("unsupported fp type");
1797   }
1798 }
1799 
1800 //===----------------------------------------------------------------------===//
1801 // Operand
1802 //===----------------------------------------------------------------------===//
1803 
1804 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1805   bool Lost;
1806 
1807   // Convert the literal to the semantics of the target type
1808   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1809                                                APFloat::rmNearestTiesToEven,
1810                                                &Lost);
1811   // We allow precision loss but not overflow or underflow
1812   if (Status != APFloat::opOK &&
1813       Lost &&
1814       ((Status & APFloat::opOverflow)  != 0 ||
1815        (Status & APFloat::opUnderflow) != 0)) {
1816     return false;
1817   }
1818 
1819   return true;
1820 }
1821 
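// Return true if Val fits in Size bits either as an unsigned or as a signed
// value, i.e. truncating it to Size bits loses no information.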
1822 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1823   return isUIntN(Size, Val) || isIntN(Size, Val);
1824 }
1825 
1826 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1827   if (VT.getScalarType() == MVT::i16) {
1828     // FP immediate values are broken.
1829     return isInlinableIntLiteral(Val);
1830   }
1831 
1832   // f16/v2f16 operands work correctly for all values.
1833   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1834 }
1835 
1836 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1837 
1838   // This is a hack to enable named inline values like
1839   // shared_base with both 32-bit and 64-bit operands.
1840   // Note that these values are defined as
1841   // 32-bit operands only.
1842   if (isInlineValue()) {
1843     return true;
1844   }
1845 
1846   if (!isImmTy(ImmTyNone)) {
1847     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1848     return false;
1849   }
1850   // TODO: We should avoid using host floats here. It would be better to
1851   // check the float bit values, which is what a few other places do.
1852   // We've had bot failures before due to weird NaN support on mips hosts.
1853 
1854   APInt Literal(64, Imm.Val);
1855 
1856   if (Imm.IsFPImm) { // We got fp literal token
1857     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1858       return AMDGPU::isInlinableLiteral64(Imm.Val,
1859                                           AsmParser->hasInv2PiInlineImm());
1860     }
1861 
1862     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1863     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1864       return false;
1865 
1866     if (type.getScalarSizeInBits() == 16) {
1867       return isInlineableLiteralOp16(
1868         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1869         type, AsmParser->hasInv2PiInlineImm());
1870     }
1871 
1872     // Check if single precision literal is inlinable
1873     return AMDGPU::isInlinableLiteral32(
1874       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1875       AsmParser->hasInv2PiInlineImm());
1876   }
1877 
1878   // We got int literal token.
1879   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1880     return AMDGPU::isInlinableLiteral64(Imm.Val,
1881                                         AsmParser->hasInv2PiInlineImm());
1882   }
1883 
1884   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1885     return false;
1886   }
1887 
1888   if (type.getScalarSizeInBits() == 16) {
1889     return isInlineableLiteralOp16(
1890       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1891       type, AsmParser->hasInv2PiInlineImm());
1892   }
1893 
1894   return AMDGPU::isInlinableLiteral32(
1895     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1896     AsmParser->hasInv2PiInlineImm());
1897 }
1898 
1899 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1900   // Check that this immediate can be added as a literal
1901   if (!isImmTy(ImmTyNone)) {
1902     return false;
1903   }
1904 
1905   if (!Imm.IsFPImm) {
1906     // We got int literal token.
1907 
1908     if (type == MVT::f64 && hasFPModifiers()) {
1909       // FP modifiers cannot be applied to int literals while preserving the same
1910       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1911       // ambiguity, disable these cases.
1912       return false;
1913     }
1914 
1915     unsigned Size = type.getSizeInBits();
1916     if (Size == 64)
1917       Size = 32;
1918 
1919     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1920     // types.
1921     return isSafeTruncation(Imm.Val, Size);
1922   }
1923 
1924   // We got fp literal token
1925   if (type == MVT::f64) { // Expected 64-bit fp operand
1926     // The low 32 bits of the literal will be set to zero, but we accept such literals
1927     return true;
1928   }
1929 
1930   if (type == MVT::i64) { // Expected 64-bit int operand
1931     // We don't allow fp literals in 64-bit integer instructions. It is
1932     // unclear how we should encode them.
1933     return false;
1934   }
1935 
1936   // We allow fp literals with f16x2 operands assuming that the specified
1937   // literal goes into the lower half and the upper half is zero. We also
1938   // require that the literal may be losslessly converted to f16.
1939   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1940                      (type == MVT::v2i16)? MVT::i16 :
1941                      (type == MVT::v2f32)? MVT::f32 : type;
1942 
1943   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1944   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1945 }
1946 
1947 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1948   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1949 }
1950 
1951 bool AMDGPUOperand::isVRegWithInputMods() const {
1952   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1953          // GFX90A allows DPP on 64-bit operands.
1954          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1955           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1956 }
1957 
1958 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1959   if (AsmParser->isVI())
1960     return isVReg32();
1961   else if (AsmParser->isGFX9Plus())
1962     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1963   else
1964     return false;
1965 }
1966 
1967 bool AMDGPUOperand::isSDWAFP16Operand() const {
1968   return isSDWAOperand(MVT::f16);
1969 }
1970 
1971 bool AMDGPUOperand::isSDWAFP32Operand() const {
1972   return isSDWAOperand(MVT::f32);
1973 }
1974 
1975 bool AMDGPUOperand::isSDWAInt16Operand() const {
1976   return isSDWAOperand(MVT::i16);
1977 }
1978 
1979 bool AMDGPUOperand::isSDWAInt32Operand() const {
1980   return isSDWAOperand(MVT::i32);
1981 }
1982 
1983 bool AMDGPUOperand::isBoolReg() const {
1984   auto FB = AsmParser->getFeatureBits();
1985   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1986                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1987 }
1988 
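// Apply the 'abs' and 'neg' FP input modifiers to an immediate by clearing
// or flipping its sign bit for the given operand size (in bytes).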
1989 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1990 {
1991   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1992   assert(Size == 2 || Size == 4 || Size == 8);
1993 
1994   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1995 
1996   if (Imm.Mods.Abs) {
1997     Val &= ~FpSignMask;
1998   }
1999   if (Imm.Mods.Neg) {
2000     Val ^= FpSignMask;
2001   }
2002 
2003   return Val;
2004 }
2005 
2006 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2007   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2008                              Inst.getNumOperands())) {
2009     addLiteralImmOperand(Inst, Imm.Val,
2010                          ApplyModifiers &&
2011                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2012   } else {
2013     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2014     Inst.addOperand(MCOperand::createImm(Imm.Val));
2015     setImmKindNone();
2016   }
2017 }
2018 
2019 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2020   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2021   auto OpNum = Inst.getNumOperands();
2022   // Check that this operand accepts literals
2023   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2024 
2025   if (ApplyModifiers) {
2026     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2027     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2028     Val = applyInputFPModifiers(Val, Size);
2029   }
2030 
2031   APInt Literal(64, Val);
2032   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2033 
2034   if (Imm.IsFPImm) { // We got fp literal token
2035     switch (OpTy) {
2036     case AMDGPU::OPERAND_REG_IMM_INT64:
2037     case AMDGPU::OPERAND_REG_IMM_FP64:
2038     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2039     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2040     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2041       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2042                                        AsmParser->hasInv2PiInlineImm())) {
2043         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2044         setImmKindConst();
2045         return;
2046       }
2047 
2048       // Non-inlineable
2049       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2050         // For fp operands, check whether the low 32 bits are zero
2051         if (Literal.getLoBits(32) != 0) {
2052           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2053           "Can't encode literal as exact 64-bit floating-point operand. "
2054           "Low 32-bits will be set to zero");
2055         }
2056 
2057         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2058         setImmKindLiteral();
2059         return;
2060       }
2061 
2062       // We don't allow fp literals in 64-bit integer instructions. It is
2063       // unclear how we should encode them. This case should be checked earlier
2064       // in predicate methods (isLiteralImm())
2065       llvm_unreachable("fp literal in 64-bit integer instruction.");
2066 
2067     case AMDGPU::OPERAND_REG_IMM_INT32:
2068     case AMDGPU::OPERAND_REG_IMM_FP32:
2069     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2070     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2071     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2072     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2073     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2074     case AMDGPU::OPERAND_REG_IMM_INT16:
2075     case AMDGPU::OPERAND_REG_IMM_FP16:
2076     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2077     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2078     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2079     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2080     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2081     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2082     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2083     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2084     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2085     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2086     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2087     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2088     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2089     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2090     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2091     case AMDGPU::OPERAND_KIMM32:
2092     case AMDGPU::OPERAND_KIMM16: {
2093       bool lost;
2094       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2095       // Convert the literal to the operand type's semantics
2096       FPLiteral.convert(*getOpFltSemantics(OpTy),
2097                         APFloat::rmNearestTiesToEven, &lost);
2098       // We allow precision loss but not overflow or underflow. This should be
2099       // checked earlier in isLiteralImm()
2100 
2101       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2102       Inst.addOperand(MCOperand::createImm(ImmVal));
2103       setImmKindLiteral();
2104       return;
2105     }
2106     default:
2107       llvm_unreachable("invalid operand size");
2108     }
2109 
2110     return;
2111   }
2112 
2113   // We got int literal token.
2114   // Only sign extend inline immediates.
2115   switch (OpTy) {
2116   case AMDGPU::OPERAND_REG_IMM_INT32:
2117   case AMDGPU::OPERAND_REG_IMM_FP32:
2118   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2119   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2120   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2121   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2122   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2123   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2124   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2125   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2126   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2127   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2128   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2129     if (isSafeTruncation(Val, 32) &&
2130         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2131                                      AsmParser->hasInv2PiInlineImm())) {
2132       Inst.addOperand(MCOperand::createImm(Val));
2133       setImmKindConst();
2134       return;
2135     }
2136 
2137     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2138     setImmKindLiteral();
2139     return;
2140 
2141   case AMDGPU::OPERAND_REG_IMM_INT64:
2142   case AMDGPU::OPERAND_REG_IMM_FP64:
2143   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2144   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2145   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2146     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2147       Inst.addOperand(MCOperand::createImm(Val));
2148       setImmKindConst();
2149       return;
2150     }
2151 
2152     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2153     setImmKindLiteral();
2154     return;
2155 
2156   case AMDGPU::OPERAND_REG_IMM_INT16:
2157   case AMDGPU::OPERAND_REG_IMM_FP16:
2158   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2159   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2160   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2161   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2162   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2163     if (isSafeTruncation(Val, 16) &&
2164         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2165                                      AsmParser->hasInv2PiInlineImm())) {
2166       Inst.addOperand(MCOperand::createImm(Val));
2167       setImmKindConst();
2168       return;
2169     }
2170 
2171     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2172     setImmKindLiteral();
2173     return;
2174 
2175   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2176   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2177   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2178   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2179     assert(isSafeTruncation(Val, 16));
2180     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2181                                         AsmParser->hasInv2PiInlineImm()));
2182 
2183     Inst.addOperand(MCOperand::createImm(Val));
2184     return;
2185   }
2186   case AMDGPU::OPERAND_KIMM32:
2187     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2188     setImmKindNone();
2189     return;
2190   case AMDGPU::OPERAND_KIMM16:
2191     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2192     setImmKindNone();
2193     return;
2194   default:
2195     llvm_unreachable("invalid operand size");
2196   }
2197 }
2198 
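// Add a KIMM operand. An integer literal is truncated to Bitwidth bits; an FP
// literal is converted from double to the Bitwidth-sized FP format first.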
2199 template <unsigned Bitwidth>
2200 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2201   APInt Literal(64, Imm.Val);
2202   setImmKindNone();
2203 
2204   if (!Imm.IsFPImm) {
2205     // We got int literal token.
2206     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2207     return;
2208   }
2209 
2210   bool Lost;
2211   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2212   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2213                     APFloat::rmNearestTiesToEven, &Lost);
2214   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2215 }
2216 
2217 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2218   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2219 }
2220 
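// Named registers (shared_base, vccz, scc, null, ...) that are treated as
// inline constant values rather than ordinary register operands.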
2221 static bool isInlineValue(unsigned Reg) {
2222   switch (Reg) {
2223   case AMDGPU::SRC_SHARED_BASE:
2224   case AMDGPU::SRC_SHARED_LIMIT:
2225   case AMDGPU::SRC_PRIVATE_BASE:
2226   case AMDGPU::SRC_PRIVATE_LIMIT:
2227   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2228     return true;
2229   case AMDGPU::SRC_VCCZ:
2230   case AMDGPU::SRC_EXECZ:
2231   case AMDGPU::SRC_SCC:
2232     return true;
2233   case AMDGPU::SGPR_NULL:
2234     return true;
2235   default:
2236     return false;
2237   }
2238 }
2239 
2240 bool AMDGPUOperand::isInlineValue() const {
2241   return isRegKind() && ::isInlineValue(getReg());
2242 }
2243 
2244 //===----------------------------------------------------------------------===//
2245 // AsmParser
2246 //===----------------------------------------------------------------------===//
2247 
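// Map a register kind and width (in 32-bit dwords) to the corresponding
// register class ID, or -1 if no such class exists.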
2248 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2249   if (Is == IS_VGPR) {
2250     switch (RegWidth) {
2251       default: return -1;
2252       case 1: return AMDGPU::VGPR_32RegClassID;
2253       case 2: return AMDGPU::VReg_64RegClassID;
2254       case 3: return AMDGPU::VReg_96RegClassID;
2255       case 4: return AMDGPU::VReg_128RegClassID;
2256       case 5: return AMDGPU::VReg_160RegClassID;
2257       case 6: return AMDGPU::VReg_192RegClassID;
2258       case 7: return AMDGPU::VReg_224RegClassID;
2259       case 8: return AMDGPU::VReg_256RegClassID;
2260       case 16: return AMDGPU::VReg_512RegClassID;
2261       case 32: return AMDGPU::VReg_1024RegClassID;
2262     }
2263   } else if (Is == IS_TTMP) {
2264     switch (RegWidth) {
2265       default: return -1;
2266       case 1: return AMDGPU::TTMP_32RegClassID;
2267       case 2: return AMDGPU::TTMP_64RegClassID;
2268       case 4: return AMDGPU::TTMP_128RegClassID;
2269       case 8: return AMDGPU::TTMP_256RegClassID;
2270       case 16: return AMDGPU::TTMP_512RegClassID;
2271     }
2272   } else if (Is == IS_SGPR) {
2273     switch (RegWidth) {
2274       default: return -1;
2275       case 1: return AMDGPU::SGPR_32RegClassID;
2276       case 2: return AMDGPU::SGPR_64RegClassID;
2277       case 3: return AMDGPU::SGPR_96RegClassID;
2278       case 4: return AMDGPU::SGPR_128RegClassID;
2279       case 5: return AMDGPU::SGPR_160RegClassID;
2280       case 6: return AMDGPU::SGPR_192RegClassID;
2281       case 7: return AMDGPU::SGPR_224RegClassID;
2282       case 8: return AMDGPU::SGPR_256RegClassID;
2283       case 16: return AMDGPU::SGPR_512RegClassID;
2284     }
2285   } else if (Is == IS_AGPR) {
2286     switch (RegWidth) {
2287       default: return -1;
2288       case 1: return AMDGPU::AGPR_32RegClassID;
2289       case 2: return AMDGPU::AReg_64RegClassID;
2290       case 3: return AMDGPU::AReg_96RegClassID;
2291       case 4: return AMDGPU::AReg_128RegClassID;
2292       case 5: return AMDGPU::AReg_160RegClassID;
2293       case 6: return AMDGPU::AReg_192RegClassID;
2294       case 7: return AMDGPU::AReg_224RegClassID;
2295       case 8: return AMDGPU::AReg_256RegClassID;
2296       case 16: return AMDGPU::AReg_512RegClassID;
2297       case 32: return AMDGPU::AReg_1024RegClassID;
2298     }
2299   }
2300   return -1;
2301 }
2302 
2303 static unsigned getSpecialRegForName(StringRef RegName) {
2304   return StringSwitch<unsigned>(RegName)
2305     .Case("exec", AMDGPU::EXEC)
2306     .Case("vcc", AMDGPU::VCC)
2307     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2308     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2309     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2310     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2311     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2312     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2313     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2314     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2315     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2316     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2317     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2318     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2319     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2320     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2321     .Case("m0", AMDGPU::M0)
2322     .Case("vccz", AMDGPU::SRC_VCCZ)
2323     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2324     .Case("execz", AMDGPU::SRC_EXECZ)
2325     .Case("src_execz", AMDGPU::SRC_EXECZ)
2326     .Case("scc", AMDGPU::SRC_SCC)
2327     .Case("src_scc", AMDGPU::SRC_SCC)
2328     .Case("tba", AMDGPU::TBA)
2329     .Case("tma", AMDGPU::TMA)
2330     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2331     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2332     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2333     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2334     .Case("vcc_lo", AMDGPU::VCC_LO)
2335     .Case("vcc_hi", AMDGPU::VCC_HI)
2336     .Case("exec_lo", AMDGPU::EXEC_LO)
2337     .Case("exec_hi", AMDGPU::EXEC_HI)
2338     .Case("tma_lo", AMDGPU::TMA_LO)
2339     .Case("tma_hi", AMDGPU::TMA_HI)
2340     .Case("tba_lo", AMDGPU::TBA_LO)
2341     .Case("tba_hi", AMDGPU::TBA_HI)
2342     .Case("pc", AMDGPU::PC_REG)
2343     .Case("null", AMDGPU::SGPR_NULL)
2344     .Default(AMDGPU::NoRegister);
2345 }
2346 
2347 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2348                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2349   auto R = parseRegister();
2350   if (!R) return true;
2351   assert(R->isReg());
2352   RegNo = R->getReg();
2353   StartLoc = R->getStartLoc();
2354   EndLoc = R->getEndLoc();
2355   return false;
2356 }
2357 
2358 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2359                                     SMLoc &EndLoc) {
2360   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2361 }
2362 
2363 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2364                                                        SMLoc &StartLoc,
2365                                                        SMLoc &EndLoc) {
2366   bool Result =
2367       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2368   bool PendingErrors = getParser().hasPendingError();
2369   getParser().clearPendingErrors();
2370   if (PendingErrors)
2371     return MatchOperand_ParseFail;
2372   if (Result)
2373     return MatchOperand_NoMatch;
2374   return MatchOperand_Success;
2375 }
2376 
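// Append Reg1 to the register list being accumulated in Reg/RegWidth.
// Matching halves of special registers (e.g. vcc_lo followed by vcc_hi) are
// merged into the full register; regular registers must have consecutive
// indices.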
2377 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2378                                             RegisterKind RegKind, unsigned Reg1,
2379                                             SMLoc Loc) {
2380   switch (RegKind) {
2381   case IS_SPECIAL:
2382     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2383       Reg = AMDGPU::EXEC;
2384       RegWidth = 2;
2385       return true;
2386     }
2387     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2388       Reg = AMDGPU::FLAT_SCR;
2389       RegWidth = 2;
2390       return true;
2391     }
2392     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2393       Reg = AMDGPU::XNACK_MASK;
2394       RegWidth = 2;
2395       return true;
2396     }
2397     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2398       Reg = AMDGPU::VCC;
2399       RegWidth = 2;
2400       return true;
2401     }
2402     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2403       Reg = AMDGPU::TBA;
2404       RegWidth = 2;
2405       return true;
2406     }
2407     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2408       Reg = AMDGPU::TMA;
2409       RegWidth = 2;
2410       return true;
2411     }
2412     Error(Loc, "register does not fit in the list");
2413     return false;
2414   case IS_VGPR:
2415   case IS_SGPR:
2416   case IS_AGPR:
2417   case IS_TTMP:
2418     if (Reg1 != Reg + RegWidth) {
2419       Error(Loc, "registers in a list must have consecutive indices");
2420       return false;
2421     }
2422     RegWidth++;
2423     return true;
2424   default:
2425     llvm_unreachable("unexpected register kind");
2426   }
2427 }
2428 
2429 struct RegInfo {
2430   StringLiteral Name;
2431   RegisterKind Kind;
2432 };
2433 
2434 static constexpr RegInfo RegularRegisters[] = {
2435   {{"v"},    IS_VGPR},
2436   {{"s"},    IS_SGPR},
2437   {{"ttmp"}, IS_TTMP},
2438   {{"acc"},  IS_AGPR},
2439   {{"a"},    IS_AGPR},
2440 };
2441 
2442 static bool isRegularReg(RegisterKind Kind) {
2443   return Kind == IS_VGPR ||
2444          Kind == IS_SGPR ||
2445          Kind == IS_TTMP ||
2446          Kind == IS_AGPR;
2447 }
2448 
2449 static const RegInfo* getRegularRegInfo(StringRef Str) {
2450   for (const RegInfo &Reg : RegularRegisters)
2451     if (Str.startswith(Reg.Name))
2452       return &Reg;
2453   return nullptr;
2454 }
2455 
2456 static bool getRegNum(StringRef Str, unsigned& Num) {
2457   return !Str.getAsInteger(10, Num);
2458 }
2459 
2460 bool
2461 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2462                             const AsmToken &NextToken) const {
2463 
2464   // A list of consecutive registers: [s0,s1,s2,s3]
2465   if (Token.is(AsmToken::LBrac))
2466     return true;
2467 
2468   if (!Token.is(AsmToken::Identifier))
2469     return false;
2470 
2471   // A single register like s0 or a range of registers like s[0:1]
2472 
2473   StringRef Str = Token.getString();
2474   const RegInfo *Reg = getRegularRegInfo(Str);
2475   if (Reg) {
2476     StringRef RegName = Reg->Name;
2477     StringRef RegSuffix = Str.substr(RegName.size());
2478     if (!RegSuffix.empty()) {
2479       unsigned Num;
2480       // A single register with an index: rXX
2481       if (getRegNum(RegSuffix, Num))
2482         return true;
2483     } else {
2484       // A range of registers: r[XX:YY].
2485       if (NextToken.is(AsmToken::LBrac))
2486         return true;
2487     }
2488   }
2489 
2490   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2491 }
2492 
2493 bool
2494 AMDGPUAsmParser::isRegister()
2495 {
2496   return isRegister(getToken(), peekToken());
2497 }
2498 
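// Compute the MC register for a regular (v/s/a/ttmp) register from its kind,
// starting index and width, diagnosing misaligned or out-of-range indices.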
2499 unsigned
2500 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2501                                unsigned RegNum,
2502                                unsigned RegWidth,
2503                                SMLoc Loc) {
2504 
2505   assert(isRegularReg(RegKind));
2506 
2507   unsigned AlignSize = 1;
2508   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2509     // SGPR and TTMP registers must be aligned.
2510     // Max required alignment is 4 dwords.
2511     AlignSize = std::min(RegWidth, 4u);
2512   }
2513 
2514   if (RegNum % AlignSize != 0) {
2515     Error(Loc, "invalid register alignment");
2516     return AMDGPU::NoRegister;
2517   }
2518 
2519   unsigned RegIdx = RegNum / AlignSize;
2520   int RCID = getRegClass(RegKind, RegWidth);
2521   if (RCID == -1) {
2522     Error(Loc, "invalid or unsupported register size");
2523     return AMDGPU::NoRegister;
2524   }
2525 
2526   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2527   const MCRegisterClass RC = TRI->getRegClass(RCID);
2528   if (RegIdx >= RC.getNumRegs()) {
2529     Error(Loc, "register index is out of range");
2530     return AMDGPU::NoRegister;
2531   }
2532 
2533   return RC.getRegister(RegIdx);
2534 }
2535 
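// Parse a bracketed register index range such as "[XX]" or "[XX:YY]",
// returning the first index and the number of registers covered.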
2536 bool
2537 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2538   int64_t RegLo, RegHi;
2539   if (!skipToken(AsmToken::LBrac, "missing register index"))
2540     return false;
2541 
2542   SMLoc FirstIdxLoc = getLoc();
2543   SMLoc SecondIdxLoc;
2544 
2545   if (!parseExpr(RegLo))
2546     return false;
2547 
2548   if (trySkipToken(AsmToken::Colon)) {
2549     SecondIdxLoc = getLoc();
2550     if (!parseExpr(RegHi))
2551       return false;
2552   } else {
2553     RegHi = RegLo;
2554   }
2555 
2556   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2557     return false;
2558 
2559   if (!isUInt<32>(RegLo)) {
2560     Error(FirstIdxLoc, "invalid register index");
2561     return false;
2562   }
2563 
2564   if (!isUInt<32>(RegHi)) {
2565     Error(SecondIdxLoc, "invalid register index");
2566     return false;
2567   }
2568 
2569   if (RegLo > RegHi) {
2570     Error(FirstIdxLoc, "first register index should not exceed second index");
2571     return false;
2572   }
2573 
2574   Num = static_cast<unsigned>(RegLo);
2575   Width = (RegHi - RegLo) + 1;
2576   return true;
2577 }
2578 
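// Try to parse a special register name (exec, vcc, m0, ...). Returns
// AMDGPU::NoRegister if the current token does not name one.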
2579 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2580                                           unsigned &RegNum, unsigned &RegWidth,
2581                                           SmallVectorImpl<AsmToken> &Tokens) {
2582   assert(isToken(AsmToken::Identifier));
2583   unsigned Reg = getSpecialRegForName(getTokenStr());
2584   if (Reg) {
2585     RegNum = 0;
2586     RegWidth = 1;
2587     RegKind = IS_SPECIAL;
2588     Tokens.push_back(getToken());
2589     lex(); // skip register name
2590   }
2591   return Reg;
2592 }
2593 
2594 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2595                                           unsigned &RegNum, unsigned &RegWidth,
2596                                           SmallVectorImpl<AsmToken> &Tokens) {
2597   assert(isToken(AsmToken::Identifier));
2598   StringRef RegName = getTokenStr();
2599   auto Loc = getLoc();
2600 
2601   const RegInfo *RI = getRegularRegInfo(RegName);
2602   if (!RI) {
2603     Error(Loc, "invalid register name");
2604     return AMDGPU::NoRegister;
2605   }
2606 
2607   Tokens.push_back(getToken());
2608   lex(); // skip register name
2609 
2610   RegKind = RI->Kind;
2611   StringRef RegSuffix = RegName.substr(RI->Name.size());
2612   if (!RegSuffix.empty()) {
2613     // Single 32-bit register: vXX.
2614     if (!getRegNum(RegSuffix, RegNum)) {
2615       Error(Loc, "invalid register index");
2616       return AMDGPU::NoRegister;
2617     }
2618     RegWidth = 1;
2619   } else {
2620     // Range of registers: v[XX:YY]. ":YY" is optional.
2621     if (!ParseRegRange(RegNum, RegWidth))
2622       return AMDGPU::NoRegister;
2623   }
2624 
2625   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2626 }
2627 
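// Parse a bracketed list of consecutive single 32-bit registers, e.g.
// [s0,s1,s2,s3], and combine them into one wider register.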
2628 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2629                                        unsigned &RegWidth,
2630                                        SmallVectorImpl<AsmToken> &Tokens) {
2631   unsigned Reg = AMDGPU::NoRegister;
2632   auto ListLoc = getLoc();
2633 
2634   if (!skipToken(AsmToken::LBrac,
2635                  "expected a register or a list of registers")) {
2636     return AMDGPU::NoRegister;
2637   }
2638 
2639   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2640 
2641   auto Loc = getLoc();
2642   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2643     return AMDGPU::NoRegister;
2644   if (RegWidth != 1) {
2645     Error(Loc, "expected a single 32-bit register");
2646     return AMDGPU::NoRegister;
2647   }
2648 
2649   for (; trySkipToken(AsmToken::Comma); ) {
2650     RegisterKind NextRegKind;
2651     unsigned NextReg, NextRegNum, NextRegWidth;
2652     Loc = getLoc();
2653 
2654     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2655                              NextRegNum, NextRegWidth,
2656                              Tokens)) {
2657       return AMDGPU::NoRegister;
2658     }
2659     if (NextRegWidth != 1) {
2660       Error(Loc, "expected a single 32-bit register");
2661       return AMDGPU::NoRegister;
2662     }
2663     if (NextRegKind != RegKind) {
2664       Error(Loc, "registers in a list must be of the same kind");
2665       return AMDGPU::NoRegister;
2666     }
2667     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2668       return AMDGPU::NoRegister;
2669   }
2670 
2671   if (!skipToken(AsmToken::RBrac,
2672                  "expected a comma or a closing square bracket")) {
2673     return AMDGPU::NoRegister;
2674   }
2675 
2676   if (isRegularReg(RegKind))
2677     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2678 
2679   return Reg;
2680 }
2681 
2682 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2683                                           unsigned &RegNum, unsigned &RegWidth,
2684                                           SmallVectorImpl<AsmToken> &Tokens) {
2685   auto Loc = getLoc();
2686   Reg = AMDGPU::NoRegister;
2687 
2688   if (isToken(AsmToken::Identifier)) {
2689     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2690     if (Reg == AMDGPU::NoRegister)
2691       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2692   } else {
2693     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2694   }
2695 
2696   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2697   if (Reg == AMDGPU::NoRegister) {
2698     assert(Parser.hasPendingError());
2699     return false;
2700   }
2701 
2702   if (!subtargetHasRegister(*TRI, Reg)) {
2703     if (Reg == AMDGPU::SGPR_NULL) {
2704       Error(Loc, "'null' operand is not supported on this GPU");
2705     } else {
2706       Error(Loc, "register not available on this GPU");
2707     }
2708     return false;
2709   }
2710 
2711   return true;
2712 }
2713 
2714 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2715                                           unsigned &RegNum, unsigned &RegWidth,
2716                                           bool RestoreOnFailure /*=false*/) {
2717   Reg = AMDGPU::NoRegister;
2718 
2719   SmallVector<AsmToken, 1> Tokens;
2720   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2721     if (RestoreOnFailure) {
2722       while (!Tokens.empty()) {
2723         getLexer().UnLex(Tokens.pop_back_val());
2724       }
2725     }
2726     return true;
2727   }
2728   return false;
2729 }
2730 
2731 Optional<StringRef>
2732 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2733   switch (RegKind) {
2734   case IS_VGPR:
2735     return StringRef(".amdgcn.next_free_vgpr");
2736   case IS_SGPR:
2737     return StringRef(".amdgcn.next_free_sgpr");
2738   default:
2739     return None;
2740   }
2741 }
2742 
2743 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2744   auto SymbolName = getGprCountSymbolName(RegKind);
2745   assert(SymbolName && "initializing invalid register kind");
2746   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2747   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2748 }
2749 
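// Advance the .amdgcn.next_free_{v,s}gpr symbol if this register use extends
// beyond the current maximum.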
2750 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2751                                             unsigned DwordRegIndex,
2752                                             unsigned RegWidth) {
2753   // Symbols are only defined for GCN targets
2754   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2755     return true;
2756 
2757   auto SymbolName = getGprCountSymbolName(RegKind);
2758   if (!SymbolName)
2759     return true;
2760   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2761 
2762   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2763   int64_t OldCount;
2764 
2765   if (!Sym->isVariable())
2766     return !Error(getLoc(),
2767                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2768   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2769     return !Error(
2770         getLoc(),
2771         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2772 
2773   if (OldCount <= NewMax)
2774     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2775 
2776   return true;
2777 }
2778 
2779 std::unique_ptr<AMDGPUOperand>
2780 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2781   const auto &Tok = getToken();
2782   SMLoc StartLoc = Tok.getLoc();
2783   SMLoc EndLoc = Tok.getEndLoc();
2784   RegisterKind RegKind;
2785   unsigned Reg, RegNum, RegWidth;
2786 
2787   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2788     return nullptr;
2789   }
2790   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2791     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2792       return nullptr;
2793   } else
2794     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2795   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2796 }
2797 
2798 OperandMatchResultTy
2799 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2800   // TODO: add syntactic sugar for 1/(2*PI)
2801 
2802   assert(!isRegister());
2803   assert(!isModifier());
2804 
2805   const auto& Tok = getToken();
2806   const auto& NextTok = peekToken();
2807   bool IsReal = Tok.is(AsmToken::Real);
2808   SMLoc S = getLoc();
2809   bool Negate = false;
2810 
2811   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2812     lex();
2813     IsReal = true;
2814     Negate = true;
2815   }
2816 
2817   if (IsReal) {
2818     // Floating-point expressions are not supported.
2819     // We can only accept floating-point literals with an
2820     // optional sign.
2821 
2822     StringRef Num = getTokenStr();
2823     lex();
2824 
2825     APFloat RealVal(APFloat::IEEEdouble());
2826     auto roundMode = APFloat::rmNearestTiesToEven;
2827     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2828       return MatchOperand_ParseFail;
2829     }
2830     if (Negate)
2831       RealVal.changeSign();
2832 
2833     Operands.push_back(
2834       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2835                                AMDGPUOperand::ImmTyNone, true));
2836 
2837     return MatchOperand_Success;
2838 
2839   } else {
2840     int64_t IntVal;
2841     const MCExpr *Expr;
2842     SMLoc S = getLoc();
2843 
2844     if (HasSP3AbsModifier) {
2845       // This is a workaround for handling expressions
2846       // as arguments of SP3 'abs' modifier, for example:
2847       //     |1.0|
2848       //     |-1|
2849       //     |1+x|
2850       // This syntax is not compatible with syntax of standard
2851       // MC expressions (due to the trailing '|').
2852       SMLoc EndLoc;
2853       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2854         return MatchOperand_ParseFail;
2855     } else {
2856       if (Parser.parseExpression(Expr))
2857         return MatchOperand_ParseFail;
2858     }
2859 
2860     if (Expr->evaluateAsAbsolute(IntVal)) {
2861       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2862     } else {
2863       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2864     }
2865 
2866     return MatchOperand_Success;
2867   }
2868 
2869   return MatchOperand_NoMatch;
2870 }
2871 
2872 OperandMatchResultTy
2873 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2874   if (!isRegister())
2875     return MatchOperand_NoMatch;
2876 
2877   if (auto R = parseRegister()) {
2878     assert(R->isReg());
2879     Operands.push_back(std::move(R));
2880     return MatchOperand_Success;
2881   }
2882   return MatchOperand_ParseFail;
2883 }
2884 
2885 OperandMatchResultTy
2886 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2887   auto res = parseReg(Operands);
2888   if (res != MatchOperand_NoMatch) {
2889     return res;
2890   } else if (isModifier()) {
2891     return MatchOperand_NoMatch;
2892   } else {
2893     return parseImm(Operands, HasSP3AbsMod);
2894   }
2895 }
2896 
2897 bool
2898 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2899   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2900     const auto &str = Token.getString();
2901     return str == "abs" || str == "neg" || str == "sext";
2902   }
2903   return false;
2904 }
2905 
2906 bool
2907 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2908   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2909 }
2910 
2911 bool
2912 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2913   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2914 }
2915 
2916 bool
2917 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2918   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2919 }
2920 
2921 // Check if this is an operand modifier or an opcode modifier
2922 // which may look like an expression but is not. We should
2923 // avoid parsing these modifiers as expressions. Currently
2924 // recognized sequences are:
2925 //   |...|
2926 //   abs(...)
2927 //   neg(...)
2928 //   sext(...)
2929 //   -reg
2930 //   -|...|
2931 //   -abs(...)
2932 //   name:...
2933 // Note that simple opcode modifiers like 'gds' may be parsed as
2934 // expressions; this is a special case. See getExpressionAsToken.
2935 //
2936 bool
2937 AMDGPUAsmParser::isModifier() {
2938 
2939   AsmToken Tok = getToken();
2940   AsmToken NextToken[2];
2941   peekTokens(NextToken);
2942 
2943   return isOperandModifier(Tok, NextToken[0]) ||
2944          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2945          isOpcodeModifierWithVal(Tok, NextToken[0]);
2946 }
2947 
2948 // Check if the current token is an SP3 'neg' modifier.
2949 // Currently this modifier is allowed in the following context:
2950 //
2951 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2952 // 2. Before an 'abs' modifier: -abs(...)
2953 // 3. Before an SP3 'abs' modifier: -|...|
2954 //
2955 // In all other cases "-" is handled as a part
2956 // of an expression that follows the sign.
2957 //
2958 // Note: When "-" is followed by an integer literal N,
2959 // this is interpreted as integer negation rather
2960 // than a floating-point NEG modifier applied to N.
2961 // Besides being counter-intuitive, such use of a floating-point
2962 // NEG modifier would result in different meanings
2963 // of integer literals used with VOP1/2/C and VOP3,
2964 // for example:
2965 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2966 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2967 // Negative fp literals with a preceding "-" are
2968 // handled likewise for uniformity.
2969 //
2970 bool
2971 AMDGPUAsmParser::parseSP3NegModifier() {
2972 
2973   AsmToken NextToken[2];
2974   peekTokens(NextToken);
2975 
2976   if (isToken(AsmToken::Minus) &&
2977       (isRegister(NextToken[0], NextToken[1]) ||
2978        NextToken[0].is(AsmToken::Pipe) ||
2979        isId(NextToken[0], "abs"))) {
2980     lex();
2981     return true;
2982   }
2983 
2984   return false;
2985 }
2986 
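// Parse a register or immediate optionally wrapped in FP input modifiers:
// 'neg(...)', 'abs(...)', SP3 '-' and '|...|'. The parsed modifiers are
// attached to the resulting operand.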
2987 OperandMatchResultTy
2988 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2989                                               bool AllowImm) {
2990   bool Neg, SP3Neg;
2991   bool Abs, SP3Abs;
2992   SMLoc Loc;
2993 
2994   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2995   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2996     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2997     return MatchOperand_ParseFail;
2998   }
2999 
3000   SP3Neg = parseSP3NegModifier();
3001 
3002   Loc = getLoc();
3003   Neg = trySkipId("neg");
3004   if (Neg && SP3Neg) {
3005     Error(Loc, "expected register or immediate");
3006     return MatchOperand_ParseFail;
3007   }
3008   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3009     return MatchOperand_ParseFail;
3010 
3011   Abs = trySkipId("abs");
3012   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3013     return MatchOperand_ParseFail;
3014 
3015   Loc = getLoc();
3016   SP3Abs = trySkipToken(AsmToken::Pipe);
3017   if (Abs && SP3Abs) {
3018     Error(Loc, "expected register or immediate");
3019     return MatchOperand_ParseFail;
3020   }
3021 
3022   OperandMatchResultTy Res;
3023   if (AllowImm) {
3024     Res = parseRegOrImm(Operands, SP3Abs);
3025   } else {
3026     Res = parseReg(Operands);
3027   }
3028   if (Res != MatchOperand_Success) {
3029     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3030   }
3031 
3032   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3033     return MatchOperand_ParseFail;
3034   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3035     return MatchOperand_ParseFail;
3036   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3037     return MatchOperand_ParseFail;
3038 
3039   AMDGPUOperand::Modifiers Mods;
3040   Mods.Abs = Abs || SP3Abs;
3041   Mods.Neg = Neg || SP3Neg;
3042 
3043   if (Mods.hasFPModifiers()) {
3044     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3045     if (Op.isExpr()) {
3046       Error(Op.getStartLoc(), "expected an absolute expression");
3047       return MatchOperand_ParseFail;
3048     }
3049     Op.setModifiers(Mods);
3050   }
3051   return MatchOperand_Success;
3052 }
3053 
3054 OperandMatchResultTy
3055 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3056                                                bool AllowImm) {
3057   bool Sext = trySkipId("sext");
3058   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3059     return MatchOperand_ParseFail;
3060 
3061   OperandMatchResultTy Res;
3062   if (AllowImm) {
3063     Res = parseRegOrImm(Operands);
3064   } else {
3065     Res = parseReg(Operands);
3066   }
3067   if (Res != MatchOperand_Success) {
3068     return Sext? MatchOperand_ParseFail : Res;
3069   }
3070 
3071   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3072     return MatchOperand_ParseFail;
3073 
3074   AMDGPUOperand::Modifiers Mods;
3075   Mods.Sext = Sext;
3076 
3077   if (Mods.hasIntModifiers()) {
3078     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3079     if (Op.isExpr()) {
3080       Error(Op.getStartLoc(), "expected an absolute expression");
3081       return MatchOperand_ParseFail;
3082     }
3083     Op.setModifiers(Mods);
3084   }
3085 
3086   return MatchOperand_Success;
3087 }
3088 
3089 OperandMatchResultTy
3090 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3091   return parseRegOrImmWithFPInputMods(Operands, false);
3092 }
3093 
3094 OperandMatchResultTy
3095 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3096   return parseRegOrImmWithIntInputMods(Operands, false);
3097 }
3098 
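// Parse either the 'off' keyword (as an immediate) or a register operand.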
3099 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3100   auto Loc = getLoc();
3101   if (trySkipId("off")) {
3102     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3103                                                 AMDGPUOperand::ImmTyOff, false));
3104     return MatchOperand_Success;
3105   }
3106 
3107   if (!isRegister())
3108     return MatchOperand_NoMatch;
3109 
3110   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3111   if (Reg) {
3112     Operands.push_back(std::move(Reg));
3113     return MatchOperand_Success;
3114   }
3115 
3116   return MatchOperand_ParseFail;
3117 
3118 }
3119 
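// Reject matches that disagree with a forced encoding (e32/e64/sdwa/dpp) and
// handle a few opcode-specific restrictions.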
3120 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3121   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3122 
3123   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3124       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3125       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3126       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3127     return Match_InvalidOperand;
3128 
3129   if ((TSFlags & SIInstrFlags::VOP3) &&
3130       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3131       getForcedEncodingSize() != 64)
3132     return Match_PreferE32;
3133 
3134   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3135       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3136     // v_mac_f32/f16 allow only dst_sel == DWORD.
3137     auto OpNum =
3138         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3139     const auto &Op = Inst.getOperand(OpNum);
3140     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3141       return Match_InvalidOperand;
3142     }
3143   }
3144 
3145   return Match_Success;
3146 }
3147 
3148 static ArrayRef<unsigned> getAllVariants() {
3149   static const unsigned Variants[] = {
3150     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3151     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3152   };
3153 
3154   return makeArrayRef(Variants);
3155 }
3156 
3157 // What asm variants we should check
3158 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3159   if (getForcedEncodingSize() == 32) {
3160     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3161     return makeArrayRef(Variants);
3162   }
3163 
3164   if (isForcedVOP3()) {
3165     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3166     return makeArrayRef(Variants);
3167   }
3168 
3169   if (isForcedSDWA()) {
3170     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3171                                         AMDGPUAsmVariants::SDWA9};
3172     return makeArrayRef(Variants);
3173   }
3174 
3175   if (isForcedDPP()) {
3176     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3177     return makeArrayRef(Variants);
3178   }
3179 
3180   return getAllVariants();
3181 }
3182 
3183 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3184   if (getForcedEncodingSize() == 32)
3185     return "e32";
3186 
3187   if (isForcedVOP3())
3188     return "e64";
3189 
3190   if (isForcedSDWA())
3191     return "sdwa";
3192 
3193   if (isForcedDPP())
3194     return "dpp";
3195 
3196   return "";
3197 }
3198 
3199 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3200   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3201   const unsigned Num = Desc.getNumImplicitUses();
3202   for (unsigned i = 0; i < Num; ++i) {
3203     unsigned Reg = Desc.ImplicitUses[i];
3204     switch (Reg) {
3205     case AMDGPU::FLAT_SCR:
3206     case AMDGPU::VCC:
3207     case AMDGPU::VCC_LO:
3208     case AMDGPU::VCC_HI:
3209     case AMDGPU::M0:
3210       return Reg;
3211     default:
3212       break;
3213     }
3214   }
3215   return AMDGPU::NoRegister;
3216 }
3217 
3218 // NB: This code is correct only when used to check constant
3219 // bus limitations because GFX7 supports no f16 inline constants.
3220 // Note that there are no cases when a GFX7 opcode violates
3221 // constant bus limitations due to the use of an f16 constant.
3222 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3223                                        unsigned OpIdx) const {
3224   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3225 
3226   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3227     return false;
3228   }
3229 
3230   const MCOperand &MO = Inst.getOperand(OpIdx);
3231 
3232   int64_t Val = MO.getImm();
3233   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3234 
3235   switch (OpSize) { // expected operand size
3236   case 8:
3237     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3238   case 4:
3239     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3240   case 2: {
3241     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3242     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3243         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3244         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3245       return AMDGPU::isInlinableIntLiteral(Val);
3246 
3247     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3248         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3249         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3250       return AMDGPU::isInlinableIntLiteralV216(Val);
3251 
3252     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3253         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3254         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3255       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3256 
3257     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3258   }
3259   default:
3260     llvm_unreachable("invalid operand size");
3261   }
3262 }
3263 
3264 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3265   if (!isGFX10Plus())
3266     return 1;
3267 
3268   switch (Opcode) {
3269   // 64-bit shift instructions can use only one scalar value input
3270   case AMDGPU::V_LSHLREV_B64_e64:
3271   case AMDGPU::V_LSHLREV_B64_gfx10:
3272   case AMDGPU::V_LSHRREV_B64_e64:
3273   case AMDGPU::V_LSHRREV_B64_gfx10:
3274   case AMDGPU::V_ASHRREV_I64_e64:
3275   case AMDGPU::V_ASHRREV_I64_gfx10:
3276   case AMDGPU::V_LSHL_B64_e64:
3277   case AMDGPU::V_LSHR_B64_e64:
3278   case AMDGPU::V_ASHR_I64_e64:
3279     return 1;
3280   default:
3281     return 2;
3282   }
3283 }
3284 
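// An operand uses the constant bus if it is a literal (an immediate that is
// not an inline constant), an expression, or an SGPR other than null.
// VGPRs and inline constants do not occupy the constant bus.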
3285 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3286   const MCOperand &MO = Inst.getOperand(OpIdx);
3287   if (MO.isImm()) {
3288     return !isInlineConstant(Inst, OpIdx);
3289   } else if (MO.isReg()) {
3290     auto Reg = MO.getReg();
3291     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3292     auto PReg = mc2PseudoReg(Reg);
3293     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3294   } else {
3295     return true;
3296   }
3297 }
3298 
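// Check that the instruction does not read more scalar values over the
// constant bus than the target allows (see getConstantBusLimit).
// Illustrative example: on targets with a single constant bus slot,
//   v_add_f32_e64 v0, s1, s2
// is rejected because it reads two different SGPRs, whereas
//   v_add_f32_e64 v0, s1, s1
// is accepted since a repeated SGPR is counted only once.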
3299 bool
3300 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3301                                                 const OperandVector &Operands) {
3302   const unsigned Opcode = Inst.getOpcode();
3303   const MCInstrDesc &Desc = MII.get(Opcode);
3304   unsigned LastSGPR = AMDGPU::NoRegister;
3305   unsigned ConstantBusUseCount = 0;
3306   unsigned NumLiterals = 0;
3307   unsigned LiteralSize;
3308 
3309   if (Desc.TSFlags &
3310       (SIInstrFlags::VOPC |
3311        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3312        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3313        SIInstrFlags::SDWA)) {
3314     // Check special imm operands (used by madmk, etc)
3315     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3316       ++NumLiterals;
3317       LiteralSize = 4;
3318     }
3319 
3320     SmallDenseSet<unsigned> SGPRsUsed;
3321     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3322     if (SGPRUsed != AMDGPU::NoRegister) {
3323       SGPRsUsed.insert(SGPRUsed);
3324       ++ConstantBusUseCount;
3325     }
3326 
3327     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3328     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3329     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3330 
3331     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3332 
3333     for (int OpIdx : OpIndices) {
3334       if (OpIdx == -1) break;
3335 
3336       const MCOperand &MO = Inst.getOperand(OpIdx);
3337       if (usesConstantBus(Inst, OpIdx)) {
3338         if (MO.isReg()) {
3339           LastSGPR = mc2PseudoReg(MO.getReg());
3340           // Pairs of registers with partial intersections like these:
3341           //   s0, s[0:1]
3342           //   flat_scratch_lo, flat_scratch
3343           //   flat_scratch_lo, flat_scratch_hi
3344           // are theoretically valid but they are disabled anyway.
3345           // Note that this code mimics SIInstrInfo::verifyInstruction.
3346           if (!SGPRsUsed.count(LastSGPR)) {
3347             SGPRsUsed.insert(LastSGPR);
3348             ++ConstantBusUseCount;
3349           }
3350         } else { // Expression or a literal
3351 
3352           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3353             continue; // special operand like VINTERP attr_chan
3354 
3355           // An instruction may use only one literal.
3356           // This has been validated in a previous step.
3357           // See validateVOPLiteral.
3358           // This literal may be used as more than one operand.
3359           // If all these operands are of the same size,
3360           // this literal counts as one scalar value.
3361           // Otherwise it counts as 2 scalar values.
3362           // See "GFX10 Shader Programming", section 3.6.2.3.
3363 
3364           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3365           if (Size < 4) Size = 4;
3366 
3367           if (NumLiterals == 0) {
3368             NumLiterals = 1;
3369             LiteralSize = Size;
3370           } else if (LiteralSize != Size) {
3371             NumLiterals = 2;
3372           }
3373         }
3374       }
3375     }
3376   }
3377   ConstantBusUseCount += NumLiterals;
3378 
3379   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3380     return true;
3381 
3382   SMLoc LitLoc = getLitLoc(Operands);
3383   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3384   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3385   Error(Loc, "invalid operand (violates constant bus restrictions)");
3386   return false;
3387 }
3388 
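// For opcodes whose vdst operand is marked early-clobber, the destination
// register must not overlap any source register. As an illustration, an
// instruction with an early-clobber destination of v[0:3] could not also
// read v3 as one of its sources.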
3389 bool
3390 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3391                                                  const OperandVector &Operands) {
3392   const unsigned Opcode = Inst.getOpcode();
3393   const MCInstrDesc &Desc = MII.get(Opcode);
3394 
3395   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3396   if (DstIdx == -1 ||
3397       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3398     return true;
3399   }
3400 
3401   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3402 
3403   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3404   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3405   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3406 
3407   assert(DstIdx != -1);
3408   const MCOperand &Dst = Inst.getOperand(DstIdx);
3409   assert(Dst.isReg());
3410 
3411   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3412 
3413   for (int SrcIdx : SrcIndices) {
3414     if (SrcIdx == -1) break;
3415     const MCOperand &Src = Inst.getOperand(SrcIdx);
3416     if (Src.isReg()) {
3417       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3418         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3419         Error(getRegLoc(SrcReg, Operands),
3420           "destination must be different than all sources");
3421         return false;
3422       }
3423     }
3424   }
3425 
3426   return true;
3427 }
3428 
3429 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3430 
3431   const unsigned Opc = Inst.getOpcode();
3432   const MCInstrDesc &Desc = MII.get(Opc);
3433 
3434   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3435     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3436     assert(ClampIdx != -1);
3437     return Inst.getOperand(ClampIdx).getImm() == 0;
3438   }
3439 
3440   return true;
3441 }
3442 
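// The vdata size must match the number of components selected by dmask,
// plus one extra dword when tfe is set. Illustrative example: dmask=0x7
// selects three components, so with tfe=1 vdata must be a 4-dword tuple;
// with packed d16 the three components occupy (3 + 1) / 2 = 2 dwords instead.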
3443 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3444 
3445   const unsigned Opc = Inst.getOpcode();
3446   const MCInstrDesc &Desc = MII.get(Opc);
3447 
3448   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3449     return true;
3450 
3451   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3452   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3453   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3454 
3455   assert(VDataIdx != -1);
3456 
3457   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3458     return true;
3459 
3460   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3461   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3462   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3463   if (DMask == 0)
3464     DMask = 1;
3465 
3466   unsigned DataSize =
3467     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3468   if (hasPackedD16()) {
3469     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3470     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3471       DataSize = (DataSize + 1) / 2;
3472   }
3473 
3474   return (VDataSize / 4) == DataSize + TFESize;
3475 }
3476 
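// The number of address VGPRs must match what dim and a16 require.
// In NSA form each vaddr operand is a separate 32-bit VGPR, so the actual
// address size is the number of vaddr operands; otherwise it is the size
// of the single vaddr tuple in dwords.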
3477 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3478   const unsigned Opc = Inst.getOpcode();
3479   const MCInstrDesc &Desc = MII.get(Opc);
3480 
3481   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3482     return true;
3483 
3484   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3485 
3486   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3487       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3488   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3489   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3490   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3491   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3492 
3493   assert(VAddr0Idx != -1);
3494   assert(SrsrcIdx != -1);
3495   assert(SrsrcIdx > VAddr0Idx);
3496 
3497   if (DimIdx == -1)
3498     return true; // intersect_ray
3499 
3500   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3501   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3502   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3503   unsigned ActualAddrSize =
3504       IsNSA ? SrsrcIdx - VAddr0Idx
3505             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3506   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3507 
3508   unsigned ExpectedAddrSize =
3509       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3510 
3511   if (!IsNSA) {
3512     if (ExpectedAddrSize > 8)
3513       ExpectedAddrSize = 16;
3514 
3515     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3516     // This provides backward compatibility for assembly created
3517     // before 160b/192b/224b types were directly supported.
3518     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3519       return true;
3520   }
3521 
3522   return ActualAddrSize == ExpectedAddrSize;
3523 }
3524 
3525 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3526 
3527   const unsigned Opc = Inst.getOpcode();
3528   const MCInstrDesc &Desc = MII.get(Opc);
3529 
3530   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3531     return true;
3532   if (!Desc.mayLoad() || !Desc.mayStore())
3533     return true; // Not atomic
3534 
3535   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3536   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3537 
3538   // This is an incomplete check because image_atomic_cmpswap
3539   // may only use 0x3 and 0xf while other atomic operations
3540   // may use 0x1 and 0x3. However these limitations are
3541   // verified when we check that dmask matches dst size.
3542   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3543 }
3544 
3545 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3546 
3547   const unsigned Opc = Inst.getOpcode();
3548   const MCInstrDesc &Desc = MII.get(Opc);
3549 
3550   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3551     return true;
3552 
3553   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3554   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3555 
3556   // GATHER4 instructions use dmask in a different fashion compared to
3557   // other MIMG instructions. The only useful DMASK values are
3558   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3559   // (red,red,red,red) etc.) The ISA document doesn't mention
3560   // this.
3561   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3562 }
3563 
3564 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3565   const unsigned Opc = Inst.getOpcode();
3566   const MCInstrDesc &Desc = MII.get(Opc);
3567 
3568   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3569     return true;
3570 
3571   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3572   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3573       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3574 
3575   if (!BaseOpcode->MSAA)
3576     return true;
3577 
3578   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3579   assert(DimIdx != -1);
3580 
3581   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3582   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3583 
3584   return DimInfo->MSAA;
3585 }
3586 
3587 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3588 {
3589   switch (Opcode) {
3590   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3591   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3592   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3593     return true;
3594   default:
3595     return false;
3596   }
3597 }
3598 
3599 // movrels* opcodes should only allow VGPRs as src0.
3600 // This is specified in the .td description for vop1/vop3,
3601 // but sdwa is handled differently. See isSDWAOperand.
3602 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3603                                       const OperandVector &Operands) {
3604 
3605   const unsigned Opc = Inst.getOpcode();
3606   const MCInstrDesc &Desc = MII.get(Opc);
3607 
3608   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3609     return true;
3610 
3611   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3612   assert(Src0Idx != -1);
3613 
3614   SMLoc ErrLoc;
3615   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3616   if (Src0.isReg()) {
3617     auto Reg = mc2PseudoReg(Src0.getReg());
3618     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3619     if (!isSGPR(Reg, TRI))
3620       return true;
3621     ErrLoc = getRegLoc(Reg, Operands);
3622   } else {
3623     ErrLoc = getConstLoc(Operands);
3624   }
3625 
3626   Error(ErrLoc, "source operand must be a VGPR");
3627   return false;
3628 }
3629 
3630 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3631                                           const OperandVector &Operands) {
3632 
3633   const unsigned Opc = Inst.getOpcode();
3634 
3635   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3636     return true;
3637 
3638   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3639   assert(Src0Idx != -1);
3640 
3641   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3642   if (!Src0.isReg())
3643     return true;
3644 
3645   auto Reg = mc2PseudoReg(Src0.getReg());
3646   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3647   if (isSGPR(Reg, TRI)) {
3648     Error(getRegLoc(Reg, Operands),
3649           "source operand must be either a VGPR or an inline constant");
3650     return false;
3651   }
3652 
3653   return true;
3654 }
3655 
3656 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3657                                    const OperandVector &Operands) {
3658   const unsigned Opc = Inst.getOpcode();
3659   const MCInstrDesc &Desc = MII.get(Opc);
3660 
3661   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3662     return true;
3663 
3664   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3665   if (Src2Idx == -1)
3666     return true;
3667 
3668   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3669   if (!Src2.isReg())
3670     return true;
3671 
3672   MCRegister Src2Reg = Src2.getReg();
3673   MCRegister DstReg = Inst.getOperand(0).getReg();
3674   if (Src2Reg == DstReg)
3675     return true;
3676 
3677   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3678   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3679     return true;
3680 
3681   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3682     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3683           "source 2 operand must not partially overlap with dst");
3684     return false;
3685   }
3686 
3687   return true;
3688 }
3689 
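// v_div_scale_* use the VOP3B encoding, which has no ABS source modifier,
// so any |...| modifier on a source operand is rejected (reported as
// "ABS not allowed in VOP3B instructions" by the caller).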
3690 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3691   switch (Inst.getOpcode()) {
3692   default:
3693     return true;
3694   case V_DIV_SCALE_F32_gfx6_gfx7:
3695   case V_DIV_SCALE_F32_vi:
3696   case V_DIV_SCALE_F32_gfx10:
3697   case V_DIV_SCALE_F64_gfx6_gfx7:
3698   case V_DIV_SCALE_F64_vi:
3699   case V_DIV_SCALE_F64_gfx10:
3700     break;
3701   }
3702 
3703   // TODO: Check that src0 = src1 or src2.
3704 
3705   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3706                     AMDGPU::OpName::src1_modifiers,
3707                     AMDGPU::OpName::src2_modifiers}) {
3708     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3709             .getImm() &
3710         SISrcMods::ABS) {
3711       return false;
3712     }
3713   }
3714 
3715   return true;
3716 }
3717 
3718 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3719 
3720   const unsigned Opc = Inst.getOpcode();
3721   const MCInstrDesc &Desc = MII.get(Opc);
3722 
3723   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3724     return true;
3725 
3726   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3727   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3728     if (isCI() || isSI())
3729       return false;
3730   }
3731 
3732   return true;
3733 }
3734 
3735 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3736   const unsigned Opc = Inst.getOpcode();
3737   const MCInstrDesc &Desc = MII.get(Opc);
3738 
3739   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3740     return true;
3741 
3742   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3743   if (DimIdx < 0)
3744     return true;
3745 
3746   long Imm = Inst.getOperand(DimIdx).getImm();
3747   if (Imm < 0 || Imm >= 8)
3748     return false;
3749 
3750   return true;
3751 }
3752 
3753 static bool IsRevOpcode(const unsigned Opcode)
3754 {
3755   switch (Opcode) {
3756   case AMDGPU::V_SUBREV_F32_e32:
3757   case AMDGPU::V_SUBREV_F32_e64:
3758   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3759   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3760   case AMDGPU::V_SUBREV_F32_e32_vi:
3761   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3762   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3763   case AMDGPU::V_SUBREV_F32_e64_vi:
3764 
3765   case AMDGPU::V_SUBREV_CO_U32_e32:
3766   case AMDGPU::V_SUBREV_CO_U32_e64:
3767   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3768   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3769 
3770   case AMDGPU::V_SUBBREV_U32_e32:
3771   case AMDGPU::V_SUBBREV_U32_e64:
3772   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3773   case AMDGPU::V_SUBBREV_U32_e32_vi:
3774   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3775   case AMDGPU::V_SUBBREV_U32_e64_vi:
3776 
3777   case AMDGPU::V_SUBREV_U32_e32:
3778   case AMDGPU::V_SUBREV_U32_e64:
3779   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3780   case AMDGPU::V_SUBREV_U32_e32_vi:
3781   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3782   case AMDGPU::V_SUBREV_U32_e64_vi:
3783 
3784   case AMDGPU::V_SUBREV_F16_e32:
3785   case AMDGPU::V_SUBREV_F16_e64:
3786   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3787   case AMDGPU::V_SUBREV_F16_e32_vi:
3788   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3789   case AMDGPU::V_SUBREV_F16_e64_vi:
3790 
3791   case AMDGPU::V_SUBREV_U16_e32:
3792   case AMDGPU::V_SUBREV_U16_e64:
3793   case AMDGPU::V_SUBREV_U16_e32_vi:
3794   case AMDGPU::V_SUBREV_U16_e64_vi:
3795 
3796   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3797   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3798   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3799 
3800   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3801   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3802 
3803   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3804   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3805 
3806   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3807   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3808 
3809   case AMDGPU::V_LSHRREV_B32_e32:
3810   case AMDGPU::V_LSHRREV_B32_e64:
3811   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3812   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3813   case AMDGPU::V_LSHRREV_B32_e32_vi:
3814   case AMDGPU::V_LSHRREV_B32_e64_vi:
3815   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3816   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3817 
3818   case AMDGPU::V_ASHRREV_I32_e32:
3819   case AMDGPU::V_ASHRREV_I32_e64:
3820   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3821   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3822   case AMDGPU::V_ASHRREV_I32_e32_vi:
3823   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3824   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3825   case AMDGPU::V_ASHRREV_I32_e64_vi:
3826 
3827   case AMDGPU::V_LSHLREV_B32_e32:
3828   case AMDGPU::V_LSHLREV_B32_e64:
3829   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3830   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3831   case AMDGPU::V_LSHLREV_B32_e32_vi:
3832   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3833   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3834   case AMDGPU::V_LSHLREV_B32_e64_vi:
3835 
3836   case AMDGPU::V_LSHLREV_B16_e32:
3837   case AMDGPU::V_LSHLREV_B16_e64:
3838   case AMDGPU::V_LSHLREV_B16_e32_vi:
3839   case AMDGPU::V_LSHLREV_B16_e64_vi:
3840   case AMDGPU::V_LSHLREV_B16_gfx10:
3841 
3842   case AMDGPU::V_LSHRREV_B16_e32:
3843   case AMDGPU::V_LSHRREV_B16_e64:
3844   case AMDGPU::V_LSHRREV_B16_e32_vi:
3845   case AMDGPU::V_LSHRREV_B16_e64_vi:
3846   case AMDGPU::V_LSHRREV_B16_gfx10:
3847 
3848   case AMDGPU::V_ASHRREV_I16_e32:
3849   case AMDGPU::V_ASHRREV_I16_e64:
3850   case AMDGPU::V_ASHRREV_I16_e32_vi:
3851   case AMDGPU::V_ASHRREV_I16_e64_vi:
3852   case AMDGPU::V_ASHRREV_I16_gfx10:
3853 
3854   case AMDGPU::V_LSHLREV_B64_e64:
3855   case AMDGPU::V_LSHLREV_B64_gfx10:
3856   case AMDGPU::V_LSHLREV_B64_vi:
3857 
3858   case AMDGPU::V_LSHRREV_B64_e64:
3859   case AMDGPU::V_LSHRREV_B64_gfx10:
3860   case AMDGPU::V_LSHRREV_B64_vi:
3861 
3862   case AMDGPU::V_ASHRREV_I64_e64:
3863   case AMDGPU::V_ASHRREV_I64_gfx10:
3864   case AMDGPU::V_ASHRREV_I64_vi:
3865 
3866   case AMDGPU::V_PK_LSHLREV_B16:
3867   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3868   case AMDGPU::V_PK_LSHLREV_B16_vi:
3869 
3870   case AMDGPU::V_PK_LSHRREV_B16:
3871   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3872   case AMDGPU::V_PK_LSHRREV_B16_vi:
3873   case AMDGPU::V_PK_ASHRREV_I16:
3874   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3875   case AMDGPU::V_PK_ASHRREV_I16_vi:
3876     return true;
3877   default:
3878     return false;
3879   }
3880 }
3881 
3882 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3883 
3884   using namespace SIInstrFlags;
3885   const unsigned Opcode = Inst.getOpcode();
3886   const MCInstrDesc &Desc = MII.get(Opcode);
3887 
3888 // The lds_direct register is defined so that it can be used
3889   // with 9-bit operands only. Ignore encodings which do not accept these.
3890   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3891   if ((Desc.TSFlags & Enc) == 0)
3892     return None;
3893 
3894   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3895     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3896     if (SrcIdx == -1)
3897       break;
3898     const auto &Src = Inst.getOperand(SrcIdx);
3899     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3900 
3901       if (isGFX90A())
3902         return StringRef("lds_direct is not supported on this GPU");
3903 
3904       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3905         return StringRef("lds_direct cannot be used with this instruction");
3906 
3907       if (SrcName != OpName::src0)
3908         return StringRef("lds_direct may be used as src0 only");
3909     }
3910   }
3911 
3912   return None;
3913 }
3914 
3915 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3916   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3917     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3918     if (Op.isFlatOffset())
3919       return Op.getStartLoc();
3920   }
3921   return getLoc();
3922 }
3923 
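// Flat offsets are not available on all targets. Where they are supported,
// GLOBAL and SCRATCH forms take a signed offset while plain FLAT takes an
// unsigned one; the allowed width is queried via getNumFlatOffsetBits.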
3924 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3925                                          const OperandVector &Operands) {
3926   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3927   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3928     return true;
3929 
3930   auto Opcode = Inst.getOpcode();
3931   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3932   assert(OpNum != -1);
3933 
3934   const auto &Op = Inst.getOperand(OpNum);
3935   if (!hasFlatOffsets() && Op.getImm() != 0) {
3936     Error(getFlatOffsetLoc(Operands),
3937           "flat offset modifier is not supported on this GPU");
3938     return false;
3939   }
3940 
3941   // For FLAT segment the offset must be positive;
3942   // MSB is ignored and forced to zero.
3943   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3944     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3945     if (!isIntN(OffsetSize, Op.getImm())) {
3946       Error(getFlatOffsetLoc(Operands),
3947             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3948       return false;
3949     }
3950   } else {
3951     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3952     if (!isUIntN(OffsetSize, Op.getImm())) {
3953       Error(getFlatOffsetLoc(Operands),
3954             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3955       return false;
3956     }
3957   }
3958 
3959   return true;
3960 }
3961 
3962 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3963   // Start with the second operand because SMEM Offset cannot be dst or src0.
3964   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3965     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3966     if (Op.isSMEMOffset())
3967       return Op.getStartLoc();
3968   }
3969   return getLoc();
3970 }
3971 
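// SMEM offsets are checked on GFX8+ only: 20-bit unsigned on VI and for
// buffer forms, 21-bit signed otherwise.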
3972 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3973                                          const OperandVector &Operands) {
3974   if (isCI() || isSI())
3975     return true;
3976 
3977   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3978   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3979     return true;
3980 
3981   auto Opcode = Inst.getOpcode();
3982   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3983   if (OpNum == -1)
3984     return true;
3985 
3986   const auto &Op = Inst.getOperand(OpNum);
3987   if (!Op.isImm())
3988     return true;
3989 
3990   uint64_t Offset = Op.getImm();
3991   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3992   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3993       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3994     return true;
3995 
3996   Error(getSMEMOffsetLoc(Operands),
3997         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3998                                "expected a 21-bit signed offset");
3999 
4000   return false;
4001 }
4002 
4003 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4004   unsigned Opcode = Inst.getOpcode();
4005   const MCInstrDesc &Desc = MII.get(Opcode);
4006   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4007     return true;
4008 
4009   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4010   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4011 
4012   const int OpIndices[] = { Src0Idx, Src1Idx };
4013 
4014   unsigned NumExprs = 0;
4015   unsigned NumLiterals = 0;
4016   uint32_t LiteralValue;
4017 
4018   for (int OpIdx : OpIndices) {
4019     if (OpIdx == -1) break;
4020 
4021     const MCOperand &MO = Inst.getOperand(OpIdx);
4022     // Exclude special imm operands (like the one used by s_set_gpr_idx_on).
4023     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4024       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4025         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4026         if (NumLiterals == 0 || LiteralValue != Value) {
4027           LiteralValue = Value;
4028           ++NumLiterals;
4029         }
4030       } else if (MO.isExpr()) {
4031         ++NumExprs;
4032       }
4033     }
4034   }
4035 
4036   return NumLiterals + NumExprs <= 1;
4037 }
4038 
4039 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4040   const unsigned Opc = Inst.getOpcode();
4041   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4042       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4043     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4044     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4045 
4046     if (OpSel & ~3)
4047       return false;
4048   }
4049   return true;
4050 }
4051 
4052 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4053                                   const OperandVector &Operands) {
4054   const unsigned Opc = Inst.getOpcode();
4055   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4056   if (DppCtrlIdx < 0)
4057     return true;
4058   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4059 
4060   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4061     // DPP64 is supported for row_newbcast only.
4062     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4063     if (Src0Idx >= 0 &&
4064         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4065       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4066       Error(S, "64 bit dpp only supports row_newbcast");
4067       return false;
4068     }
4069   }
4070 
4071   return true;
4072 }
4073 
4074 // Check if VCC register matches wavefront size
4075 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4076   auto FB = getFeatureBits();
4077   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4078     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4079 }
4080 
4081 // Only one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
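// Illustrative examples: on GFX10 (which has FeatureVOP3Literal)
//   v_add_f32_e64 v0, 0x12345678, v1
// is accepted, while two different 32-bit literals in a single instruction,
// or any VOP3 literal on a target without the feature, are rejected.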
4082 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4083                                          const OperandVector &Operands) {
4084   unsigned Opcode = Inst.getOpcode();
4085   const MCInstrDesc &Desc = MII.get(Opcode);
4086   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4087   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4088       ImmIdx == -1)
4089     return true;
4090 
4091   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4092   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4093   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4094 
4095   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4096 
4097   unsigned NumExprs = 0;
4098   unsigned NumLiterals = 0;
4099   uint32_t LiteralValue;
4100 
4101   for (int OpIdx : OpIndices) {
4102     if (OpIdx == -1)
4103       continue;
4104 
4105     const MCOperand &MO = Inst.getOperand(OpIdx);
4106     if (!MO.isImm() && !MO.isExpr())
4107       continue;
4108     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4109       continue;
4110 
4111     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4112         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4113       Error(getConstLoc(Operands),
4114             "inline constants are not allowed for this operand");
4115       return false;
4116     }
4117 
4118     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4119       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4120       if (NumLiterals == 0 || LiteralValue != Value) {
4121         LiteralValue = Value;
4122         ++NumLiterals;
4123       }
4124     } else if (MO.isExpr()) {
4125       ++NumExprs;
4126     }
4127   }
4128   NumLiterals += NumExprs;
4129 
4130   if (!NumLiterals)
4131     return true;
4132 
4133   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4134     Error(getLitLoc(Operands), "literal operands are not supported");
4135     return false;
4136   }
4137 
4138   if (NumLiterals > 1) {
4139     Error(getLitLoc(Operands), "only one literal operand is allowed");
4140     return false;
4141   }
4142 
4143   return true;
4144 }
4145 
4146 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4147 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4148                          const MCRegisterInfo *MRI) {
4149   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4150   if (OpIdx < 0)
4151     return -1;
4152 
4153   const MCOperand &Op = Inst.getOperand(OpIdx);
4154   if (!Op.isReg())
4155     return -1;
4156 
4157   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4158   auto Reg = Sub ? Sub : Op.getReg();
4159   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4160   return AGPR32.contains(Reg) ? 1 : 0;
4161 }
4162 
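// On gfx90a, vdst and vdata (data0/data1 for DS) must come from the same
// register file, i.e. all VGPR or all AGPR. On other targets AGPR data
// operands are not allowed for these memory instructions at all.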
4163 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4164   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4165   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4166                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4167                   SIInstrFlags::DS)) == 0)
4168     return true;
4169 
4170   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4171                                                       : AMDGPU::OpName::vdata;
4172 
4173   const MCRegisterInfo *MRI = getMRI();
4174   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4175   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4176 
4177   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4178     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4179     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4180       return false;
4181   }
4182 
4183   auto FB = getFeatureBits();
4184   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4185     if (DataAreg < 0 || DstAreg < 0)
4186       return true;
4187     return DstAreg == DataAreg;
4188   }
4189 
4190   return DstAreg < 1 && DataAreg < 1;
4191 }
4192 
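// gfx90a requires VGPR and AGPR tuples to start at an even register.
// Illustrative example: v[2:3] is a valid 64-bit operand, v[1:2] is not.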
4193 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4194   auto FB = getFeatureBits();
4195   if (!FB[AMDGPU::FeatureGFX90AInsts])
4196     return true;
4197 
4198   const MCRegisterInfo *MRI = getMRI();
4199   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4200   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4201   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4202     const MCOperand &Op = Inst.getOperand(I);
4203     if (!Op.isReg())
4204       continue;
4205 
4206     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4207     if (!Sub)
4208       continue;
4209 
4210     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4211       return false;
4212     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4213       return false;
4214   }
4215 
4216   return true;
4217 }
4218 
4219 // gfx90a has an undocumented limitation:
4220 // DS_GWS opcodes must use even aligned registers.
4221 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4222                                   const OperandVector &Operands) {
4223   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4224     return true;
4225 
4226   int Opc = Inst.getOpcode();
4227   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4228       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4229     return true;
4230 
4231   const MCRegisterInfo *MRI = getMRI();
4232   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4233   int Data0Pos =
4234       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4235   assert(Data0Pos != -1);
4236   auto Reg = Inst.getOperand(Data0Pos).getReg();
4237   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4238   if (RegIdx & 1) {
4239     SMLoc RegLoc = getRegLoc(Reg, Operands);
4240     Error(RegLoc, "vgpr must be even aligned");
4241     return false;
4242   }
4243 
4244   return true;
4245 }
4246 
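// Non-MIMG atomics that return a value must set glc (sc0 on gfx940) and
// atomics without a return value must not. SMRD instructions accept only
// glc and dlc, and scc is rejected on gfx90a.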
4247 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4248                                             const OperandVector &Operands,
4249                                             const SMLoc &IDLoc) {
4250   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4251                                            AMDGPU::OpName::cpol);
4252   if (CPolPos == -1)
4253     return true;
4254 
4255   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4256 
4257   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4258   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4259       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4260     Error(IDLoc, "invalid cache policy for SMRD instruction");
4261     return false;
4262   }
4263 
4264   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4265     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4266     StringRef CStr(S.getPointer());
4267     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4268     Error(S, "scc is not supported on this GPU");
4269     return false;
4270   }
4271 
4272   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4273     return true;
4274 
4275   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4276     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4277       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4278                               : "instruction must use glc");
4279       return false;
4280     }
4281   } else {
4282     if (CPol & CPol::GLC) {
4283       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4284       StringRef CStr(S.getPointer());
4285       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4286       Error(S, isGFX940() ? "instruction must not use sc0"
4287                           : "instruction must not use glc");
4288       return false;
4289     }
4290   }
4291 
4292   return true;
4293 }
4294 
4295 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4296                                           const SMLoc &IDLoc,
4297                                           const OperandVector &Operands) {
4298   if (auto ErrMsg = validateLdsDirect(Inst)) {
4299     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4300     return false;
4301   }
4302   if (!validateSOPLiteral(Inst)) {
4303     Error(getLitLoc(Operands),
4304       "only one literal operand is allowed");
4305     return false;
4306   }
4307   if (!validateVOPLiteral(Inst, Operands)) {
4308     return false;
4309   }
4310   if (!validateConstantBusLimitations(Inst, Operands)) {
4311     return false;
4312   }
4313   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4314     return false;
4315   }
4316   if (!validateIntClampSupported(Inst)) {
4317     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4318       "integer clamping is not supported on this GPU");
4319     return false;
4320   }
4321   if (!validateOpSel(Inst)) {
4322     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4323       "invalid op_sel operand");
4324     return false;
4325   }
4326   if (!validateDPP(Inst, Operands)) {
4327     return false;
4328   }
4329   // For MUBUF/MTBUF d16 is a part of the opcode, so there is nothing to validate.
4330   if (!validateMIMGD16(Inst)) {
4331     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4332       "d16 modifier is not supported on this GPU");
4333     return false;
4334   }
4335   if (!validateMIMGDim(Inst)) {
4336     Error(IDLoc, "dim modifier is required on this GPU");
4337     return false;
4338   }
4339   if (!validateMIMGMSAA(Inst)) {
4340     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4341           "invalid dim; must be MSAA type");
4342     return false;
4343   }
4344   if (!validateMIMGDataSize(Inst)) {
4345     Error(IDLoc,
4346       "image data size does not match dmask and tfe");
4347     return false;
4348   }
4349   if (!validateMIMGAddrSize(Inst)) {
4350     Error(IDLoc,
4351       "image address size does not match dim and a16");
4352     return false;
4353   }
4354   if (!validateMIMGAtomicDMask(Inst)) {
4355     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4356       "invalid atomic image dmask");
4357     return false;
4358   }
4359   if (!validateMIMGGatherDMask(Inst)) {
4360     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4361       "invalid image_gather dmask: only one bit must be set");
4362     return false;
4363   }
4364   if (!validateMovrels(Inst, Operands)) {
4365     return false;
4366   }
4367   if (!validateFlatOffset(Inst, Operands)) {
4368     return false;
4369   }
4370   if (!validateSMEMOffset(Inst, Operands)) {
4371     return false;
4372   }
4373   if (!validateMAIAccWrite(Inst, Operands)) {
4374     return false;
4375   }
4376   if (!validateMFMA(Inst, Operands)) {
4377     return false;
4378   }
4379   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4380     return false;
4381   }
4382 
4383   if (!validateAGPRLdSt(Inst)) {
4384     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4385     ? "invalid register class: data and dst should be all VGPR or AGPR"
4386     : "invalid register class: agpr loads and stores not supported on this GPU"
4387     );
4388     return false;
4389   }
4390   if (!validateVGPRAlign(Inst)) {
4391     Error(IDLoc,
4392       "invalid register class: vgpr tuples must be 64 bit aligned");
4393     return false;
4394   }
4395   if (!validateGWS(Inst, Operands)) {
4396     return false;
4397   }
4398 
4399   if (!validateDivScale(Inst)) {
4400     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4401     return false;
4402   }
4406 
4407   return true;
4408 }
4409 
4410 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4411                                             const FeatureBitset &FBS,
4412                                             unsigned VariantID = 0);
4413 
4414 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4415                                 const FeatureBitset &AvailableFeatures,
4416                                 unsigned VariantID);
4417 
4418 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4419                                        const FeatureBitset &FBS) {
4420   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4421 }
4422 
4423 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4424                                        const FeatureBitset &FBS,
4425                                        ArrayRef<unsigned> Variants) {
4426   for (auto Variant : Variants) {
4427     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4428       return true;
4429   }
4430 
4431   return false;
4432 }
4433 
4434 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4435                                                   const SMLoc &IDLoc) {
4436   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4437 
4438   // Check if requested instruction variant is supported.
4439   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4440     return false;
4441 
4442   // This instruction is not supported.
4443   // Clear any other pending errors because they are no longer relevant.
4444   getParser().clearPendingErrors();
4445 
4446   // Requested instruction variant is not supported.
4447   // Check if any other variants are supported.
4448   StringRef VariantName = getMatchedVariantName();
4449   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4450     return Error(IDLoc,
4451                  Twine(VariantName,
4452                        " variant of this instruction is not supported"));
4453   }
4454 
4455   // Finally check if this instruction is supported on any other GPU.
4456   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4457     return Error(IDLoc, "instruction not supported on this GPU");
4458   }
4459 
4460   // Instruction not supported on any GPU. Probably a typo.
4461   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4462   return Error(IDLoc, "invalid instruction" + Suggestion);
4463 }
4464 
4465 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4466                                               OperandVector &Operands,
4467                                               MCStreamer &Out,
4468                                               uint64_t &ErrorInfo,
4469                                               bool MatchingInlineAsm) {
4470   MCInst Inst;
4471   unsigned Result = Match_Success;
4472   for (auto Variant : getMatchedVariants()) {
4473     uint64_t EI;
4474     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4475                                   Variant);
4476     // We order match statuses from least to most specific and use the most
4477     // specific status as the result:
4478     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4479     if ((R == Match_Success) ||
4480         (R == Match_PreferE32) ||
4481         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4482         (R == Match_InvalidOperand && Result != Match_MissingFeature
4483                                    && Result != Match_PreferE32) ||
4484         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4485                                    && Result != Match_MissingFeature
4486                                    && Result != Match_PreferE32)) {
4487       Result = R;
4488       ErrorInfo = EI;
4489     }
4490     if (R == Match_Success)
4491       break;
4492   }
4493 
4494   if (Result == Match_Success) {
4495     if (!validateInstruction(Inst, IDLoc, Operands)) {
4496       return true;
4497     }
4498     Inst.setLoc(IDLoc);
4499     Out.emitInstruction(Inst, getSTI());
4500     return false;
4501   }
4502 
4503   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4504   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4505     return true;
4506   }
4507 
4508   switch (Result) {
4509   default: break;
4510   case Match_MissingFeature:
4511     // It has been verified that the specified instruction
4512     // mnemonic is valid. A match was found but it requires
4513     // features which are not supported on this GPU.
4514     return Error(IDLoc, "operands are not valid for this GPU or mode");
4515 
4516   case Match_InvalidOperand: {
4517     SMLoc ErrorLoc = IDLoc;
4518     if (ErrorInfo != ~0ULL) {
4519       if (ErrorInfo >= Operands.size()) {
4520         return Error(IDLoc, "too few operands for instruction");
4521       }
4522       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4523       if (ErrorLoc == SMLoc())
4524         ErrorLoc = IDLoc;
4525     }
4526     return Error(ErrorLoc, "invalid operand for instruction");
4527   }
4528 
4529   case Match_PreferE32:
4530     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4531                         "should be encoded as e32");
4532   case Match_MnemonicFail:
4533     llvm_unreachable("Invalid instructions should have been handled already");
4534   }
4535   llvm_unreachable("Implement any new match types added!");
4536 }
4537 
4538 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4539   int64_t Tmp = -1;
4540   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4541     return true;
4542   }
4543   if (getParser().parseAbsoluteExpression(Tmp)) {
4544     return true;
4545   }
4546   Ret = static_cast<uint32_t>(Tmp);
4547   return false;
4548 }
4549 
4550 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4551                                                uint32_t &Minor) {
4552   if (ParseAsAbsoluteExpression(Major))
4553     return TokError("invalid major version");
4554 
4555   if (!trySkipToken(AsmToken::Comma))
4556     return TokError("minor version number required, comma expected");
4557 
4558   if (ParseAsAbsoluteExpression(Minor))
4559     return TokError("invalid minor version");
4560 
4561   return false;
4562 }
4563 
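// Illustrative use in assembly (the string must match the target id in
// effect for the compilation):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"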
4564 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4565   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4566     return TokError("directive only supported for amdgcn architecture");
4567 
4568   std::string TargetIDDirective;
4569   SMLoc TargetStart = getTok().getLoc();
4570   if (getParser().parseEscapedString(TargetIDDirective))
4571     return true;
4572 
4573   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4574   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4575     return getParser().Error(TargetRange.Start,
4576         (Twine(".amdgcn_target directive's target id ") +
4577          Twine(TargetIDDirective) +
4578          Twine(" does not match the specified target id ") +
4579          Twine(getTargetStreamer().getTargetID()->toString())).str());
4580 
4581   return false;
4582 }
4583 
4584 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4585   return Error(Range.Start, "value out of range", Range);
4586 }
4587 
4588 bool AMDGPUAsmParser::calculateGPRBlocks(
4589     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4590     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4591     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4592     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4593   // TODO(scott.linder): These calculations are duplicated from
4594   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4595   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4596 
4597   unsigned NumVGPRs = NextFreeVGPR;
4598   unsigned NumSGPRs = NextFreeSGPR;
4599 
4600   if (Version.Major >= 10)
4601     NumSGPRs = 0;
4602   else {
4603     unsigned MaxAddressableNumSGPRs =
4604         IsaInfo::getAddressableNumSGPRs(&getSTI());
4605 
4606     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4607         NumSGPRs > MaxAddressableNumSGPRs)
4608       return OutOfRangeError(SGPRRange);
4609 
4610     NumSGPRs +=
4611         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4612 
4613     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4614         NumSGPRs > MaxAddressableNumSGPRs)
4615       return OutOfRangeError(SGPRRange);
4616 
4617     if (Features.test(FeatureSGPRInitBug))
4618       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4619   }
4620 
4621   VGPRBlocks =
4622       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4623   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4624 
4625   return false;
4626 }
4627 
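// Illustrative use in assembly; only a small subset of the recognized
// .amdhsa_* fields is shown:
//   .amdhsa_kernel my_kernel
//     .amdhsa_group_segment_fixed_size 16
//     .amdhsa_user_sgpr_dispatch_ptr 1
//   .end_amdhsa_kernel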
4628 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4629   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4630     return TokError("directive only supported for amdgcn architecture");
4631 
4632   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4633     return TokError("directive only supported for amdhsa OS");
4634 
4635   StringRef KernelName;
4636   if (getParser().parseIdentifier(KernelName))
4637     return true;
4638 
4639   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4640 
4641   StringSet<> Seen;
4642 
4643   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4644 
4645   SMRange VGPRRange;
4646   uint64_t NextFreeVGPR = 0;
4647   uint64_t AccumOffset = 0;
4648   uint64_t SharedVGPRCount = 0;
4649   SMRange SGPRRange;
4650   uint64_t NextFreeSGPR = 0;
4651 
4652   // Count the number of user SGPRs implied from the enabled feature bits.
4653   unsigned ImpliedUserSGPRCount = 0;
4654 
4655   // Track if the asm explicitly contains the directive for the user SGPR
4656   // count.
4657   Optional<unsigned> ExplicitUserSGPRCount;
4658   bool ReserveVCC = true;
4659   bool ReserveFlatScr = true;
4660   Optional<bool> EnableWavefrontSize32;
4661 
4662   while (true) {
4663     while (trySkipToken(AsmToken::EndOfStatement));
4664 
4665     StringRef ID;
4666     SMRange IDRange = getTok().getLocRange();
4667     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4668       return true;
4669 
4670     if (ID == ".end_amdhsa_kernel")
4671       break;
4672 
4673     if (Seen.find(ID) != Seen.end())
4674       return TokError(".amdhsa_ directives cannot be repeated");
4675     Seen.insert(ID);
4676 
4677     SMLoc ValStart = getLoc();
4678     int64_t IVal;
4679     if (getParser().parseAbsoluteExpression(IVal))
4680       return true;
4681     SMLoc ValEnd = getLoc();
4682     SMRange ValRange = SMRange(ValStart, ValEnd);
4683 
4684     if (IVal < 0)
4685       return OutOfRangeError(ValRange);
4686 
4687     uint64_t Val = IVal;
4688 
4689 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4690   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4691     return OutOfRangeError(RANGE);                                             \
4692   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4693 
4694     if (ID == ".amdhsa_group_segment_fixed_size") {
4695       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4696         return OutOfRangeError(ValRange);
4697       KD.group_segment_fixed_size = Val;
4698     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4699       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4700         return OutOfRangeError(ValRange);
4701       KD.private_segment_fixed_size = Val;
4702     } else if (ID == ".amdhsa_kernarg_size") {
4703       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4704         return OutOfRangeError(ValRange);
4705       KD.kernarg_size = Val;
4706     } else if (ID == ".amdhsa_user_sgpr_count") {
4707       ExplicitUserSGPRCount = Val;
4708     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4709       if (hasArchitectedFlatScratch())
4710         return Error(IDRange.Start,
4711                      "directive is not supported with architected flat scratch",
4712                      IDRange);
4713       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4714                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4715                        Val, ValRange);
4716       if (Val)
4717         ImpliedUserSGPRCount += 4;
4718     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4719       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4720                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4721                        ValRange);
4722       if (Val)
4723         ImpliedUserSGPRCount += 2;
4724     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4725       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4726                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4727                        ValRange);
4728       if (Val)
4729         ImpliedUserSGPRCount += 2;
4730     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4731       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4732                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4733                        Val, ValRange);
4734       if (Val)
4735         ImpliedUserSGPRCount += 2;
4736     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4737       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4738                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4739                        ValRange);
4740       if (Val)
4741         ImpliedUserSGPRCount += 2;
4742     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4743       if (hasArchitectedFlatScratch())
4744         return Error(IDRange.Start,
4745                      "directive is not supported with architected flat scratch",
4746                      IDRange);
4747       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4748                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4749                        ValRange);
4750       if (Val)
4751         ImpliedUserSGPRCount += 2;
4752     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4753       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4754                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4755                        Val, ValRange);
4756       if (Val)
4757         ImpliedUserSGPRCount += 1;
4758     } else if (ID == ".amdhsa_wavefront_size32") {
4759       if (IVersion.Major < 10)
4760         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4761       EnableWavefrontSize32 = Val;
4762       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4763                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4764                        Val, ValRange);
4765     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4766       if (hasArchitectedFlatScratch())
4767         return Error(IDRange.Start,
4768                      "directive is not supported with architected flat scratch",
4769                      IDRange);
4770       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4771                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4772     } else if (ID == ".amdhsa_enable_private_segment") {
4773       if (!hasArchitectedFlatScratch())
4774         return Error(
4775             IDRange.Start,
4776             "directive is not supported without architected flat scratch",
4777             IDRange);
4778       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4779                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4780     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4781       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4782                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4783                        ValRange);
4784     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4785       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4786                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4787                        ValRange);
4788     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4789       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4790                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4791                        ValRange);
4792     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4793       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4794                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4795                        ValRange);
4796     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4797       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4798                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4799                        ValRange);
4800     } else if (ID == ".amdhsa_next_free_vgpr") {
4801       VGPRRange = ValRange;
4802       NextFreeVGPR = Val;
4803     } else if (ID == ".amdhsa_next_free_sgpr") {
4804       SGPRRange = ValRange;
4805       NextFreeSGPR = Val;
4806     } else if (ID == ".amdhsa_accum_offset") {
4807       if (!isGFX90A())
4808         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4809       AccumOffset = Val;
4810     } else if (ID == ".amdhsa_reserve_vcc") {
4811       if (!isUInt<1>(Val))
4812         return OutOfRangeError(ValRange);
4813       ReserveVCC = Val;
4814     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4815       if (IVersion.Major < 7)
4816         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4817       if (hasArchitectedFlatScratch())
4818         return Error(IDRange.Start,
4819                      "directive is not supported with architected flat scratch",
4820                      IDRange);
4821       if (!isUInt<1>(Val))
4822         return OutOfRangeError(ValRange);
4823       ReserveFlatScr = Val;
4824     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4825       if (IVersion.Major < 8)
4826         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4827       if (!isUInt<1>(Val))
4828         return OutOfRangeError(ValRange);
4829       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4830         return getParser().Error(IDRange.Start,
4831                                  ".amdhsa_reserve_xnack_mask does not match target id", IDRange);
4832     } else if (ID == ".amdhsa_float_round_mode_32") {
4833       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4834                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4835     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4836       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4837                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4838     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4839       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4840                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4841     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4842       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4843                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4844                        ValRange);
4845     } else if (ID == ".amdhsa_dx10_clamp") {
4846       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4847                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4848     } else if (ID == ".amdhsa_ieee_mode") {
4849       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4850                        Val, ValRange);
4851     } else if (ID == ".amdhsa_fp16_overflow") {
4852       if (IVersion.Major < 9)
4853         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4854       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4855                        ValRange);
4856     } else if (ID == ".amdhsa_tg_split") {
4857       if (!isGFX90A())
4858         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4859       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4860                        ValRange);
4861     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4862       if (IVersion.Major < 10)
4863         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4864       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4865                        ValRange);
4866     } else if (ID == ".amdhsa_memory_ordered") {
4867       if (IVersion.Major < 10)
4868         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4869       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4870                        ValRange);
4871     } else if (ID == ".amdhsa_forward_progress") {
4872       if (IVersion.Major < 10)
4873         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4874       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4875                        ValRange);
4876     } else if (ID == ".amdhsa_shared_vgpr_count") {
4877       if (IVersion.Major < 10)
4878         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4879       SharedVGPRCount = Val;
4880       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
4881                        COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
4882                        ValRange);
4883     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4884       PARSE_BITS_ENTRY(
4885           KD.compute_pgm_rsrc2,
4886           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4887           ValRange);
4888     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4889       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4890                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4891                        Val, ValRange);
4892     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4893       PARSE_BITS_ENTRY(
4894           KD.compute_pgm_rsrc2,
4895           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4896           ValRange);
4897     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4898       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4899                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4900                        Val, ValRange);
4901     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4902       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4903                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4904                        Val, ValRange);
4905     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4906       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4907                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4908                        Val, ValRange);
4909     } else if (ID == ".amdhsa_exception_int_div_zero") {
4910       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4911                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4912                        Val, ValRange);
4913     } else {
4914       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4915     }
4916 
4917 #undef PARSE_BITS_ENTRY
4918   }
4919 
4920   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4921     return TokError(".amdhsa_next_free_vgpr directive is required");
4922 
4923   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4924     return TokError(".amdhsa_next_free_sgpr directive is required");
4925 
4926   unsigned VGPRBlocks;
4927   unsigned SGPRBlocks;
4928   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4929                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4930                          EnableWavefrontSize32, NextFreeVGPR,
4931                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4932                          SGPRBlocks))
4933     return true;
4934 
4935   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4936           VGPRBlocks))
4937     return OutOfRangeError(VGPRRange);
4938   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4939                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4940 
4941   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4942           SGPRBlocks))
4943     return OutOfRangeError(SGPRRange);
4944   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4945                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4946                   SGPRBlocks);
4947 
4948   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
4949     return TokError("amdhsa_user_sgpr_count smaller than implied by "
4950                     "enabled user SGPRs");
4951 
4952   unsigned UserSGPRCount =
4953       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
4954 
4955   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4956     return TokError("too many user SGPRs enabled");
4957   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4958                   UserSGPRCount);
4959 
4960   if (isGFX90A()) {
4961     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4962       return TokError(".amdhsa_accum_offset directive is required");
4963     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4964       return TokError("accum_offset should be in range [4..256] in "
4965                       "increments of 4");
4966     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4967       return TokError("accum_offset exceeds total VGPR allocation");
4968     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4969                     (AccumOffset / 4 - 1));
4970   }
4971 
4972   if (IVersion.Major == 10) {
4973     // SharedVGPRCount < 16 is checked by PARSE_BITS_ENTRY above.
4974     if (SharedVGPRCount && EnableWavefrontSize32) {
4975       return TokError("shared_vgpr_count directive not valid on "
4976                       "wavefront size 32");
4977     }
4978     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
4979       return TokError("shared_vgpr_count*2 + "
4980                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
4981                       "exceed 63\n");
4982     }
4983   }
4984 
4985   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4986       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4987       ReserveFlatScr);
4988   return false;
4989 }
4990 
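// Illustrative form of the directive handled below (version numbers are
// placeholders): .hsa_code_object_version 2,1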
4991 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4992   uint32_t Major;
4993   uint32_t Minor;
4994 
4995   if (ParseDirectiveMajorMinor(Major, Minor))
4996     return true;
4997 
4998   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4999   return false;
5000 }
5001 
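// Illustrative forms of the directive handled below (version numbers and the
// vendor/arch strings are placeholders). With no arguments the ISA of the
// targeted GPU is used:
//   .hsa_code_object_isa
//   .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"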
5002 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5003   uint32_t Major;
5004   uint32_t Minor;
5005   uint32_t Stepping;
5006   StringRef VendorName;
5007   StringRef ArchName;
5008 
5009   // If this directive has no arguments, then use the ISA version for the
5010   // targeted GPU.
5011   if (isToken(AsmToken::EndOfStatement)) {
5012     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5013     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5014                                                         ISA.Stepping,
5015                                                         "AMD", "AMDGPU");
5016     return false;
5017   }
5018 
5019   if (ParseDirectiveMajorMinor(Major, Minor))
5020     return true;
5021 
5022   if (!trySkipToken(AsmToken::Comma))
5023     return TokError("stepping version number required, comma expected");
5024 
5025   if (ParseAsAbsoluteExpression(Stepping))
5026     return TokError("invalid stepping version");
5027 
5028   if (!trySkipToken(AsmToken::Comma))
5029     return TokError("vendor name required, comma expected");
5030 
5031   if (!parseString(VendorName, "invalid vendor name"))
5032     return true;
5033 
5034   if (!trySkipToken(AsmToken::Comma))
5035     return TokError("arch name required, comma expected");
5036 
5037   if (!parseString(ArchName, "invalid arch name"))
5038     return true;
5039 
5040   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5041                                                       VendorName, ArchName);
5042   return false;
5043 }
5044 
5045 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5046                                                amd_kernel_code_t &Header) {
5047   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5048   // assembly for backwards compatibility.
5049   if (ID == "max_scratch_backing_memory_byte_size") {
5050     Parser.eatToEndOfStatement();
5051     return false;
5052   }
5053 
5054   SmallString<40> ErrStr;
5055   raw_svector_ostream Err(ErrStr);
5056   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5057     return TokError(Err.str());
5058   }
5059   Lex();
5060 
5061   if (ID == "enable_wavefront_size32") {
5062     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5063       if (!isGFX10Plus())
5064         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5065       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5066         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5067     } else {
5068       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5069         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5070     }
5071   }
5072 
5073   if (ID == "wavefront_size") {
5074     if (Header.wavefront_size == 5) {
5075       if (!isGFX10Plus())
5076         return TokError("wavefront_size=5 is only allowed on GFX10+");
5077       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5078         return TokError("wavefront_size=5 requires +WavefrontSize32");
5079     } else if (Header.wavefront_size == 6) {
5080       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5081         return TokError("wavefront_size=6 requires +WavefrontSize64");
5082     }
5083   }
5084 
5085   if (ID == "enable_wgp_mode") {
5086     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5087         !isGFX10Plus())
5088       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5089   }
5090 
5091   if (ID == "enable_mem_ordered") {
5092     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5093         !isGFX10Plus())
5094       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5095   }
5096 
5097   if (ID == "enable_fwd_progress") {
5098     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5099         !isGFX10Plus())
5100       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5101   }
5102 
5103   return false;
5104 }
5105 
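// Illustrative sketch of the legacy block parsed below; field names come from
// amd_kernel_code_t and the values shown are placeholders:
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_sgpr_kernarg_segment_ptr = 1
//   .end_amd_kernel_code_t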
5106 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5107   amd_kernel_code_t Header;
5108   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5109 
5110   while (true) {
5111     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5112     // will set the current token to EndOfStatement.
5113     while (trySkipToken(AsmToken::EndOfStatement));
5114 
5115     StringRef ID;
5116     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5117       return true;
5118 
5119     if (ID == ".end_amd_kernel_code_t")
5120       break;
5121 
5122     if (ParseAMDKernelCodeTValue(ID, Header))
5123       return true;
5124   }
5125 
5126   getTargetStreamer().EmitAMDKernelCodeT(Header);
5127 
5128   return false;
5129 }
5130 
5131 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5132   StringRef KernelName;
5133   if (!parseId(KernelName, "expected symbol name"))
5134     return true;
5135 
5136   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5137                                            ELF::STT_AMDGPU_HSA_KERNEL);
5138 
5139   KernelScope.initialize(getContext());
5140   return false;
5141 }
5142 
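// Illustrative use of the directive handled below; the quoted target id is a
// placeholder and must match the target id in effect for this compilation:
//   .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx906"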
5143 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5144   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5145     return Error(getLoc(),
5146                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5147                  "architectures");
5148   }
5149 
5150   auto TargetIDDirective = getLexer().getTok().getStringContents();
5151   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5152     return Error(getParser().getTok().getLoc(), "target id must match options");
5153 
5154   getTargetStreamer().EmitISAVersion();
5155   Lex();
5156 
5157   return false;
5158 }
5159 
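// Collects the raw YAML text found between the ABI-dependent begin and end
// directives (HSAMD::V3::AssemblerDirective* for code object v3 and above,
// HSAMD::AssemblerDirective* otherwise) and hands it to the target streamer.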
5160 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5161   const char *AssemblerDirectiveBegin;
5162   const char *AssemblerDirectiveEnd;
5163   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5164       isHsaAbiVersion3AndAbove(&getSTI())
5165           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5166                             HSAMD::V3::AssemblerDirectiveEnd)
5167           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5168                             HSAMD::AssemblerDirectiveEnd);
5169 
5170   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5171     return Error(getLoc(),
5172                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5173                  "not available on non-amdhsa OSes")).str());
5174   }
5175 
5176   std::string HSAMetadataString;
5177   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5178                           HSAMetadataString))
5179     return true;
5180 
5181   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5182     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5183       return Error(getLoc(), "invalid HSA metadata");
5184   } else {
5185     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5186       return Error(getLoc(), "invalid HSA metadata");
5187   }
5188 
5189   return false;
5190 }
5191 
5192 /// Common code to parse out a block of text (typically YAML) between start and
5193 /// end directives.
5194 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5195                                           const char *AssemblerDirectiveEnd,
5196                                           std::string &CollectString) {
5197 
5198   raw_string_ostream CollectStream(CollectString);
5199 
5200   getLexer().setSkipSpace(false);
5201 
5202   bool FoundEnd = false;
5203   while (!isToken(AsmToken::Eof)) {
5204     while (isToken(AsmToken::Space)) {
5205       CollectStream << getTokenStr();
5206       Lex();
5207     }
5208 
5209     if (trySkipId(AssemblerDirectiveEnd)) {
5210       FoundEnd = true;
5211       break;
5212     }
5213 
5214     CollectStream << Parser.parseStringToEndOfStatement()
5215                   << getContext().getAsmInfo()->getSeparatorString();
5216 
5217     Parser.eatToEndOfStatement();
5218   }
5219 
5220   getLexer().setSkipSpace(true);
5221 
5222   if (isToken(AsmToken::Eof) && !FoundEnd) {
5223     return TokError(Twine("expected directive ") +
5224                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5225   }
5226 
5227   CollectStream.flush();
5228   return false;
5229 }
5230 
5231 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5232 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5233   std::string String;
5234   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5235                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5236     return true;
5237 
5238   auto PALMetadata = getTargetStreamer().getPALMetadata();
5239   if (!PALMetadata->setFromString(String))
5240     return Error(getLoc(), "invalid PAL metadata");
5241   return false;
5242 }
5243 
5244 /// Parse the assembler directive for old linear-format PAL metadata.
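/// The legacy form is a flat, comma-separated list of register/value pairs,
/// so the directive must be followed by an even number of integer operands.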
5245 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5246   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5247     return Error(getLoc(),
5248                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5249                  "not available on non-amdpal OSes")).str());
5250   }
5251 
5252   auto PALMetadata = getTargetStreamer().getPALMetadata();
5253   PALMetadata->setLegacy();
5254   for (;;) {
5255     uint32_t Key, Value;
5256     if (ParseAsAbsoluteExpression(Key)) {
5257       return TokError(Twine("invalid value in ") +
5258                       Twine(PALMD::AssemblerDirective));
5259     }
5260     if (!trySkipToken(AsmToken::Comma)) {
5261       return TokError(Twine("expected an even number of values in ") +
5262                       Twine(PALMD::AssemblerDirective));
5263     }
5264     if (ParseAsAbsoluteExpression(Value)) {
5265       return TokError(Twine("invalid value in ") +
5266                       Twine(PALMD::AssemblerDirective));
5267     }
5268     PALMetadata->setRegister(Key, Value);
5269     if (!trySkipToken(AsmToken::Comma))
5270       break;
5271   }
5272   return false;
5273 }
5274 
5275 /// ParseDirectiveAMDGPULDS
5276 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5277 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5278   if (getParser().checkForValidSection())
5279     return true;
5280 
5281   StringRef Name;
5282   SMLoc NameLoc = getLoc();
5283   if (getParser().parseIdentifier(Name))
5284     return TokError("expected identifier in directive");
5285 
5286   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5287   if (parseToken(AsmToken::Comma, "expected ','"))
5288     return true;
5289 
5290   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5291 
5292   int64_t Size;
5293   SMLoc SizeLoc = getLoc();
5294   if (getParser().parseAbsoluteExpression(Size))
5295     return true;
5296   if (Size < 0)
5297     return Error(SizeLoc, "size must be non-negative");
5298   if (Size > LocalMemorySize)
5299     return Error(SizeLoc, "size is too large");
5300 
5301   int64_t Alignment = 4;
5302   if (trySkipToken(AsmToken::Comma)) {
5303     SMLoc AlignLoc = getLoc();
5304     if (getParser().parseAbsoluteExpression(Alignment))
5305       return true;
5306     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5307       return Error(AlignLoc, "alignment must be a power of two");
5308 
5309     // Alignment larger than the size of LDS is possible in theory, as long
5310     // as the linker manages to place the symbol at address 0, but we do want
5311     // to make sure the alignment fits nicely into a 32-bit integer.
5312     if (Alignment >= 1u << 31)
5313       return Error(AlignLoc, "alignment is too large");
5314   }
5315 
5316   if (parseToken(AsmToken::EndOfStatement,
5317                  "unexpected token in '.amdgpu_lds' directive"))
5318     return true;
5319 
5320   Symbol->redefineIfPossible();
5321   if (!Symbol->isUndefined())
5322     return Error(NameLoc, "invalid symbol redefinition");
5323 
5324   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5325   return false;
5326 }
5327 
5328 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5329   StringRef IDVal = DirectiveID.getString();
5330 
5331   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5332     if (IDVal == ".amdhsa_kernel")
5333      return ParseDirectiveAMDHSAKernel();
5334 
5335     // TODO: Restructure/combine with PAL metadata directive.
5336     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5337       return ParseDirectiveHSAMetadata();
5338   } else {
5339     if (IDVal == ".hsa_code_object_version")
5340       return ParseDirectiveHSACodeObjectVersion();
5341 
5342     if (IDVal == ".hsa_code_object_isa")
5343       return ParseDirectiveHSACodeObjectISA();
5344 
5345     if (IDVal == ".amd_kernel_code_t")
5346       return ParseDirectiveAMDKernelCodeT();
5347 
5348     if (IDVal == ".amdgpu_hsa_kernel")
5349       return ParseDirectiveAMDGPUHsaKernel();
5350 
5351     if (IDVal == ".amd_amdgpu_isa")
5352       return ParseDirectiveISAVersion();
5353 
5354     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5355       return ParseDirectiveHSAMetadata();
5356   }
5357 
5358   if (IDVal == ".amdgcn_target")
5359     return ParseDirectiveAMDGCNTarget();
5360 
5361   if (IDVal == ".amdgpu_lds")
5362     return ParseDirectiveAMDGPULDS();
5363 
5364   if (IDVal == PALMD::AssemblerDirectiveBegin)
5365     return ParseDirectivePALMetadataBegin();
5366 
5367   if (IDVal == PALMD::AssemblerDirective)
5368     return ParseDirectivePALMetadata();
5369 
5370   return true;
5371 }
5372 
5373 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5374                                            unsigned RegNo) {
5375 
5376   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5377     return isGFX9Plus();
5378 
5379   // GFX10 has 2 more SGPRs 104 and 105.
5380   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5381     return hasSGPR104_SGPR105();
5382 
5383   switch (RegNo) {
5384   case AMDGPU::SRC_SHARED_BASE:
5385   case AMDGPU::SRC_SHARED_LIMIT:
5386   case AMDGPU::SRC_PRIVATE_BASE:
5387   case AMDGPU::SRC_PRIVATE_LIMIT:
5388   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5389     return isGFX9Plus();
5390   case AMDGPU::TBA:
5391   case AMDGPU::TBA_LO:
5392   case AMDGPU::TBA_HI:
5393   case AMDGPU::TMA:
5394   case AMDGPU::TMA_LO:
5395   case AMDGPU::TMA_HI:
5396     return !isGFX9Plus();
5397   case AMDGPU::XNACK_MASK:
5398   case AMDGPU::XNACK_MASK_LO:
5399   case AMDGPU::XNACK_MASK_HI:
5400     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5401   case AMDGPU::SGPR_NULL:
5402     return isGFX10Plus();
5403   default:
5404     break;
5405   }
5406 
5407   if (isCI())
5408     return true;
5409 
5410   if (isSI() || isGFX10Plus()) {
5411     // No flat_scr on SI.
5412     // On GFX10 flat scratch is not a valid register operand and can only be
5413     // accessed with s_setreg/s_getreg.
5414     switch (RegNo) {
5415     case AMDGPU::FLAT_SCR:
5416     case AMDGPU::FLAT_SCR_LO:
5417     case AMDGPU::FLAT_SCR_HI:
5418       return false;
5419     default:
5420       return true;
5421     }
5422   }
5423 
5424   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5425   // SI/CI have.
5426   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5427     return hasSGPR102_SGPR103();
5428 
5429   return true;
5430 }
5431 
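// For gfx10+ MIMG instructions the address operand may be written in NSA
// (non-sequential address) form as a bracketed register list, e.g. (register
// names are placeholders): [v4, v9, v2]. parseOperand below recognizes this
// form when invoked with OperandMode_NSA; a list containing a single register
// is treated as a plain register operand.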
5432 OperandMatchResultTy
5433 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5434                               OperandMode Mode) {
5435   // Try to parse with a custom parser
5436   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5437 
5438   // If we successfully parsed the operand or if there was an error parsing,
5439   // we are done.
5440   //
5441   // If we are parsing after we reach EndOfStatement then this means we
5442   // are appending default values to the Operands list.  This is only done
5443   // by custom parser, so we shouldn't continue on to the generic parsing.
5444   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5445       isToken(AsmToken::EndOfStatement))
5446     return ResTy;
5447 
5448   SMLoc RBraceLoc;
5449   SMLoc LBraceLoc = getLoc();
5450   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5451     unsigned Prefix = Operands.size();
5452 
5453     for (;;) {
5454       auto Loc = getLoc();
5455       ResTy = parseReg(Operands);
5456       if (ResTy == MatchOperand_NoMatch)
5457         Error(Loc, "expected a register");
5458       if (ResTy != MatchOperand_Success)
5459         return MatchOperand_ParseFail;
5460 
5461       RBraceLoc = getLoc();
5462       if (trySkipToken(AsmToken::RBrac))
5463         break;
5464 
5465       if (!skipToken(AsmToken::Comma,
5466                      "expected a comma or a closing square bracket")) {
5467         return MatchOperand_ParseFail;
5468       }
5469     }
5470 
5471     if (Operands.size() - Prefix > 1) {
5472       Operands.insert(Operands.begin() + Prefix,
5473                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5474       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5475     }
5476 
5477     return MatchOperand_Success;
5478   }
5479 
5480   return parseRegOrImm(Operands);
5481 }
5482 
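// Mnemonic suffixes let the user force a particular encoding; the suffix is
// stripped before matching. For example (instruction name is illustrative),
// "v_add_f32_e64" forces the 64-bit (e64) encoding, while "_e32", "_dpp" and
// "_sdwa" select the other forms handled below.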
5483 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5484   // Clear any forced encodings from the previous instruction.
5485   setForcedEncodingSize(0);
5486   setForcedDPP(false);
5487   setForcedSDWA(false);
5488 
5489   if (Name.endswith("_e64")) {
5490     setForcedEncodingSize(64);
5491     return Name.substr(0, Name.size() - 4);
5492   } else if (Name.endswith("_e32")) {
5493     setForcedEncodingSize(32);
5494     return Name.substr(0, Name.size() - 4);
5495   } else if (Name.endswith("_dpp")) {
5496     setForcedDPP(true);
5497     return Name.substr(0, Name.size() - 4);
5498   } else if (Name.endswith("_sdwa")) {
5499     setForcedSDWA(true);
5500     return Name.substr(0, Name.size() - 5);
5501   }
5502   return Name;
5503 }
5504 
5505 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5506                                        StringRef Name,
5507                                        SMLoc NameLoc, OperandVector &Operands) {
5508   // Add the instruction mnemonic
5509   Name = parseMnemonicSuffix(Name);
5510   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5511 
5512   bool IsMIMG = Name.startswith("image_");
5513 
5514   while (!trySkipToken(AsmToken::EndOfStatement)) {
5515     OperandMode Mode = OperandMode_Default;
5516     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5517       Mode = OperandMode_NSA;
5518     CPolSeen = 0;
5519     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5520 
5521     if (Res != MatchOperand_Success) {
5522       checkUnsupportedInstruction(Name, NameLoc);
5523       if (!Parser.hasPendingError()) {
5524         // FIXME: use real operand location rather than the current location.
5525         StringRef Msg =
5526           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5527                                             "not a valid operand.";
5528         Error(getLoc(), Msg);
5529       }
5530       while (!trySkipToken(AsmToken::EndOfStatement)) {
5531         lex();
5532       }
5533       return true;
5534     }
5535 
5536     // Eat the comma or space if there is one.
5537     trySkipToken(AsmToken::Comma);
5538   }
5539 
5540   return false;
5541 }
5542 
5543 //===----------------------------------------------------------------------===//
5544 // Utility functions
5545 //===----------------------------------------------------------------------===//
5546 
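// parseIntWithPrefix handles operands of the form "<prefix>:<expr>", e.g.
// (prefix and value are illustrative): offset:4095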
5547 OperandMatchResultTy
5548 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5549 
5550   if (!trySkipId(Prefix, AsmToken::Colon))
5551     return MatchOperand_NoMatch;
5552 
5553   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5554 }
5555 
5556 OperandMatchResultTy
5557 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5558                                     AMDGPUOperand::ImmTy ImmTy,
5559                                     bool (*ConvertResult)(int64_t&)) {
5560   SMLoc S = getLoc();
5561   int64_t Value = 0;
5562 
5563   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5564   if (Res != MatchOperand_Success)
5565     return Res;
5566 
5567   if (ConvertResult && !ConvertResult(Value)) {
5568     Error(S, "invalid " + StringRef(Prefix) + " value.");
5569   }
5570 
5571   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5572   return MatchOperand_Success;
5573 }
5574 
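// parseOperandArrayWithPrefix handles bit-array operands of the form
// "<prefix>:[b0,b1,...]" with up to four elements, each 0 or 1, packed into a
// single immediate. Illustrative example: op_sel:[0,0,1]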
5575 OperandMatchResultTy
5576 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5577                                              OperandVector &Operands,
5578                                              AMDGPUOperand::ImmTy ImmTy,
5579                                              bool (*ConvertResult)(int64_t&)) {
5580   SMLoc S = getLoc();
5581   if (!trySkipId(Prefix, AsmToken::Colon))
5582     return MatchOperand_NoMatch;
5583 
5584   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5585     return MatchOperand_ParseFail;
5586 
5587   unsigned Val = 0;
5588   const unsigned MaxSize = 4;
5589 
5590   // FIXME: How to verify the number of elements matches the number of src
5591   // operands?
5592   for (int I = 0; ; ++I) {
5593     int64_t Op;
5594     SMLoc Loc = getLoc();
5595     if (!parseExpr(Op))
5596       return MatchOperand_ParseFail;
5597 
5598     if (Op != 0 && Op != 1) {
5599       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5600       return MatchOperand_ParseFail;
5601     }
5602 
5603     Val |= (Op << I);
5604 
5605     if (trySkipToken(AsmToken::RBrac))
5606       break;
5607 
5608     if (I + 1 == MaxSize) {
5609       Error(getLoc(), "expected a closing square bracket");
5610       return MatchOperand_ParseFail;
5611     }
5612 
5613     if (!skipToken(AsmToken::Comma, "expected a comma"))
5614       return MatchOperand_ParseFail;
5615   }
5616 
5617   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5618   return MatchOperand_Success;
5619 }
5620 
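// parseNamedBit handles boolean modifiers spelled either as the bare name or
// with a "no" prefix, e.g. (modifier name is illustrative): "gds" sets the
// bit and "nogds" clears it.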
5621 OperandMatchResultTy
5622 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5623                                AMDGPUOperand::ImmTy ImmTy) {
5624   int64_t Bit;
5625   SMLoc S = getLoc();
5626 
5627   if (trySkipId(Name)) {
5628     Bit = 1;
5629   } else if (trySkipId("no", Name)) {
5630     Bit = 0;
5631   } else {
5632     return MatchOperand_NoMatch;
5633   }
5634 
5635   if (Name == "r128" && !hasMIMG_R128()) {
5636     Error(S, "r128 modifier is not supported on this GPU");
5637     return MatchOperand_ParseFail;
5638   }
5639   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5640     Error(S, "a16 modifier is not supported on this GPU");
5641     return MatchOperand_ParseFail;
5642   }
5643 
5644   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5645     ImmTy = AMDGPUOperand::ImmTyR128A16;
5646 
5647   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5648   return MatchOperand_Success;
5649 }
5650 
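// parseCPol handles cache-policy modifiers. On most targets these are glc,
// slc, dlc (gfx10+) and scc (gfx90a), each optionally negated with a "no"
// prefix (e.g. noglc); on gfx940, instructions whose mnemonics do not start
// with "s_" use sc0/sc1/nt instead. Repeating a modifier on one instruction
// is rejected.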
5651 OperandMatchResultTy
5652 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5653   unsigned CPolOn = 0;
5654   unsigned CPolOff = 0;
5655   SMLoc S = getLoc();
5656 
5657   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5658   if (isGFX940() && !Mnemo.startswith("s_")) {
5659     if (trySkipId("sc0"))
5660       CPolOn = AMDGPU::CPol::SC0;
5661     else if (trySkipId("nosc0"))
5662       CPolOff = AMDGPU::CPol::SC0;
5663     else if (trySkipId("nt"))
5664       CPolOn = AMDGPU::CPol::NT;
5665     else if (trySkipId("nont"))
5666       CPolOff = AMDGPU::CPol::NT;
5667     else if (trySkipId("sc1"))
5668       CPolOn = AMDGPU::CPol::SC1;
5669     else if (trySkipId("nosc1"))
5670       CPolOff = AMDGPU::CPol::SC1;
5671     else
5672       return MatchOperand_NoMatch;
5673   }
5674   else if (trySkipId("glc"))
5675     CPolOn = AMDGPU::CPol::GLC;
5676   else if (trySkipId("noglc"))
5677     CPolOff = AMDGPU::CPol::GLC;
5678   else if (trySkipId("slc"))
5679     CPolOn = AMDGPU::CPol::SLC;
5680   else if (trySkipId("noslc"))
5681     CPolOff = AMDGPU::CPol::SLC;
5682   else if (trySkipId("dlc"))
5683     CPolOn = AMDGPU::CPol::DLC;
5684   else if (trySkipId("nodlc"))
5685     CPolOff = AMDGPU::CPol::DLC;
5686   else if (trySkipId("scc"))
5687     CPolOn = AMDGPU::CPol::SCC;
5688   else if (trySkipId("noscc"))
5689     CPolOff = AMDGPU::CPol::SCC;
5690   else
5691     return MatchOperand_NoMatch;
5692 
5693   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5694     Error(S, "dlc modifier is not supported on this GPU");
5695     return MatchOperand_ParseFail;
5696   }
5697 
5698   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5699     Error(S, "scc modifier is not supported on this GPU");
5700     return MatchOperand_ParseFail;
5701   }
5702 
5703   if (CPolSeen & (CPolOn | CPolOff)) {
5704     Error(S, "duplicate cache policy modifier");
5705     return MatchOperand_ParseFail;
5706   }
5707 
5708   CPolSeen |= (CPolOn | CPolOff);
5709 
5710   for (unsigned I = 1; I != Operands.size(); ++I) {
5711     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5712     if (Op.isCPol()) {
5713       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5714       return MatchOperand_Success;
5715     }
5716   }
5717 
5718   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5719                                               AMDGPUOperand::ImmTyCPol));
5720 
5721   return MatchOperand_Success;
5722 }
5723 
5724 static void addOptionalImmOperand(
5725   MCInst& Inst, const OperandVector& Operands,
5726   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5727   AMDGPUOperand::ImmTy ImmT,
5728   int64_t Default = 0) {
5729   auto i = OptionalIdx.find(ImmT);
5730   if (i != OptionalIdx.end()) {
5731     unsigned Idx = i->second;
5732     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5733   } else {
5734     Inst.addOperand(MCOperand::createImm(Default));
5735   }
5736 }
5737 
5738 OperandMatchResultTy
5739 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5740                                        StringRef &Value,
5741                                        SMLoc &StringLoc) {
5742   if (!trySkipId(Prefix, AsmToken::Colon))
5743     return MatchOperand_NoMatch;
5744 
5745   StringLoc = getLoc();
5746   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5747                                                   : MatchOperand_ParseFail;
5748 }
5749 
5750 //===----------------------------------------------------------------------===//
5751 // MTBUF format
5752 //===----------------------------------------------------------------------===//
5753 
5754 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5755                                   int64_t MaxVal,
5756                                   int64_t &Fmt) {
5757   int64_t Val;
5758   SMLoc Loc = getLoc();
5759 
5760   auto Res = parseIntWithPrefix(Pref, Val);
5761   if (Res == MatchOperand_ParseFail)
5762     return false;
5763   if (Res == MatchOperand_NoMatch)
5764     return true;
5765 
5766   if (Val < 0 || Val > MaxVal) {
5767     Error(Loc, Twine("out of range ", StringRef(Pref)));
5768     return false;
5769   }
5770 
5771   Fmt = Val;
5772   return true;
5773 }
5774 
5775 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5776 // values to live in a joint format operand in the MCInst encoding.
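// Both components are optional and may appear in either order, e.g.
// (illustrative values): "dfmt:1, nfmt:2", "nfmt:2" or "dfmt:1"; a missing
// component takes its default value.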
5777 OperandMatchResultTy
5778 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5779   using namespace llvm::AMDGPU::MTBUFFormat;
5780 
5781   int64_t Dfmt = DFMT_UNDEF;
5782   int64_t Nfmt = NFMT_UNDEF;
5783 
5784   // dfmt and nfmt can appear in either order, and each is optional.
5785   for (int I = 0; I < 2; ++I) {
5786     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5787       return MatchOperand_ParseFail;
5788 
5789     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5790       return MatchOperand_ParseFail;
5791     }
5792     // Skip optional comma between dfmt/nfmt
5793     // but guard against 2 commas following each other.
5794     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5795         !peekToken().is(AsmToken::Comma)) {
5796       trySkipToken(AsmToken::Comma);
5797     }
5798   }
5799 
5800   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5801     return MatchOperand_NoMatch;
5802 
5803   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5804   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5805 
5806   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5807   return MatchOperand_Success;
5808 }
5809 
5810 OperandMatchResultTy
5811 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5812   using namespace llvm::AMDGPU::MTBUFFormat;
5813 
5814   int64_t Fmt = UFMT_UNDEF;
5815 
5816   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5817     return MatchOperand_ParseFail;
5818 
5819   if (Fmt == UFMT_UNDEF)
5820     return MatchOperand_NoMatch;
5821 
5822   Format = Fmt;
5823   return MatchOperand_Success;
5824 }
5825 
5826 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5827                                     int64_t &Nfmt,
5828                                     StringRef FormatStr,
5829                                     SMLoc Loc) {
5830   using namespace llvm::AMDGPU::MTBUFFormat;
5831   int64_t Format;
5832 
5833   Format = getDfmt(FormatStr);
5834   if (Format != DFMT_UNDEF) {
5835     Dfmt = Format;
5836     return true;
5837   }
5838 
5839   Format = getNfmt(FormatStr, getSTI());
5840   if (Format != NFMT_UNDEF) {
5841     Nfmt = Format;
5842     return true;
5843   }
5844 
5845   Error(Loc, "unsupported format");
5846   return false;
5847 }
5848 
5849 OperandMatchResultTy
5850 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5851                                           SMLoc FormatLoc,
5852                                           int64_t &Format) {
5853   using namespace llvm::AMDGPU::MTBUFFormat;
5854 
5855   int64_t Dfmt = DFMT_UNDEF;
5856   int64_t Nfmt = NFMT_UNDEF;
5857   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5858     return MatchOperand_ParseFail;
5859 
5860   if (trySkipToken(AsmToken::Comma)) {
5861     StringRef Str;
5862     SMLoc Loc = getLoc();
5863     if (!parseId(Str, "expected a format string") ||
5864         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5865       return MatchOperand_ParseFail;
5866     }
5867     if (Dfmt == DFMT_UNDEF) {
5868       Error(Loc, "duplicate numeric format");
5869       return MatchOperand_ParseFail;
5870     } else if (Nfmt == NFMT_UNDEF) {
5871       Error(Loc, "duplicate data format");
5872       return MatchOperand_ParseFail;
5873     }
5874   }
5875 
5876   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5877   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5878 
5879   if (isGFX10Plus()) {
5880     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5881     if (Ufmt == UFMT_UNDEF) {
5882       Error(FormatLoc, "unsupported format");
5883       return MatchOperand_ParseFail;
5884     }
5885     Format = Ufmt;
5886   } else {
5887     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5888   }
5889 
5890   return MatchOperand_Success;
5891 }
5892 
5893 OperandMatchResultTy
5894 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5895                                             SMLoc Loc,
5896                                             int64_t &Format) {
5897   using namespace llvm::AMDGPU::MTBUFFormat;
5898 
5899   auto Id = getUnifiedFormat(FormatStr);
5900   if (Id == UFMT_UNDEF)
5901     return MatchOperand_NoMatch;
5902 
5903   if (!isGFX10Plus()) {
5904     Error(Loc, "unified format is not supported on this GPU");
5905     return MatchOperand_ParseFail;
5906   }
5907 
5908   Format = Id;
5909   return MatchOperand_Success;
5910 }
5911 
5912 OperandMatchResultTy
5913 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5914   using namespace llvm::AMDGPU::MTBUFFormat;
5915   SMLoc Loc = getLoc();
5916 
5917   if (!parseExpr(Format))
5918     return MatchOperand_ParseFail;
5919   if (!isValidFormatEncoding(Format, getSTI())) {
5920     Error(Loc, "out of range format");
5921     return MatchOperand_ParseFail;
5922   }
5923 
5924   return MatchOperand_Success;
5925 }
5926 
5927 OperandMatchResultTy
5928 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5929   using namespace llvm::AMDGPU::MTBUFFormat;
5930 
5931   if (!trySkipId("format", AsmToken::Colon))
5932     return MatchOperand_NoMatch;
5933 
5934   if (trySkipToken(AsmToken::LBrac)) {
5935     StringRef FormatStr;
5936     SMLoc Loc = getLoc();
5937     if (!parseId(FormatStr, "expected a format string"))
5938       return MatchOperand_ParseFail;
5939 
5940     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5941     if (Res == MatchOperand_NoMatch)
5942       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5943     if (Res != MatchOperand_Success)
5944       return Res;
5945 
5946     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5947       return MatchOperand_ParseFail;
5948 
5949     return MatchOperand_Success;
5950   }
5951 
5952   return parseNumericFormat(Format);
5953 }
5954 
5955 OperandMatchResultTy
5956 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5957   using namespace llvm::AMDGPU::MTBUFFormat;
5958 
5959   int64_t Format = getDefaultFormatEncoding(getSTI());
5960   OperandMatchResultTy Res;
5961   SMLoc Loc = getLoc();
5962 
5963   // Parse legacy format syntax.
5964   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5965   if (Res == MatchOperand_ParseFail)
5966     return Res;
5967 
5968   bool FormatFound = (Res == MatchOperand_Success);
5969 
5970   Operands.push_back(
5971     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5972 
5973   if (FormatFound)
5974     trySkipToken(AsmToken::Comma);
5975 
5976   if (isToken(AsmToken::EndOfStatement)) {
5977     // We are expecting an soffset operand,
5978     // but let matcher handle the error.
5979     return MatchOperand_Success;
5980   }
5981 
5982   // Parse soffset.
5983   Res = parseRegOrImm(Operands);
5984   if (Res != MatchOperand_Success)
5985     return Res;
5986 
5987   trySkipToken(AsmToken::Comma);
5988 
5989   if (!FormatFound) {
5990     Res = parseSymbolicOrNumericFormat(Format);
5991     if (Res == MatchOperand_ParseFail)
5992       return Res;
5993     if (Res == MatchOperand_Success) {
5994       auto Size = Operands.size();
5995       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5996       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5997       Op.setImm(Format);
5998     }
5999     return MatchOperand_Success;
6000   }
6001 
6002   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6003     Error(getLoc(), "duplicate format");
6004     return MatchOperand_ParseFail;
6005   }
6006   return MatchOperand_Success;
6007 }
6008 
6009 //===----------------------------------------------------------------------===//
6010 // ds
6011 //===----------------------------------------------------------------------===//
6012 
6013 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6014                                     const OperandVector &Operands) {
6015   OptionalImmIndexMap OptionalIdx;
6016 
6017   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6018     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6019 
6020     // Add the register arguments
6021     if (Op.isReg()) {
6022       Op.addRegOperands(Inst, 1);
6023       continue;
6024     }
6025 
6026     // Handle optional arguments
6027     OptionalIdx[Op.getImmTy()] = i;
6028   }
6029 
6030   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6031   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6032   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6033 
6034   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6035 }
6036 
6037 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6038                                 bool IsGdsHardcoded) {
6039   OptionalImmIndexMap OptionalIdx;
6040 
6041   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6042     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6043 
6044     // Add the register arguments
6045     if (Op.isReg()) {
6046       Op.addRegOperands(Inst, 1);
6047       continue;
6048     }
6049 
6050     if (Op.isToken() && Op.getToken() == "gds") {
6051       IsGdsHardcoded = true;
6052       continue;
6053     }
6054 
6055     // Handle optional arguments
6056     OptionalIdx[Op.getImmTy()] = i;
6057   }
6058 
6059   AMDGPUOperand::ImmTy OffsetType =
6060     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
6061      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
6062      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
6063                                                       AMDGPUOperand::ImmTyOffset;
6064 
6065   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6066 
6067   if (!IsGdsHardcoded) {
6068     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6069   }
6070   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6071 }
6072 
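// cvtExp builds the MCInst for an export: "off" sources become NoRegister, a
// standalone "done" token is skipped here, and the trailing enable mask gets
// one bit per live source (two bits per source in compressed mode).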
6073 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6074   OptionalImmIndexMap OptionalIdx;
6075 
6076   unsigned OperandIdx[4];
6077   unsigned EnMask = 0;
6078   int SrcIdx = 0;
6079 
6080   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6081     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6082 
6083     // Add the register arguments
6084     if (Op.isReg()) {
6085       assert(SrcIdx < 4);
6086       OperandIdx[SrcIdx] = Inst.size();
6087       Op.addRegOperands(Inst, 1);
6088       ++SrcIdx;
6089       continue;
6090     }
6091 
6092     if (Op.isOff()) {
6093       assert(SrcIdx < 4);
6094       OperandIdx[SrcIdx] = Inst.size();
6095       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6096       ++SrcIdx;
6097       continue;
6098     }
6099 
6100     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6101       Op.addImmOperands(Inst, 1);
6102       continue;
6103     }
6104 
6105     if (Op.isToken() && Op.getToken() == "done")
6106       continue;
6107 
6108     // Handle optional arguments
6109     OptionalIdx[Op.getImmTy()] = i;
6110   }
6111 
6112   assert(SrcIdx == 4);
6113 
6114   bool Compr = false;
6115   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6116     Compr = true;
6117     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6118     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6119     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6120   }
6121 
6122   for (auto i = 0; i < SrcIdx; ++i) {
6123     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6124       EnMask |= Compr ? (0x3 << (i * 2)) : (0x1 << i);
6125     }
6126   }
6127 
6128   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6129   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6130 
6131   Inst.addOperand(MCOperand::createImm(EnMask));
6132 }
6133 
6134 //===----------------------------------------------------------------------===//
6135 // s_waitcnt
6136 //===----------------------------------------------------------------------===//
6137 
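// Merge a single counter value into the combined s_waitcnt bitmask.
// Returns true on failure, i.e. when the value does not fit into the
// counter field; if Saturate is set, an out-of-range value is clamped
// to the field maximum instead.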
6138 static bool
6139 encodeCnt(
6140   const AMDGPU::IsaVersion ISA,
6141   int64_t &IntVal,
6142   int64_t CntVal,
6143   bool Saturate,
6144   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6145   unsigned (*decode)(const IsaVersion &Version, unsigned))
6146 {
6147   bool Failed = false;
6148 
6149   IntVal = encode(ISA, IntVal, CntVal);
6150   if (CntVal != decode(ISA, IntVal)) {
6151     if (Saturate) {
6152       IntVal = encode(ISA, IntVal, -1);
6153     } else {
6154       Failed = true;
6155     }
6156   }
6157   return Failed;
6158 }
6159 
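// Parse one counter specification of the form <name>(<expr>), e.g. vmcnt(0),
// and fold it into the combined s_waitcnt value in IntVal.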
6160 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6161 
6162   SMLoc CntLoc = getLoc();
6163   StringRef CntName = getTokenStr();
6164 
6165   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6166       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6167     return false;
6168 
6169   int64_t CntVal;
6170   SMLoc ValLoc = getLoc();
6171   if (!parseExpr(CntVal))
6172     return false;
6173 
6174   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6175 
6176   bool Failed = true;
6177   bool Sat = CntName.endswith("_sat");
6178 
6179   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6180     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6181   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6182     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6183   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6184     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6185   } else {
6186     Error(CntLoc, "invalid counter name " + CntName);
6187     return false;
6188   }
6189 
6190   if (Failed) {
6191     Error(ValLoc, "too large value for " + CntName);
6192     return false;
6193   }
6194 
6195   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6196     return false;
6197 
6198   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6199     if (isToken(AsmToken::EndOfStatement)) {
6200       Error(getLoc(), "expected a counter name");
6201       return false;
6202     }
6203   }
6204 
6205   return true;
6206 }
6207 
6208 OperandMatchResultTy
6209 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6210   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6211   int64_t Waitcnt = getWaitcntBitMask(ISA);
6212   SMLoc S = getLoc();
6213 
6214   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6215     while (!isToken(AsmToken::EndOfStatement)) {
6216       if (!parseCnt(Waitcnt))
6217         return MatchOperand_ParseFail;
6218     }
6219   } else {
6220     if (!parseExpr(Waitcnt))
6221       return MatchOperand_ParseFail;
6222   }
6223 
6224   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6225   return MatchOperand_Success;
6226 }
6227 
6228 bool
6229 AMDGPUOperand::isSWaitCnt() const {
6230   return isImm();
6231 }
6232 
6233 //===----------------------------------------------------------------------===//
6234 // hwreg
6235 //===----------------------------------------------------------------------===//
6236 
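// Parse the body of a hwreg(...) operand:
//   hwreg(<id> [, <offset>, <width>])
// where <id> is either a symbolic register name or a numeric code.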
6237 bool
6238 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6239                                 OperandInfoTy &Offset,
6240                                 OperandInfoTy &Width) {
6241   using namespace llvm::AMDGPU::Hwreg;
6242 
6243   // The register may be specified by name or using a numeric code
6244   HwReg.Loc = getLoc();
6245   if (isToken(AsmToken::Identifier) &&
6246       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) >= 0) {
6247     HwReg.IsSymbolic = true;
6248     lex(); // skip register name
6249   } else if (!parseExpr(HwReg.Id, "a register name")) {
6250     return false;
6251   }
6252 
6253   if (trySkipToken(AsmToken::RParen))
6254     return true;
6255 
6256   // parse optional params
6257   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6258     return false;
6259 
6260   Offset.Loc = getLoc();
6261   if (!parseExpr(Offset.Id))
6262     return false;
6263 
6264   if (!skipToken(AsmToken::Comma, "expected a comma"))
6265     return false;
6266 
6267   Width.Loc = getLoc();
6268   return parseExpr(Width.Id) &&
6269          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6270 }
6271 
6272 bool
6273 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6274                                const OperandInfoTy &Offset,
6275                                const OperandInfoTy &Width) {
6276 
6277   using namespace llvm::AMDGPU::Hwreg;
6278 
6279   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6280     Error(HwReg.Loc,
6281           "specified hardware register is not supported on this GPU");
6282     return false;
6283   }
6284   if (!isValidHwreg(HwReg.Id)) {
6285     Error(HwReg.Loc,
6286           "invalid code of hardware register: only 6-bit values are legal");
6287     return false;
6288   }
6289   if (!isValidHwregOffset(Offset.Id)) {
6290     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6291     return false;
6292   }
6293   if (!isValidHwregWidth(Width.Id)) {
6294     Error(Width.Loc,
6295           "invalid bitfield width: only values from 1 to 32 are legal");
6296     return false;
6297   }
6298   return true;
6299 }
6300 
6301 OperandMatchResultTy
6302 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6303   using namespace llvm::AMDGPU::Hwreg;
6304 
6305   int64_t ImmVal = 0;
6306   SMLoc Loc = getLoc();
6307 
6308   if (trySkipId("hwreg", AsmToken::LParen)) {
6309     OperandInfoTy HwReg(ID_UNKNOWN_);
6310     OperandInfoTy Offset(OFFSET_DEFAULT_);
6311     OperandInfoTy Width(WIDTH_DEFAULT_);
6312     if (parseHwregBody(HwReg, Offset, Width) &&
6313         validateHwreg(HwReg, Offset, Width)) {
6314       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6315     } else {
6316       return MatchOperand_ParseFail;
6317     }
6318   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6319     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6320       Error(Loc, "invalid immediate: only 16-bit values are legal");
6321       return MatchOperand_ParseFail;
6322     }
6323   } else {
6324     return MatchOperand_ParseFail;
6325   }
6326 
6327   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6328   return MatchOperand_Success;
6329 }
6330 
6331 bool AMDGPUOperand::isHwreg() const {
6332   return isImmTy(ImmTyHwreg);
6333 }
6334 
6335 //===----------------------------------------------------------------------===//
6336 // sendmsg
6337 //===----------------------------------------------------------------------===//
6338 
6339 bool
6340 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6341                                   OperandInfoTy &Op,
6342                                   OperandInfoTy &Stream) {
6343   using namespace llvm::AMDGPU::SendMsg;
6344 
6345   Msg.Loc = getLoc();
6346   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6347     Msg.IsSymbolic = true;
6348     lex(); // skip message name
6349   } else if (!parseExpr(Msg.Id, "a message name")) {
6350     return false;
6351   }
6352 
6353   if (trySkipToken(AsmToken::Comma)) {
6354     Op.IsDefined = true;
6355     Op.Loc = getLoc();
6356     if (isToken(AsmToken::Identifier) &&
6357         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6358       lex(); // skip operation name
6359     } else if (!parseExpr(Op.Id, "an operation name")) {
6360       return false;
6361     }
6362 
6363     if (trySkipToken(AsmToken::Comma)) {
6364       Stream.IsDefined = true;
6365       Stream.Loc = getLoc();
6366       if (!parseExpr(Stream.Id))
6367         return false;
6368     }
6369   }
6370 
6371   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6372 }
6373 
6374 bool
6375 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6376                                  const OperandInfoTy &Op,
6377                                  const OperandInfoTy &Stream) {
6378   using namespace llvm::AMDGPU::SendMsg;
6379 
6380   // Validation strictness depends on whether the message is specified
6381   // in a symbolic or in a numeric form. In the latter case, only
6382   // whether the value can be encoded is checked.
6383   bool Strict = Msg.IsSymbolic;
6384 
6385   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6386     Error(Msg.Loc, "invalid message id");
6387     return false;
6388   }
6389   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6390     if (Op.IsDefined) {
6391       Error(Op.Loc, "message does not support operations");
6392     } else {
6393       Error(Msg.Loc, "missing message operation");
6394     }
6395     return false;
6396   }
6397   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6398     Error(Op.Loc, "invalid operation id");
6399     return false;
6400   }
6401   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6402     Error(Stream.Loc, "message operation does not support streams");
6403     return false;
6404   }
6405   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6406     Error(Stream.Loc, "invalid message stream id");
6407     return false;
6408   }
6409   return true;
6410 }
6411 
6412 OperandMatchResultTy
6413 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6414   using namespace llvm::AMDGPU::SendMsg;
6415 
6416   int64_t ImmVal = 0;
6417   SMLoc Loc = getLoc();
6418 
6419   if (trySkipId("sendmsg", AsmToken::LParen)) {
6420     OperandInfoTy Msg(ID_UNKNOWN_);
6421     OperandInfoTy Op(OP_NONE_);
6422     OperandInfoTy Stream(STREAM_ID_NONE_);
6423     if (parseSendMsgBody(Msg, Op, Stream) &&
6424         validateSendMsg(Msg, Op, Stream)) {
6425       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6426     } else {
6427       return MatchOperand_ParseFail;
6428     }
6429   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6430     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6431       Error(Loc, "invalid immediate: only 16-bit values are legal");
6432       return MatchOperand_ParseFail;
6433     }
6434   } else {
6435     return MatchOperand_ParseFail;
6436   }
6437 
6438   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6439   return MatchOperand_Success;
6440 }
6441 
6442 bool AMDGPUOperand::isSendMsg() const {
6443   return isImmTy(ImmTySendMsg);
6444 }
6445 
6446 //===----------------------------------------------------------------------===//
6447 // v_interp
6448 //===----------------------------------------------------------------------===//
6449 
6450 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6451   StringRef Str;
6452   SMLoc S = getLoc();
6453 
6454   if (!parseId(Str))
6455     return MatchOperand_NoMatch;
6456 
6457   int Slot = StringSwitch<int>(Str)
6458     .Case("p10", 0)
6459     .Case("p20", 1)
6460     .Case("p0", 2)
6461     .Default(-1);
6462 
6463   if (Slot == -1) {
6464     Error(S, "invalid interpolation slot");
6465     return MatchOperand_ParseFail;
6466   }
6467 
6468   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6469                                               AMDGPUOperand::ImmTyInterpSlot));
6470   return MatchOperand_Success;
6471 }
6472 
6473 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6474   StringRef Str;
6475   SMLoc S = getLoc();
6476 
6477   if (!parseId(Str))
6478     return MatchOperand_NoMatch;
6479 
6480   if (!Str.startswith("attr")) {
6481     Error(S, "invalid interpolation attribute");
6482     return MatchOperand_ParseFail;
6483   }
6484 
6485   StringRef Chan = Str.take_back(2);
6486   int AttrChan = StringSwitch<int>(Chan)
6487     .Case(".x", 0)
6488     .Case(".y", 1)
6489     .Case(".z", 2)
6490     .Case(".w", 3)
6491     .Default(-1);
6492   if (AttrChan == -1) {
6493     Error(S, "invalid or missing interpolation attribute channel");
6494     return MatchOperand_ParseFail;
6495   }
6496 
6497   Str = Str.drop_back(2).drop_front(4);
6498 
6499   uint8_t Attr;
6500   if (Str.getAsInteger(10, Attr)) {
6501     Error(S, "invalid or missing interpolation attribute number");
6502     return MatchOperand_ParseFail;
6503   }
6504 
6505   if (Attr > 63) {
6506     Error(S, "out of bounds interpolation attribute number");
6507     return MatchOperand_ParseFail;
6508   }
6509 
6510   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6511 
6512   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6513                                               AMDGPUOperand::ImmTyInterpAttr));
6514   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6515                                               AMDGPUOperand::ImmTyAttrChan));
6516   return MatchOperand_Success;
6517 }
6518 
6519 //===----------------------------------------------------------------------===//
6520 // exp
6521 //===----------------------------------------------------------------------===//
6522 
6523 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6524   using namespace llvm::AMDGPU::Exp;
6525 
6526   StringRef Str;
6527   SMLoc S = getLoc();
6528 
6529   if (!parseId(Str))
6530     return MatchOperand_NoMatch;
6531 
6532   unsigned Id = getTgtId(Str);
6533   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6534     Error(S, (Id == ET_INVALID) ?
6535                 "invalid exp target" :
6536                 "exp target is not supported on this GPU");
6537     return MatchOperand_ParseFail;
6538   }
6539 
6540   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6541                                               AMDGPUOperand::ImmTyExpTgt));
6542   return MatchOperand_Success;
6543 }
6544 
6545 //===----------------------------------------------------------------------===//
6546 // parser helpers
6547 //===----------------------------------------------------------------------===//
6548 
6549 bool
6550 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6551   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6552 }
6553 
6554 bool
6555 AMDGPUAsmParser::isId(const StringRef Id) const {
6556   return isId(getToken(), Id);
6557 }
6558 
6559 bool
6560 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6561   return getTokenKind() == Kind;
6562 }
6563 
6564 bool
6565 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6566   if (isId(Id)) {
6567     lex();
6568     return true;
6569   }
6570   return false;
6571 }
6572 
6573 bool
6574 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6575   if (isToken(AsmToken::Identifier)) {
6576     StringRef Tok = getTokenStr();
6577     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6578       lex();
6579       return true;
6580     }
6581   }
6582   return false;
6583 }
6584 
6585 bool
6586 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6587   if (isId(Id) && peekToken().is(Kind)) {
6588     lex();
6589     lex();
6590     return true;
6591   }
6592   return false;
6593 }
6594 
6595 bool
6596 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6597   if (isToken(Kind)) {
6598     lex();
6599     return true;
6600   }
6601   return false;
6602 }
6603 
6604 bool
6605 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6606                            const StringRef ErrMsg) {
6607   if (!trySkipToken(Kind)) {
6608     Error(getLoc(), ErrMsg);
6609     return false;
6610   }
6611   return true;
6612 }
6613 
6614 bool
6615 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6616   SMLoc S = getLoc();
6617 
6618   const MCExpr *Expr;
6619   if (Parser.parseExpression(Expr))
6620     return false;
6621 
6622   if (Expr->evaluateAsAbsolute(Imm))
6623     return true;
6624 
6625   if (Expected.empty()) {
6626     Error(S, "expected absolute expression");
6627   } else {
6628     Error(S, Twine("expected ", Expected) +
6629              Twine(" or an absolute expression"));
6630   }
6631   return false;
6632 }
6633 
6634 bool
6635 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6636   SMLoc S = getLoc();
6637 
6638   const MCExpr *Expr;
6639   if (Parser.parseExpression(Expr))
6640     return false;
6641 
6642   int64_t IntVal;
6643   if (Expr->evaluateAsAbsolute(IntVal)) {
6644     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6645   } else {
6646     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6647   }
6648   return true;
6649 }
6650 
6651 bool
6652 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6653   if (isToken(AsmToken::String)) {
6654     Val = getToken().getStringContents();
6655     lex();
6656     return true;
6657   } else {
6658     Error(getLoc(), ErrMsg);
6659     return false;
6660   }
6661 }
6662 
6663 bool
6664 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6665   if (isToken(AsmToken::Identifier)) {
6666     Val = getTokenStr();
6667     lex();
6668     return true;
6669   } else {
6670     if (!ErrMsg.empty())
6671       Error(getLoc(), ErrMsg);
6672     return false;
6673   }
6674 }
6675 
6676 AsmToken
6677 AMDGPUAsmParser::getToken() const {
6678   return Parser.getTok();
6679 }
6680 
6681 AsmToken
6682 AMDGPUAsmParser::peekToken() {
6683   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6684 }
6685 
6686 void
6687 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6688   auto TokCount = getLexer().peekTokens(Tokens);
6689 
6690   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6691     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6692 }
6693 
6694 AsmToken::TokenKind
6695 AMDGPUAsmParser::getTokenKind() const {
6696   return getLexer().getKind();
6697 }
6698 
6699 SMLoc
6700 AMDGPUAsmParser::getLoc() const {
6701   return getToken().getLoc();
6702 }
6703 
6704 StringRef
6705 AMDGPUAsmParser::getTokenStr() const {
6706   return getToken().getString();
6707 }
6708 
6709 void
6710 AMDGPUAsmParser::lex() {
6711   Parser.Lex();
6712 }
6713 
6714 SMLoc
6715 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6716                                const OperandVector &Operands) const {
6717   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6718     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6719     if (Test(Op))
6720       return Op.getStartLoc();
6721   }
6722   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6723 }
6724 
6725 SMLoc
6726 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6727                            const OperandVector &Operands) const {
6728   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6729   return getOperandLoc(Test, Operands);
6730 }
6731 
6732 SMLoc
6733 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6734                            const OperandVector &Operands) const {
6735   auto Test = [=](const AMDGPUOperand& Op) {
6736     return Op.isRegKind() && Op.getReg() == Reg;
6737   };
6738   return getOperandLoc(Test, Operands);
6739 }
6740 
6741 SMLoc
6742 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6743   auto Test = [](const AMDGPUOperand& Op) {
6744     return Op.IsImmKindLiteral() || Op.isExpr();
6745   };
6746   return getOperandLoc(Test, Operands);
6747 }
6748 
6749 SMLoc
6750 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6751   auto Test = [](const AMDGPUOperand& Op) {
6752     return Op.isImmKindConst();
6753   };
6754   return getOperandLoc(Test, Operands);
6755 }
6756 
6757 //===----------------------------------------------------------------------===//
6758 // swizzle
6759 //===----------------------------------------------------------------------===//
6760 
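// Pack the AND/OR/XOR masks of a swizzle bitmask pattern into its
// immediate encoding.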
6761 LLVM_READNONE
6762 static unsigned
6763 encodeBitmaskPerm(const unsigned AndMask,
6764                   const unsigned OrMask,
6765                   const unsigned XorMask) {
6766   using namespace llvm::AMDGPU::Swizzle;
6767 
6768   return BITMASK_PERM_ENC |
6769          (AndMask << BITMASK_AND_SHIFT) |
6770          (OrMask  << BITMASK_OR_SHIFT)  |
6771          (XorMask << BITMASK_XOR_SHIFT);
6772 }
6773 
6774 bool
6775 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6776                                      const unsigned MinVal,
6777                                      const unsigned MaxVal,
6778                                      const StringRef ErrMsg,
6779                                      SMLoc &Loc) {
6780   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6781     return false;
6782   }
6783   Loc = getLoc();
6784   if (!parseExpr(Op)) {
6785     return false;
6786   }
6787   if (Op < MinVal || Op > MaxVal) {
6788     Error(Loc, ErrMsg);
6789     return false;
6790   }
6791 
6792   return true;
6793 }
6794 
6795 bool
6796 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6797                                       const unsigned MinVal,
6798                                       const unsigned MaxVal,
6799                                       const StringRef ErrMsg) {
6800   SMLoc Loc;
6801   for (unsigned i = 0; i < OpNum; ++i) {
6802     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6803       return false;
6804   }
6805 
6806   return true;
6807 }
6808 
6809 bool
6810 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6811   using namespace llvm::AMDGPU::Swizzle;
6812 
6813   int64_t Lane[LANE_NUM];
6814   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6815                            "expected a 2-bit lane id")) {
6816     Imm = QUAD_PERM_ENC;
6817     for (unsigned I = 0; I < LANE_NUM; ++I) {
6818       Imm |= Lane[I] << (LANE_SHIFT * I);
6819     }
6820     return true;
6821   }
6822   return false;
6823 }
6824 
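// A broadcast is encoded as a bitmask pattern: the AND mask clears the low
// log2(GroupSize) bits of the lane id and the OR mask inserts the selected
// lane index.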
6825 bool
6826 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6827   using namespace llvm::AMDGPU::Swizzle;
6828 
6829   SMLoc Loc;
6830   int64_t GroupSize;
6831   int64_t LaneIdx;
6832 
6833   if (!parseSwizzleOperand(GroupSize,
6834                            2, 32,
6835                            "group size must be in the interval [2,32]",
6836                            Loc)) {
6837     return false;
6838   }
6839   if (!isPowerOf2_64(GroupSize)) {
6840     Error(Loc, "group size must be a power of two");
6841     return false;
6842   }
6843   if (parseSwizzleOperand(LaneIdx,
6844                           0, GroupSize - 1,
6845                           "lane id must be in the interval [0,group size - 1]",
6846                           Loc)) {
6847     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6848     return true;
6849   }
6850   return false;
6851 }
6852 
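// Lane reversal within a group is encoded as an XOR with GroupSize - 1.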
6853 bool
6854 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6855   using namespace llvm::AMDGPU::Swizzle;
6856 
6857   SMLoc Loc;
6858   int64_t GroupSize;
6859 
6860   if (!parseSwizzleOperand(GroupSize,
6861                            2, 32,
6862                            "group size must be in the interval [2,32]",
6863                            Loc)) {
6864     return false;
6865   }
6866   if (!isPowerOf2_64(GroupSize)) {
6867     Error(Loc, "group size must be a power of two");
6868     return false;
6869   }
6870 
6871   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6872   return true;
6873 }
6874 
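// Swapping adjacent groups is encoded as an XOR with GroupSize.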
6875 bool
6876 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6877   using namespace llvm::AMDGPU::Swizzle;
6878 
6879   SMLoc Loc;
6880   int64_t GroupSize;
6881 
6882   if (!parseSwizzleOperand(GroupSize,
6883                            1, 16,
6884                            "group size must be in the interval [1,16]",
6885                            Loc)) {
6886     return false;
6887   }
6888   if (!isPowerOf2_64(GroupSize)) {
6889     Error(Loc, "group size must be a power of two");
6890     return false;
6891   }
6892 
6893   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6894   return true;
6895 }
6896 
6897 bool
6898 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6899   using namespace llvm::AMDGPU::Swizzle;
6900 
6901   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6902     return false;
6903   }
6904 
6905   StringRef Ctl;
6906   SMLoc StrLoc = getLoc();
6907   if (!parseString(Ctl)) {
6908     return false;
6909   }
6910   if (Ctl.size() != BITMASK_WIDTH) {
6911     Error(StrLoc, "expected a 5-character mask");
6912     return false;
6913   }
6914 
6915   unsigned AndMask = 0;
6916   unsigned OrMask = 0;
6917   unsigned XorMask = 0;
6918 
6919   for (size_t i = 0; i < Ctl.size(); ++i) {
6920     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6921     switch(Ctl[i]) {
6922     default:
6923       Error(StrLoc, "invalid mask");
6924       return false;
6925     case '0':
6926       break;
6927     case '1':
6928       OrMask |= Mask;
6929       break;
6930     case 'p':
6931       AndMask |= Mask;
6932       break;
6933     case 'i':
6934       AndMask |= Mask;
6935       XorMask |= Mask;
6936       break;
6937     }
6938   }
6939 
6940   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6941   return true;
6942 }
6943 
6944 bool
6945 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6946 
6947   SMLoc OffsetLoc = getLoc();
6948 
6949   if (!parseExpr(Imm, "a swizzle macro")) {
6950     return false;
6951   }
6952   if (!isUInt<16>(Imm)) {
6953     Error(OffsetLoc, "expected a 16-bit offset");
6954     return false;
6955   }
6956   return true;
6957 }
6958 
6959 bool
6960 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6961   using namespace llvm::AMDGPU::Swizzle;
6962 
6963   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6964 
6965     SMLoc ModeLoc = getLoc();
6966     bool Ok = false;
6967 
6968     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6969       Ok = parseSwizzleQuadPerm(Imm);
6970     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6971       Ok = parseSwizzleBitmaskPerm(Imm);
6972     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6973       Ok = parseSwizzleBroadcast(Imm);
6974     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6975       Ok = parseSwizzleSwap(Imm);
6976     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6977       Ok = parseSwizzleReverse(Imm);
6978     } else {
6979       Error(ModeLoc, "expected a swizzle mode");
6980     }
6981 
6982     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6983   }
6984 
6985   return false;
6986 }
6987 
6988 OperandMatchResultTy
6989 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6990   SMLoc S = getLoc();
6991   int64_t Imm = 0;
6992 
6993   if (trySkipId("offset")) {
6994 
6995     bool Ok = false;
6996     if (skipToken(AsmToken::Colon, "expected a colon")) {
6997       if (trySkipId("swizzle")) {
6998         Ok = parseSwizzleMacro(Imm);
6999       } else {
7000         Ok = parseSwizzleOffset(Imm);
7001       }
7002     }
7003 
7004     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7005 
7006     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7007   } else {
7008     // Swizzle "offset" operand is optional.
7009     // If it is omitted, try parsing other optional operands.
7010     return parseOptionalOpr(Operands);
7011   }
7012 }
7013 
7014 bool
7015 AMDGPUOperand::isSwizzle() const {
7016   return isImmTy(ImmTySwizzle);
7017 }
7018 
7019 //===----------------------------------------------------------------------===//
7020 // VGPR Index Mode
7021 //===----------------------------------------------------------------------===//
7022 
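// Parse the mode list inside a gpr_idx(...) operand. Returns OFF for an
// empty list, UNDEF on a parse error, and otherwise the OR of the selected
// mode bits.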
7023 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7024 
7025   using namespace llvm::AMDGPU::VGPRIndexMode;
7026 
7027   if (trySkipToken(AsmToken::RParen)) {
7028     return OFF;
7029   }
7030 
7031   int64_t Imm = 0;
7032 
7033   while (true) {
7034     unsigned Mode = 0;
7035     SMLoc S = getLoc();
7036 
7037     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7038       if (trySkipId(IdSymbolic[ModeId])) {
7039         Mode = 1 << ModeId;
7040         break;
7041       }
7042     }
7043 
7044     if (Mode == 0) {
7045       Error(S, (Imm == 0)?
7046                "expected a VGPR index mode or a closing parenthesis" :
7047                "expected a VGPR index mode");
7048       return UNDEF;
7049     }
7050 
7051     if (Imm & Mode) {
7052       Error(S, "duplicate VGPR index mode");
7053       return UNDEF;
7054     }
7055     Imm |= Mode;
7056 
7057     if (trySkipToken(AsmToken::RParen))
7058       break;
7059     if (!skipToken(AsmToken::Comma,
7060                    "expected a comma or a closing parenthesis"))
7061       return UNDEF;
7062   }
7063 
7064   return Imm;
7065 }
7066 
7067 OperandMatchResultTy
7068 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7069 
7070   using namespace llvm::AMDGPU::VGPRIndexMode;
7071 
7072   int64_t Imm = 0;
7073   SMLoc S = getLoc();
7074 
7075   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7076     Imm = parseGPRIdxMacro();
7077     if (Imm == UNDEF)
7078       return MatchOperand_ParseFail;
7079   } else {
7080     if (getParser().parseAbsoluteExpression(Imm))
7081       return MatchOperand_ParseFail;
7082     if (Imm < 0 || !isUInt<4>(Imm)) {
7083       Error(S, "invalid immediate: only 4-bit values are legal");
7084       return MatchOperand_ParseFail;
7085     }
7086   }
7087 
7088   Operands.push_back(
7089       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7090   return MatchOperand_Success;
7091 }
7092 
7093 bool AMDGPUOperand::isGPRIdxMode() const {
7094   return isImmTy(ImmTyGprIdxMode);
7095 }
7096 
7097 //===----------------------------------------------------------------------===//
7098 // sopp branch targets
7099 //===----------------------------------------------------------------------===//
7100 
7101 OperandMatchResultTy
7102 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7103 
7104   // Make sure we are not parsing something
7105   // that looks like a label or an expression but is not.
7106   // This will improve error messages.
7107   if (isRegister() || isModifier())
7108     return MatchOperand_NoMatch;
7109 
7110   if (!parseExpr(Operands))
7111     return MatchOperand_ParseFail;
7112 
7113   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7114   assert(Opr.isImm() || Opr.isExpr());
7115   SMLoc Loc = Opr.getStartLoc();
7116 
7117   // Currently we do not support arbitrary expressions as branch targets.
7118   // Only labels and absolute expressions are accepted.
7119   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7120     Error(Loc, "expected an absolute expression or a label");
7121   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7122     Error(Loc, "expected a 16-bit signed jump offset");
7123   }
7124 
7125   return MatchOperand_Success;
7126 }
7127 
7128 //===----------------------------------------------------------------------===//
7129 // Boolean holding registers
7130 //===----------------------------------------------------------------------===//
7131 
7132 OperandMatchResultTy
7133 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7134   return parseReg(Operands);
7135 }
7136 
7137 //===----------------------------------------------------------------------===//
7138 // mubuf
7139 //===----------------------------------------------------------------------===//
7140 
7141 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7142   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7143 }
7144 
7145 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7146                                    const OperandVector &Operands,
7147                                    bool IsAtomic,
7148                                    bool IsLds) {
7149   bool IsLdsOpcode = IsLds;
7150   bool HasLdsModifier = false;
7151   OptionalImmIndexMap OptionalIdx;
7152   unsigned FirstOperandIdx = 1;
7153   bool IsAtomicReturn = false;
7154 
7155   if (IsAtomic) {
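    // An atomic with return is indicated by the GLC bit in the cache policy
    // operand; switch to the no-return opcode when the bit is not set.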
7156     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7157       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7158       if (!Op.isCPol())
7159         continue;
7160       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7161       break;
7162     }
7163 
7164     if (!IsAtomicReturn) {
7165       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7166       if (NewOpc != -1)
7167         Inst.setOpcode(NewOpc);
7168     }
7169 
7170     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7171                       SIInstrFlags::IsAtomicRet;
7172   }
7173 
7174   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7175     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7176 
7177     // Add the register arguments
7178     if (Op.isReg()) {
7179       Op.addRegOperands(Inst, 1);
7180       // Insert a tied src for atomic return dst.
7181       // This cannot be postponed as subsequent calls to
7182       // addImmOperands rely on correct number of MC operands.
7183       if (IsAtomicReturn && i == FirstOperandIdx)
7184         Op.addRegOperands(Inst, 1);
7185       continue;
7186     }
7187 
7188     // Handle the case where soffset is an immediate
7189     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7190       Op.addImmOperands(Inst, 1);
7191       continue;
7192     }
7193 
7194     HasLdsModifier |= Op.isLDS();
7195 
7196     // Handle tokens like 'offen' which are sometimes hard-coded into the
7197     // asm string.  There are no MCInst operands for these.
7198     if (Op.isToken()) {
7199       continue;
7200     }
7201     assert(Op.isImm());
7202 
7203     // Handle optional arguments
7204     OptionalIdx[Op.getImmTy()] = i;
7205   }
7206 
7207   // This is a workaround for an llvm quirk which may result in an
7208   // incorrect instruction selection. Lds and non-lds versions of
7209   // MUBUF instructions are identical except that lds versions
7210   // have a mandatory 'lds' modifier. However, this modifier follows
7211   // optional modifiers, and the llvm asm matcher regards this 'lds'
7212   // modifier as an optional one. As a result, an lds version
7213   // of an opcode may be selected even if it has no 'lds' modifier.
7214   if (IsLdsOpcode && !HasLdsModifier) {
7215     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7216     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7217       Inst.setOpcode(NoLdsOpcode);
7218       IsLdsOpcode = false;
7219     }
7220   }
7221 
7222   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7223   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7224 
7225   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7226     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7227   }
7228   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7229 }
7230 
7231 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7232   OptionalImmIndexMap OptionalIdx;
7233 
7234   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7235     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7236 
7237     // Add the register arguments
7238     if (Op.isReg()) {
7239       Op.addRegOperands(Inst, 1);
7240       continue;
7241     }
7242 
7243     // Handle the case where soffset is an immediate
7244     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7245       Op.addImmOperands(Inst, 1);
7246       continue;
7247     }
7248 
7249     // Handle tokens like 'offen' which are sometimes hard-coded into the
7250     // asm string.  There are no MCInst operands for these.
7251     if (Op.isToken()) {
7252       continue;
7253     }
7254     assert(Op.isImm());
7255 
7256     // Handle optional arguments
7257     OptionalIdx[Op.getImmTy()] = i;
7258   }
7259 
7260   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7261                         AMDGPUOperand::ImmTyOffset);
7262   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7263   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7264   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7265   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7266 }
7267 
7268 //===----------------------------------------------------------------------===//
7269 // mimg
7270 //===----------------------------------------------------------------------===//
7271 
7272 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7273                               bool IsAtomic) {
7274   unsigned I = 1;
7275   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7276   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7277     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7278   }
7279 
7280   if (IsAtomic) {
7281     // Add src, same as dst
7282     assert(Desc.getNumDefs() == 1);
7283     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7284   }
7285 
7286   OptionalImmIndexMap OptionalIdx;
7287 
7288   for (unsigned E = Operands.size(); I != E; ++I) {
7289     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7290 
7291     // Add the register arguments
7292     if (Op.isReg()) {
7293       Op.addRegOperands(Inst, 1);
7294     } else if (Op.isImmModifier()) {
7295       OptionalIdx[Op.getImmTy()] = I;
7296     } else if (!Op.isToken()) {
7297       llvm_unreachable("unexpected operand type");
7298     }
7299   }
7300 
7301   bool IsGFX10Plus = isGFX10Plus();
7302 
7303   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7304   if (IsGFX10Plus)
7305     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7306   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7307   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7308   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7309   if (IsGFX10Plus)
7310     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7311   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7312     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7313   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7314   if (!IsGFX10Plus)
7315     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7316   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7317 }
7318 
7319 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7320   cvtMIMG(Inst, Operands, true);
7321 }
7322 
7323 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7324   OptionalImmIndexMap OptionalIdx;
7325   bool IsAtomicReturn = false;
7326 
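  // An SMEM atomic with return is indicated by the GLC bit in the cache
  // policy operand; switch to the no-return opcode when the bit is not set.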
7327   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7328     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7329     if (!Op.isCPol())
7330       continue;
7331     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7332     break;
7333   }
7334 
7335   if (!IsAtomicReturn) {
7336     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7337     if (NewOpc != -1)
7338       Inst.setOpcode(NewOpc);
7339   }
7340 
7341   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7342                     SIInstrFlags::IsAtomicRet;
7343 
7344   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7345     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7346 
7347     // Add the register arguments
7348     if (Op.isReg()) {
7349       Op.addRegOperands(Inst, 1);
7350       if (IsAtomicReturn && i == 1)
7351         Op.addRegOperands(Inst, 1);
7352       continue;
7353     }
7354 
7355     // Handle the case where soffset is an immediate
7356     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7357       Op.addImmOperands(Inst, 1);
7358       continue;
7359     }
7360 
7361     // Handle tokens like 'offen' which are sometimes hard-coded into the
7362     // asm string.  There are no MCInst operands for these.
7363     if (Op.isToken()) {
7364       continue;
7365     }
7366     assert(Op.isImm());
7367 
7368     // Handle optional arguments
7369     OptionalIdx[Op.getImmTy()] = i;
7370   }
7371 
7372   if ((int)Inst.getNumOperands() <=
7373       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7374     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7375   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7376 }
7377 
7378 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7379                                       const OperandVector &Operands) {
7380   for (unsigned I = 1; I < Operands.size(); ++I) {
7381     auto &Operand = (AMDGPUOperand &)*Operands[I];
7382     if (Operand.isReg())
7383       Operand.addRegOperands(Inst, 1);
7384   }
7385 
7386   Inst.addOperand(MCOperand::createImm(1)); // a16
7387 }
7388 
7389 //===----------------------------------------------------------------------===//
7390 // smrd
7391 //===----------------------------------------------------------------------===//
7392 
7393 bool AMDGPUOperand::isSMRDOffset8() const {
7394   return isImm() && isUInt<8>(getImm());
7395 }
7396 
7397 bool AMDGPUOperand::isSMEMOffset() const {
7398   return isImm(); // Offset range is checked later by validator.
7399 }
7400 
7401 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7402   // 32-bit literals are only supported on CI, and we only want to use them
7403   // when the offset does not fit into 8 bits.
7404   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7405 }
7406 
7407 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7408   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7409 }
7410 
7411 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7412   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7413 }
7414 
7415 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7416   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7417 }
7418 
7419 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7420   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7421 }
7422 
7423 //===----------------------------------------------------------------------===//
7424 // vop3
7425 //===----------------------------------------------------------------------===//
7426 
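// The omod field encodes the output modifier: 0 = none, 1 = *2, 2 = *4,
// 3 = /2. The mul/div operand syntax is converted to this encoding below.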
7427 static bool ConvertOmodMul(int64_t &Mul) {
7428   if (Mul != 1 && Mul != 2 && Mul != 4)
7429     return false;
7430 
7431   Mul >>= 1;
7432   return true;
7433 }
7434 
7435 static bool ConvertOmodDiv(int64_t &Div) {
7436   if (Div == 1) {
7437     Div = 0;
7438     return true;
7439   }
7440 
7441   if (Div == 2) {
7442     Div = 3;
7443     return true;
7444   }
7445 
7446   return false;
7447 }
7448 
7449 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7450 // This is intentional and ensures compatibility with sp3.
7451 // See bug 35397 for details.
7452 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7453   if (BoundCtrl == 0 || BoundCtrl == 1) {
7454     BoundCtrl = 1;
7455     return true;
7456   }
7457   return false;
7458 }
7459 
7460 // Note: the order in this table matches the order of operands in AsmString.
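// Each entry is {name, immediate type, IsBit, optional ConvertResult callback}.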
7461 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7462   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7463   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7464   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7465   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7466   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7467   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7468   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7469   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7470   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7471   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7472   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7473   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7474   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7475   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7476   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7477   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7478   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7479   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7480   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7481   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7482   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7483   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7484   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7485   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7486   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7487   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7488   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7489   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7490   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7491   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7492   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7493   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7494   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7495   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7496   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7497   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7498   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7499   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7500   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7501   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7502   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7503 };
7504 
7505 void AMDGPUAsmParser::onBeginOfFile() {
7506   if (!getParser().getStreamer().getTargetStreamer() ||
7507       getSTI().getTargetTriple().getArch() == Triple::r600)
7508     return;
7509 
7510   if (!getTargetStreamer().getTargetID())
7511     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7512 
7513   if (isHsaAbiVersion3AndAbove(&getSTI()))
7514     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7515 }
7516 
7517 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7518 
7519   OperandMatchResultTy res = parseOptionalOpr(Operands);
7520 
7521   // This is a hack to enable hardcoded mandatory operands which follow
7522   // optional operands.
7523   //
7524   // The current design assumes that all operands after the first optional operand
7525   // are also optional. However, the implementation of some instructions violates
7526   // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7527   //
7528   // To alleviate this problem, we have to (implicitly) parse extra operands
7529   // to make sure the autogenerated parser of custom operands never hits
7530   // hardcoded mandatory operands.
7531 
7532   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7533     if (res != MatchOperand_Success ||
7534         isToken(AsmToken::EndOfStatement))
7535       break;
7536 
7537     trySkipToken(AsmToken::Comma);
7538     res = parseOptionalOpr(Operands);
7539   }
7540 
7541   return res;
7542 }
7543 
7544 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7545   OperandMatchResultTy res;
7546   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7547     // try to parse any optional operand here
7548     if (Op.IsBit) {
7549       res = parseNamedBit(Op.Name, Operands, Op.Type);
7550     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7551       res = parseOModOperand(Operands);
7552     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7553                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7554                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7555       res = parseSDWASel(Operands, Op.Name, Op.Type);
7556     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7557       res = parseSDWADstUnused(Operands);
7558     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7559                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7560                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7561                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7562       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7563                                         Op.ConvertResult);
7564     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7565       res = parseDim(Operands);
7566     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7567       res = parseCPol(Operands);
7568     } else {
7569       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7570     }
7571     if (res != MatchOperand_NoMatch) {
7572       return res;
7573     }
7574   }
7575   return MatchOperand_NoMatch;
7576 }
7577 
7578 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7579   StringRef Name = getTokenStr();
7580   if (Name == "mul") {
7581     return parseIntWithPrefix("mul", Operands,
7582                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7583   }
7584 
7585   if (Name == "div") {
7586     return parseIntWithPrefix("div", Operands,
7587                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7588   }
7589 
7590   return MatchOperand_NoMatch;
7591 }
7592 
7593 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7594   cvtVOP3P(Inst, Operands);
7595 
7596   int Opc = Inst.getOpcode();
7597 
7598   int SrcNum;
7599   const int Ops[] = { AMDGPU::OpName::src0,
7600                       AMDGPU::OpName::src1,
7601                       AMDGPU::OpName::src2 };
7602   for (SrcNum = 0;
7603        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7604        ++SrcNum);
7605   assert(SrcNum > 0);
7606 
7607   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7608   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7609 
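  // The op_sel bit for the destination follows the bits for the sources;
  // record it as DST_OP_SEL in src0_modifiers.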
7610   if ((OpSel & (1 << SrcNum)) != 0) {
7611     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7612     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7613     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7614   }
7615 }
7616 
7617 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7618       // 1. This operand is an input-modifiers operand
7619   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7620       // 2. This is not the last operand
7621       && Desc.NumOperands > (OpNum + 1)
7622       // 3. The next operand is a register-class operand
7623       && Desc.OpInfo[OpNum + 1].RegClass != -1
7624       // 4. The next operand is not tied to any other operand
7625       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7626 }
7627 
7628 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7629 {
7630   OptionalImmIndexMap OptionalIdx;
7631   unsigned Opc = Inst.getOpcode();
7632 
7633   unsigned I = 1;
7634   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7635   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7636     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7637   }
7638 
7639   for (unsigned E = Operands.size(); I != E; ++I) {
7640     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7641     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7642       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7643     } else if (Op.isInterpSlot() ||
7644                Op.isInterpAttr() ||
7645                Op.isAttrChan()) {
7646       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7647     } else if (Op.isImmModifier()) {
7648       OptionalIdx[Op.getImmTy()] = I;
7649     } else {
7650       llvm_unreachable("unhandled operand type");
7651     }
7652   }
7653 
7654   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7655     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7656   }
7657 
7658   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7659     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7660   }
7661 
7662   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7663     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7664   }
7665 }
7666 
7667 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7668                               OptionalImmIndexMap &OptionalIdx) {
7669   unsigned Opc = Inst.getOpcode();
7670 
7671   unsigned I = 1;
7672   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7673   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7674     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7675   }
7676 
7677   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7678     // This instruction has src modifiers
7679     for (unsigned E = Operands.size(); I != E; ++I) {
7680       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7681       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7682         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7683       } else if (Op.isImmModifier()) {
7684         OptionalIdx[Op.getImmTy()] = I;
7685       } else if (Op.isRegOrImm()) {
7686         Op.addRegOrImmOperands(Inst, 1);
7687       } else {
7688         llvm_unreachable("unhandled operand type");
7689       }
7690     }
7691   } else {
7692     // No src modifiers
7693     for (unsigned E = Operands.size(); I != E; ++I) {
7694       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7695       if (Op.isMod()) {
7696         OptionalIdx[Op.getImmTy()] = I;
7697       } else {
7698         Op.addRegOrImmOperands(Inst, 1);
7699       }
7700     }
7701   }
7702 
7703   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7704     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7705   }
7706 
7707   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7708     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7709   }
7710 
7711   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7712   // they have a src2 register operand that is tied to the dst operand.
7713   // We do not allow modifiers for this operand in the assembler, so
7714   // src2_modifiers should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

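  // Instructions with a tied vdst_in operand read the destination register as
  // an implicit input; mirror the parsed vdst here to satisfy that tie.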
  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
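    // Packed instructions default op_sel_hi to all ones (-1); unpacked
    // instructions default it to 0.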
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

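  // Fold the parsed op_sel / op_sel_hi / neg_lo / neg_hi masks into the
  // per-source srcN_modifiers operands; bit J of each mask refers to srcJ.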
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

//===----------------------------------------------------------------------===//
// dim
//===----------------------------------------------------------------------===//

bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

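  // Accept both the bare asm suffix (e.g. "1D", "2D_ARRAY") and the full
  // SQ_RSRC_IMG_* name.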
  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}

OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding)) {
    Error(Loc, "invalid dim value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i]) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

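  // Pack the eight 3-bit lane selectors into a single immediate: lane i
  // occupies bits [3*i+2 : 3*i].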
  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}

int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

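  // Map each ctrl name to its base encoding and the inclusive range of values
  // accepted after the colon; when Lo == Hi the value is implied by the
  // encoding itself.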
  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

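// Defaults for optional DPP operands: row_mask and bank_mask are 0xf (all rows
// and banks enabled), bound_ctrl and fi are 0.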
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned Opc = Inst.getOpcode();
  bool HasModifiers =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

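  // dpp8 carries an fi operand; remember its value here and append it after
  // all explicit operands have been added.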
  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or src2 operand for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (HasModifiers &&
                 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (HasModifiers &&
          isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

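  // <prefix>:BYTE_0 | BYTE_1 | BYTE_2 | BYTE_3 | WORD_0 | WORD_1 | DWORD,
  // e.g. dst_sel:WORD_1 or src0_sel:BYTE_0.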
  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

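  // dst_unused:UNUSED_PAD | UNUSED_SEXT | UNUSED_PRESERVE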
  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
      // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we did not skip it on the previous iteration.
      // Note that src0 and src1 occupy two slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi/gfx9/gfx10 have no optional SDWA arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}: they have a src2 register operand that is
  // tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand but
  // expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they return true for isToken
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() always tries to match
    // an operand as a token when isToken returns true, and the match fails
    // when the name of the expression is not a valid token, so we need to
    // handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // The 'null' register is defined as a 32-bit register, but it should also
    // be accepted where 64-bit operands are expected. The following code
    // enables it for SReg_64 operands used as source and destination.
    // Remaining source operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if it is absent, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
