1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/AMDGPUMetadata.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/MachineValueType.h"
38 #include "llvm/Support/TargetParser.h"
39 
40 using namespace llvm;
41 using namespace llvm::AMDGPU;
42 using namespace llvm::amdhsa;
43 
44 namespace {
45 
46 class AMDGPUAsmParser;
47 
48 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
49 
50 //===----------------------------------------------------------------------===//
51 // Operand
52 //===----------------------------------------------------------------------===//
53 
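/// A parsed AMDGPU assembly operand: a bare token, an immediate (possibly a
/// named instruction modifier), a register, or an MCExpr, optionally carrying
/// abs/neg/sext source modifiers.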
54 class AMDGPUOperand : public MCParsedAsmOperand {
55   enum KindTy {
56     Token,
57     Immediate,
58     Register,
59     Expression
60   } Kind;
61 
62   SMLoc StartLoc, EndLoc;
63   const AMDGPUAsmParser *AsmParser;
64 
65 public:
66   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
67       : Kind(Kind_), AsmParser(AsmParser_) {}
68 
69   using Ptr = std::unique_ptr<AMDGPUOperand>;
70 
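  // Source operand modifiers. Abs and Neg are floating-point modifiers
  // (e.g. "abs(v0)", "-v0", "-|v0|"); Sext is an integer modifier
  // (e.g. "sext(v0)"). FP and integer modifiers are mutually exclusive.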
71   struct Modifiers {
72     bool Abs = false;
73     bool Neg = false;
74     bool Sext = false;
75 
76     bool hasFPModifiers() const { return Abs || Neg; }
77     bool hasIntModifiers() const { return Sext; }
78     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
79 
80     int64_t getFPModifiersOperand() const {
81       int64_t Operand = 0;
82       Operand |= Abs ? SISrcMods::ABS : 0u;
83       Operand |= Neg ? SISrcMods::NEG : 0u;
84       return Operand;
85     }
86 
87     int64_t getIntModifiersOperand() const {
88       int64_t Operand = 0;
89       Operand |= Sext ? SISrcMods::SEXT : 0u;
90       return Operand;
91     }
92 
93     int64_t getModifiersOperand() const {
94       assert(!(hasFPModifiers() && hasIntModifiers())
95            && "fp and int modifiers should not be used simultaneously");
96       if (hasFPModifiers()) {
97         return getFPModifiersOperand();
98       } else if (hasIntModifiers()) {
99         return getIntModifiersOperand();
100       } else {
101         return 0;
102       }
103     }
104 
105     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
106   };
107 
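  // Kinds of immediate operands. ImmTyNone is an ordinary literal; the
  // remaining values identify named instruction modifiers (offsets, cache
  // policy, DPP/SDWA controls, MIMG flags, etc.) parsed as immediates.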
108   enum ImmTy {
109     ImmTyNone,
110     ImmTyGDS,
111     ImmTyLDS,
112     ImmTyOffen,
113     ImmTyIdxen,
114     ImmTyAddr64,
115     ImmTyOffset,
116     ImmTyInstOffset,
117     ImmTyOffset0,
118     ImmTyOffset1,
119     ImmTyCPol,
120     ImmTySWZ,
121     ImmTyTFE,
122     ImmTyD16,
123     ImmTyClampSI,
124     ImmTyOModSI,
125     ImmTyDPP8,
126     ImmTyDppCtrl,
127     ImmTyDppRowMask,
128     ImmTyDppBankMask,
129     ImmTyDppBoundCtrl,
130     ImmTyDppFi,
131     ImmTySdwaDstSel,
132     ImmTySdwaSrc0Sel,
133     ImmTySdwaSrc1Sel,
134     ImmTySdwaDstUnused,
135     ImmTyDMask,
136     ImmTyDim,
137     ImmTyUNorm,
138     ImmTyDA,
139     ImmTyR128A16,
140     ImmTyA16,
141     ImmTyLWE,
142     ImmTyExpTgt,
143     ImmTyExpCompr,
144     ImmTyExpVM,
145     ImmTyFORMAT,
146     ImmTyHwreg,
147     ImmTyOff,
148     ImmTySendMsg,
149     ImmTyInterpSlot,
150     ImmTyInterpAttr,
151     ImmTyAttrChan,
152     ImmTyOpSel,
153     ImmTyOpSelHi,
154     ImmTyNegLo,
155     ImmTyNegHi,
156     ImmTySwizzle,
157     ImmTyGprIdxMode,
158     ImmTyHigh,
159     ImmTyBLGP,
160     ImmTyCBSZ,
161     ImmTyABID,
162     ImmTyEndpgm,
163   };
164 
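  // Records how an immediate was (or will be) encoded once added to an
  // MCInst: not yet classified, as a literal constant, or as an inline
  // constant. Set via the setImmKind* helpers below.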
165   enum ImmKindTy {
166     ImmKindTyNone,
167     ImmKindTyLiteral,
168     ImmKindTyConst,
169   };
170 
171 private:
172   struct TokOp {
173     const char *Data;
174     unsigned Length;
175   };
176 
177   struct ImmOp {
178     int64_t Val;
179     ImmTy Type;
180     bool IsFPImm;
181     mutable ImmKindTy Kind;
182     Modifiers Mods;
183   };
184 
185   struct RegOp {
186     unsigned RegNo;
187     Modifiers Mods;
188   };
189 
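  // Operand payload; the active member is selected by Kind.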
190   union {
191     TokOp Tok;
192     ImmOp Imm;
193     RegOp Reg;
194     const MCExpr *Expr;
195   };
196 
197 public:
198   bool isToken() const override {
199     if (Kind == Token)
200       return true;
201 
202     // When parsing operands, we can't always tell if something was meant to be
203     // a token, like 'gds', or an expression that references a global variable.
204     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
206     return isSymbolRefExpr();
207   }
208 
209   bool isSymbolRefExpr() const {
210     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
211   }
212 
213   bool isImm() const override {
214     return Kind == Immediate;
215   }
216 
217   void setImmKindNone() const {
218     assert(isImm());
219     Imm.Kind = ImmKindTyNone;
220   }
221 
222   void setImmKindLiteral() const {
223     assert(isImm());
224     Imm.Kind = ImmKindTyLiteral;
225   }
226 
227   void setImmKindConst() const {
228     assert(isImm());
229     Imm.Kind = ImmKindTyConst;
230   }
231 
232   bool IsImmKindLiteral() const {
233     return isImm() && Imm.Kind == ImmKindTyLiteral;
234   }
235 
236   bool isImmKindConst() const {
237     return isImm() && Imm.Kind == ImmKindTyConst;
238   }
239 
240   bool isInlinableImm(MVT type) const;
241   bool isLiteralImm(MVT type) const;
242 
243   bool isRegKind() const {
244     return Kind == Register;
245   }
246 
247   bool isReg() const override {
248     return isRegKind() && !hasModifiers();
249   }
250 
251   bool isRegOrInline(unsigned RCID, MVT type) const {
252     return isRegClass(RCID) || isInlinableImm(type);
253   }
254 
255   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
256     return isRegOrInline(RCID, type) || isLiteralImm(type);
257   }
258 
259   bool isRegOrImmWithInt16InputMods() const {
260     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
261   }
262 
263   bool isRegOrImmWithInt32InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
265   }
266 
267   bool isRegOrImmWithInt64InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
269   }
270 
271   bool isRegOrImmWithFP16InputMods() const {
272     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
273   }
274 
275   bool isRegOrImmWithFP32InputMods() const {
276     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
277   }
278 
279   bool isRegOrImmWithFP64InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
281   }
282 
283   bool isVReg() const {
284     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
285            isRegClass(AMDGPU::VReg_64RegClassID) ||
286            isRegClass(AMDGPU::VReg_96RegClassID) ||
287            isRegClass(AMDGPU::VReg_128RegClassID) ||
288            isRegClass(AMDGPU::VReg_160RegClassID) ||
289            isRegClass(AMDGPU::VReg_192RegClassID) ||
290            isRegClass(AMDGPU::VReg_256RegClassID) ||
291            isRegClass(AMDGPU::VReg_512RegClassID) ||
292            isRegClass(AMDGPU::VReg_1024RegClassID);
293   }
294 
295   bool isVReg32() const {
296     return isRegClass(AMDGPU::VGPR_32RegClassID);
297   }
298 
299   bool isVReg32OrOff() const {
300     return isOff() || isVReg32();
301   }
302 
303   bool isNull() const {
304     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
305   }
306 
307   bool isVRegWithInputMods() const;
308 
309   bool isSDWAOperand(MVT type) const;
310   bool isSDWAFP16Operand() const;
311   bool isSDWAFP32Operand() const;
312   bool isSDWAInt16Operand() const;
313   bool isSDWAInt32Operand() const;
314 
315   bool isImmTy(ImmTy ImmT) const {
316     return isImm() && Imm.Type == ImmT;
317   }
318 
319   bool isImmModifier() const {
320     return isImm() && Imm.Type != ImmTyNone;
321   }
322 
323   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
324   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
325   bool isDMask() const { return isImmTy(ImmTyDMask); }
326   bool isDim() const { return isImmTy(ImmTyDim); }
327   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
328   bool isDA() const { return isImmTy(ImmTyDA); }
329   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
330   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
331   bool isLWE() const { return isImmTy(ImmTyLWE); }
332   bool isOff() const { return isImmTy(ImmTyOff); }
333   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
334   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
335   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
336   bool isOffen() const { return isImmTy(ImmTyOffen); }
337   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
338   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
339   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
340   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
341   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
342 
343   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
344   bool isGDS() const { return isImmTy(ImmTyGDS); }
345   bool isLDS() const { return isImmTy(ImmTyLDS); }
346   bool isCPol() const { return isImmTy(ImmTyCPol); }
347   bool isSWZ() const { return isImmTy(ImmTySWZ); }
348   bool isTFE() const { return isImmTy(ImmTyTFE); }
349   bool isD16() const { return isImmTy(ImmTyD16); }
350   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
351   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
352   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
353   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
354   bool isFI() const { return isImmTy(ImmTyDppFi); }
355   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
356   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
357   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
358   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
359   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
360   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
361   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
362   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
363   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
364   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
365   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
366   bool isHigh() const { return isImmTy(ImmTyHigh); }
367 
368   bool isMod() const {
369     return isClampSI() || isOModSI();
370   }
371 
372   bool isRegOrImm() const {
373     return isReg() || isImm();
374   }
375 
376   bool isRegClass(unsigned RCID) const;
377 
378   bool isInlineValue() const;
379 
380   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
381     return isRegOrInline(RCID, type) && !hasModifiers();
382   }
383 
384   bool isSCSrcB16() const {
385     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
386   }
387 
388   bool isSCSrcV2B16() const {
389     return isSCSrcB16();
390   }
391 
392   bool isSCSrcB32() const {
393     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
394   }
395 
396   bool isSCSrcB64() const {
397     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
398   }
399 
400   bool isBoolReg() const;
401 
402   bool isSCSrcF16() const {
403     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
404   }
405 
406   bool isSCSrcV2F16() const {
407     return isSCSrcF16();
408   }
409 
410   bool isSCSrcF32() const {
411     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
412   }
413 
414   bool isSCSrcF64() const {
415     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
416   }
417 
418   bool isSSrcB32() const {
419     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
420   }
421 
422   bool isSSrcB16() const {
423     return isSCSrcB16() || isLiteralImm(MVT::i16);
424   }
425 
426   bool isSSrcV2B16() const {
427     llvm_unreachable("cannot happen");
428     return isSSrcB16();
429   }
430 
431   bool isSSrcB64() const {
432     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
433     // See isVSrc64().
434     return isSCSrcB64() || isLiteralImm(MVT::i64);
435   }
436 
437   bool isSSrcF32() const {
438     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
439   }
440 
441   bool isSSrcF64() const {
442     return isSCSrcB64() || isLiteralImm(MVT::f64);
443   }
444 
445   bool isSSrcF16() const {
446     return isSCSrcB16() || isLiteralImm(MVT::f16);
447   }
448 
449   bool isSSrcV2F16() const {
450     llvm_unreachable("cannot happen");
451     return isSSrcF16();
452   }
453 
454   bool isSSrcV2FP32() const {
455     llvm_unreachable("cannot happen");
456     return isSSrcF32();
457   }
458 
459   bool isSCSrcV2FP32() const {
460     llvm_unreachable("cannot happen");
461     return isSCSrcF32();
462   }
463 
464   bool isSSrcV2INT32() const {
465     llvm_unreachable("cannot happen");
466     return isSSrcB32();
467   }
468 
469   bool isSCSrcV2INT32() const {
470     llvm_unreachable("cannot happen");
471     return isSCSrcB32();
472   }
473 
474   bool isSSrcOrLdsB32() const {
475     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
476            isLiteralImm(MVT::i32) || isExpr();
477   }
478 
479   bool isVCSrcB32() const {
480     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
481   }
482 
483   bool isVCSrcB64() const {
484     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
485   }
486 
487   bool isVCSrcB16() const {
488     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
489   }
490 
491   bool isVCSrcV2B16() const {
492     return isVCSrcB16();
493   }
494 
495   bool isVCSrcF32() const {
496     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
497   }
498 
499   bool isVCSrcF64() const {
500     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
501   }
502 
503   bool isVCSrcF16() const {
504     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
505   }
506 
507   bool isVCSrcV2F16() const {
508     return isVCSrcF16();
509   }
510 
511   bool isVSrcB32() const {
512     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
513   }
514 
515   bool isVSrcB64() const {
516     return isVCSrcF64() || isLiteralImm(MVT::i64);
517   }
518 
519   bool isVSrcB16() const {
520     return isVCSrcB16() || isLiteralImm(MVT::i16);
521   }
522 
523   bool isVSrcV2B16() const {
524     return isVSrcB16() || isLiteralImm(MVT::v2i16);
525   }
526 
527   bool isVCSrcV2FP32() const {
528     return isVCSrcF64();
529   }
530 
531   bool isVSrcV2FP32() const {
532     return isVSrcF64() || isLiteralImm(MVT::v2f32);
533   }
534 
535   bool isVCSrcV2INT32() const {
536     return isVCSrcB64();
537   }
538 
539   bool isVSrcV2INT32() const {
540     return isVSrcB64() || isLiteralImm(MVT::v2i32);
541   }
542 
543   bool isVSrcF32() const {
544     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
545   }
546 
547   bool isVSrcF64() const {
548     return isVCSrcF64() || isLiteralImm(MVT::f64);
549   }
550 
551   bool isVSrcF16() const {
552     return isVCSrcF16() || isLiteralImm(MVT::f16);
553   }
554 
555   bool isVSrcV2F16() const {
556     return isVSrcF16() || isLiteralImm(MVT::v2f16);
557   }
558 
559   bool isVISrcB32() const {
560     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
561   }
562 
563   bool isVISrcB16() const {
564     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
565   }
566 
567   bool isVISrcV2B16() const {
568     return isVISrcB16();
569   }
570 
571   bool isVISrcF32() const {
572     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
573   }
574 
575   bool isVISrcF16() const {
576     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
577   }
578 
579   bool isVISrcV2F16() const {
580     return isVISrcF16() || isVISrcB32();
581   }
582 
583   bool isVISrc_64B64() const {
584     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
585   }
586 
587   bool isVISrc_64F64() const {
588     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
589   }
590 
591   bool isVISrc_64V2FP32() const {
592     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
593   }
594 
595   bool isVISrc_64V2INT32() const {
596     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
597   }
598 
599   bool isVISrc_256B64() const {
600     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
601   }
602 
603   bool isVISrc_256F64() const {
604     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
605   }
606 
607   bool isVISrc_128B16() const {
608     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
609   }
610 
611   bool isVISrc_128V2B16() const {
612     return isVISrc_128B16();
613   }
614 
615   bool isVISrc_128B32() const {
616     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
617   }
618 
619   bool isVISrc_128F32() const {
620     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
621   }
622 
623   bool isVISrc_256V2FP32() const {
624     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
625   }
626 
627   bool isVISrc_256V2INT32() const {
628     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
629   }
630 
631   bool isVISrc_512B32() const {
632     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
633   }
634 
635   bool isVISrc_512B16() const {
636     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
637   }
638 
639   bool isVISrc_512V2B16() const {
640     return isVISrc_512B16();
641   }
642 
643   bool isVISrc_512F32() const {
644     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
645   }
646 
647   bool isVISrc_512F16() const {
648     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
649   }
650 
651   bool isVISrc_512V2F16() const {
652     return isVISrc_512F16() || isVISrc_512B32();
653   }
654 
655   bool isVISrc_1024B32() const {
656     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
657   }
658 
659   bool isVISrc_1024B16() const {
660     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
661   }
662 
663   bool isVISrc_1024V2B16() const {
664     return isVISrc_1024B16();
665   }
666 
667   bool isVISrc_1024F32() const {
668     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
669   }
670 
671   bool isVISrc_1024F16() const {
672     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
673   }
674 
675   bool isVISrc_1024V2F16() const {
676     return isVISrc_1024F16() || isVISrc_1024B32();
677   }
678 
679   bool isAISrcB32() const {
680     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
681   }
682 
683   bool isAISrcB16() const {
684     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
685   }
686 
687   bool isAISrcV2B16() const {
688     return isAISrcB16();
689   }
690 
691   bool isAISrcF32() const {
692     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
693   }
694 
695   bool isAISrcF16() const {
696     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
697   }
698 
699   bool isAISrcV2F16() const {
700     return isAISrcF16() || isAISrcB32();
701   }
702 
703   bool isAISrc_64B64() const {
704     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
705   }
706 
707   bool isAISrc_64F64() const {
708     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
709   }
710 
711   bool isAISrc_128B32() const {
712     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
713   }
714 
715   bool isAISrc_128B16() const {
716     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
717   }
718 
719   bool isAISrc_128V2B16() const {
720     return isAISrc_128B16();
721   }
722 
723   bool isAISrc_128F32() const {
724     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
725   }
726 
727   bool isAISrc_128F16() const {
728     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
729   }
730 
731   bool isAISrc_128V2F16() const {
732     return isAISrc_128F16() || isAISrc_128B32();
733   }
734 
735   bool isVISrc_128F16() const {
736     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
737   }
738 
739   bool isVISrc_128V2F16() const {
740     return isVISrc_128F16() || isVISrc_128B32();
741   }
742 
743   bool isAISrc_256B64() const {
744     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
745   }
746 
747   bool isAISrc_256F64() const {
748     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
749   }
750 
751   bool isAISrc_512B32() const {
752     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
753   }
754 
755   bool isAISrc_512B16() const {
756     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
757   }
758 
759   bool isAISrc_512V2B16() const {
760     return isAISrc_512B16();
761   }
762 
763   bool isAISrc_512F32() const {
764     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
765   }
766 
767   bool isAISrc_512F16() const {
768     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
769   }
770 
771   bool isAISrc_512V2F16() const {
772     return isAISrc_512F16() || isAISrc_512B32();
773   }
774 
775   bool isAISrc_1024B32() const {
776     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
777   }
778 
779   bool isAISrc_1024B16() const {
780     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
781   }
782 
783   bool isAISrc_1024V2B16() const {
784     return isAISrc_1024B16();
785   }
786 
787   bool isAISrc_1024F32() const {
788     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
789   }
790 
791   bool isAISrc_1024F16() const {
792     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
793   }
794 
795   bool isAISrc_1024V2F16() const {
796     return isAISrc_1024F16() || isAISrc_1024B32();
797   }
798 
799   bool isKImmFP32() const {
800     return isLiteralImm(MVT::f32);
801   }
802 
803   bool isKImmFP16() const {
804     return isLiteralImm(MVT::f16);
805   }
806 
807   bool isMem() const override {
808     return false;
809   }
810 
811   bool isExpr() const {
812     return Kind == Expression;
813   }
814 
815   bool isSoppBrTarget() const {
816     return isExpr() || isImm();
817   }
818 
819   bool isSWaitCnt() const;
820   bool isHwreg() const;
821   bool isSendMsg() const;
822   bool isSwizzle() const;
823   bool isSMRDOffset8() const;
824   bool isSMEMOffset() const;
825   bool isSMRDLiteralOffset() const;
826   bool isDPP8() const;
827   bool isDPPCtrl() const;
828   bool isBLGP() const;
829   bool isCBSZ() const;
830   bool isABID() const;
831   bool isGPRIdxMode() const;
832   bool isS16Imm() const;
833   bool isU16Imm() const;
834   bool isEndpgm() const;
835 
836   StringRef getExpressionAsToken() const {
837     assert(isExpr());
838     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
839     return S->getSymbol().getName();
840   }
841 
842   StringRef getToken() const {
843     assert(isToken());
844 
845     if (Kind == Expression)
846       return getExpressionAsToken();
847 
848     return StringRef(Tok.Data, Tok.Length);
849   }
850 
851   int64_t getImm() const {
852     assert(isImm());
853     return Imm.Val;
854   }
855 
856   void setImm(int64_t Val) {
857     assert(isImm());
858     Imm.Val = Val;
859   }
860 
861   ImmTy getImmTy() const {
862     assert(isImm());
863     return Imm.Type;
864   }
865 
866   unsigned getReg() const override {
867     assert(isRegKind());
868     return Reg.RegNo;
869   }
870 
871   SMLoc getStartLoc() const override {
872     return StartLoc;
873   }
874 
875   SMLoc getEndLoc() const override {
876     return EndLoc;
877   }
878 
879   SMRange getLocRange() const {
880     return SMRange(StartLoc, EndLoc);
881   }
882 
883   Modifiers getModifiers() const {
884     assert(isRegKind() || isImmTy(ImmTyNone));
885     return isRegKind() ? Reg.Mods : Imm.Mods;
886   }
887 
888   void setModifiers(Modifiers Mods) {
889     assert(isRegKind() || isImmTy(ImmTyNone));
890     if (isRegKind())
891       Reg.Mods = Mods;
892     else
893       Imm.Mods = Mods;
894   }
895 
896   bool hasModifiers() const {
897     return getModifiers().hasModifiers();
898   }
899 
900   bool hasFPModifiers() const {
901     return getModifiers().hasFPModifiers();
902   }
903 
904   bool hasIntModifiers() const {
905     return getModifiers().hasIntModifiers();
906   }
907 
908   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
909 
910   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
911 
912   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
913 
914   template <unsigned Bitwidth>
915   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
916 
917   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
918     addKImmFPOperands<16>(Inst, N);
919   }
920 
921   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
922     addKImmFPOperands<32>(Inst, N);
923   }
924 
925   void addRegOperands(MCInst &Inst, unsigned N) const;
926 
927   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
928     addRegOperands(Inst, N);
929   }
930 
931   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
932     if (isRegKind())
933       addRegOperands(Inst, N);
934     else if (isExpr())
935       Inst.addOperand(MCOperand::createExpr(Expr));
936     else
937       addImmOperands(Inst, N);
938   }
939 
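  // Add the source-modifiers immediate followed by the register or immediate
  // operand itself; modifiers are encoded separately, so they are not folded
  // into the immediate value here.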
940   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
941     Modifiers Mods = getModifiers();
942     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
943     if (isRegKind()) {
944       addRegOperands(Inst, N);
945     } else {
946       addImmOperands(Inst, N, false);
947     }
948   }
949 
950   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
951     assert(!hasIntModifiers());
952     addRegOrImmWithInputModsOperands(Inst, N);
953   }
954 
955   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
956     assert(!hasFPModifiers());
957     addRegOrImmWithInputModsOperands(Inst, N);
958   }
959 
960   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
961     Modifiers Mods = getModifiers();
962     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
963     assert(isRegKind());
964     addRegOperands(Inst, N);
965   }
966 
967   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
968     assert(!hasIntModifiers());
969     addRegWithInputModsOperands(Inst, N);
970   }
971 
972   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
973     assert(!hasFPModifiers());
974     addRegWithInputModsOperands(Inst, N);
975   }
976 
977   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
978     if (isImm())
979       addImmOperands(Inst, N);
980     else {
981       assert(isExpr());
982       Inst.addOperand(MCOperand::createExpr(Expr));
983     }
984   }
985 
986   static void printImmTy(raw_ostream& OS, ImmTy Type) {
987     switch (Type) {
988     case ImmTyNone: OS << "None"; break;
989     case ImmTyGDS: OS << "GDS"; break;
990     case ImmTyLDS: OS << "LDS"; break;
991     case ImmTyOffen: OS << "Offen"; break;
992     case ImmTyIdxen: OS << "Idxen"; break;
993     case ImmTyAddr64: OS << "Addr64"; break;
994     case ImmTyOffset: OS << "Offset"; break;
995     case ImmTyInstOffset: OS << "InstOffset"; break;
996     case ImmTyOffset0: OS << "Offset0"; break;
997     case ImmTyOffset1: OS << "Offset1"; break;
998     case ImmTyCPol: OS << "CPol"; break;
999     case ImmTySWZ: OS << "SWZ"; break;
1000     case ImmTyTFE: OS << "TFE"; break;
1001     case ImmTyD16: OS << "D16"; break;
1002     case ImmTyFORMAT: OS << "FORMAT"; break;
1003     case ImmTyClampSI: OS << "ClampSI"; break;
1004     case ImmTyOModSI: OS << "OModSI"; break;
1005     case ImmTyDPP8: OS << "DPP8"; break;
1006     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1007     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1008     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1009     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1010     case ImmTyDppFi: OS << "FI"; break;
1011     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1012     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1013     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1014     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1015     case ImmTyDMask: OS << "DMask"; break;
1016     case ImmTyDim: OS << "Dim"; break;
1017     case ImmTyUNorm: OS << "UNorm"; break;
1018     case ImmTyDA: OS << "DA"; break;
1019     case ImmTyR128A16: OS << "R128A16"; break;
1020     case ImmTyA16: OS << "A16"; break;
1021     case ImmTyLWE: OS << "LWE"; break;
1022     case ImmTyOff: OS << "Off"; break;
1023     case ImmTyExpTgt: OS << "ExpTgt"; break;
1024     case ImmTyExpCompr: OS << "ExpCompr"; break;
1025     case ImmTyExpVM: OS << "ExpVM"; break;
1026     case ImmTyHwreg: OS << "Hwreg"; break;
1027     case ImmTySendMsg: OS << "SendMsg"; break;
1028     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1029     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1030     case ImmTyAttrChan: OS << "AttrChan"; break;
1031     case ImmTyOpSel: OS << "OpSel"; break;
1032     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1033     case ImmTyNegLo: OS << "NegLo"; break;
1034     case ImmTyNegHi: OS << "NegHi"; break;
1035     case ImmTySwizzle: OS << "Swizzle"; break;
1036     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1037     case ImmTyHigh: OS << "High"; break;
1038     case ImmTyBLGP: OS << "BLGP"; break;
1039     case ImmTyCBSZ: OS << "CBSZ"; break;
1040     case ImmTyABID: OS << "ABID"; break;
1041     case ImmTyEndpgm: OS << "Endpgm"; break;
1042     }
1043   }
1044 
1045   void print(raw_ostream &OS) const override {
1046     switch (Kind) {
1047     case Register:
1048       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1049       break;
1050     case Immediate:
1051       OS << '<' << getImm();
1052       if (getImmTy() != ImmTyNone) {
1053         OS << " type: "; printImmTy(OS, getImmTy());
1054       }
1055       OS << " mods: " << Imm.Mods << '>';
1056       break;
1057     case Token:
1058       OS << '\'' << getToken() << '\'';
1059       break;
1060     case Expression:
1061       OS << "<expr " << *Expr << '>';
1062       break;
1063     }
1064   }
1065 
1066   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1067                                       int64_t Val, SMLoc Loc,
1068                                       ImmTy Type = ImmTyNone,
1069                                       bool IsFPImm = false) {
1070     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1071     Op->Imm.Val = Val;
1072     Op->Imm.IsFPImm = IsFPImm;
1073     Op->Imm.Kind = ImmKindTyNone;
1074     Op->Imm.Type = Type;
1075     Op->Imm.Mods = Modifiers();
1076     Op->StartLoc = Loc;
1077     Op->EndLoc = Loc;
1078     return Op;
1079   }
1080 
1081   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1082                                         StringRef Str, SMLoc Loc,
1083                                         bool HasExplicitEncodingSize = true) {
1084     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1085     Res->Tok.Data = Str.data();
1086     Res->Tok.Length = Str.size();
1087     Res->StartLoc = Loc;
1088     Res->EndLoc = Loc;
1089     return Res;
1090   }
1091 
1092   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1093                                       unsigned RegNo, SMLoc S,
1094                                       SMLoc E) {
1095     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1096     Op->Reg.RegNo = RegNo;
1097     Op->Reg.Mods = Modifiers();
1098     Op->StartLoc = S;
1099     Op->EndLoc = E;
1100     return Op;
1101   }
1102 
1103   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1104                                        const class MCExpr *Expr, SMLoc S) {
1105     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1106     Op->Expr = Expr;
1107     Op->StartLoc = S;
1108     Op->EndLoc = S;
1109     return Op;
1110   }
1111 };
1112 
1113 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1115   return OS;
1116 }
1117 
1118 //===----------------------------------------------------------------------===//
1119 // AsmParser
1120 //===----------------------------------------------------------------------===//
1121 
// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
1125 class KernelScopeInfo {
1126   int SgprIndexUnusedMin = -1;
1127   int VgprIndexUnusedMin = -1;
1128   MCContext *Ctx = nullptr;
1129 
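  // Record a use of SGPR index i and raise the .kernel.sgpr_count
  // high-water-mark symbol accordingly.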
1130   void usesSgprAt(int i) {
1131     if (i >= SgprIndexUnusedMin) {
1132       SgprIndexUnusedMin = ++i;
1133       if (Ctx) {
1134         MCSymbol* const Sym =
1135           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1136         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1137       }
1138     }
1139   }
1140 
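  // Same as usesSgprAt, but for VGPRs and the .kernel.vgpr_count symbol.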
1141   void usesVgprAt(int i) {
1142     if (i >= VgprIndexUnusedMin) {
1143       VgprIndexUnusedMin = ++i;
1144       if (Ctx) {
1145         MCSymbol* const Sym =
1146           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1147         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1148       }
1149     }
1150   }
1151 
1152 public:
1153   KernelScopeInfo() = default;
1154 
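  // Reset the counters and (re)create the count symbols with a value of 0.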
1155   void initialize(MCContext &Context) {
1156     Ctx = &Context;
1157     usesSgprAt(SgprIndexUnusedMin = -1);
1158     usesVgprAt(VgprIndexUnusedMin = -1);
1159   }
1160 
1161   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1162     switch (RegKind) {
1163       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1164       case IS_AGPR: // fall through
1165       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1166       default: break;
1167     }
1168   }
1169 };
1170 
1171 class AMDGPUAsmParser : public MCTargetAsmParser {
1172   MCAsmParser &Parser;
1173 
  // Maximum number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
1176   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1177 
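  // Encoding variant explicitly requested via a mnemonic suffix
  // (e.g. _e32/_e64, _dpp, _sdwa); restricts the matcher variants tried.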
1178   unsigned ForcedEncodingSize = 0;
1179   bool ForcedDPP = false;
1180   bool ForcedSDWA = false;
1181   KernelScopeInfo KernelScope;
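  // Cache-policy (CPol) bits already parsed for the current instruction;
  // used to diagnose duplicate glc/slc/dlc/scc modifiers.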
1182   unsigned CPolSeen;
1183 
1184   /// @name Auto-generated Match Functions
1185   /// {
1186 
1187 #define GET_ASSEMBLER_HEADER
1188 #include "AMDGPUGenAsmMatcher.inc"
1189 
1190   /// }
1191 
1192 private:
1193   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1194   bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for the given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
1197   ///
1198   /// \param Features [in] Target features, used for bug corrections.
1199   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1200   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1201   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1202   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1203   /// descriptor field, if valid.
1204   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1205   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1206   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1207   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1208   /// \param VGPRBlocks [out] Result VGPR block count.
1209   /// \param SGPRBlocks [out] Result SGPR block count.
1210   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1211                           bool FlatScrUsed, bool XNACKUsed,
1212                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1213                           SMRange VGPRRange, unsigned NextFreeSGPR,
1214                           SMRange SGPRRange, unsigned &VGPRBlocks,
1215                           unsigned &SGPRBlocks);
1216   bool ParseDirectiveAMDGCNTarget();
1217   bool ParseDirectiveAMDHSAKernel();
1218   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1219   bool ParseDirectiveHSACodeObjectVersion();
1220   bool ParseDirectiveHSACodeObjectISA();
1221   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1222   bool ParseDirectiveAMDKernelCodeT();
1223   // TODO: Possibly make subtargetHasRegister const.
1224   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1225   bool ParseDirectiveAMDGPUHsaKernel();
1226 
1227   bool ParseDirectiveISAVersion();
1228   bool ParseDirectiveHSAMetadata();
1229   bool ParseDirectivePALMetadataBegin();
1230   bool ParseDirectivePALMetadata();
1231   bool ParseDirectiveAMDGPULDS();
1232 
1233   /// Common code to parse out a block of text (typically YAML) between start and
1234   /// end directives.
1235   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1236                            const char *AssemblerDirectiveEnd,
1237                            std::string &CollectString);
1238 
1239   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1240                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1241   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1242                            unsigned &RegNum, unsigned &RegWidth,
1243                            bool RestoreOnFailure = false);
1244   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1245                            unsigned &RegNum, unsigned &RegWidth,
1246                            SmallVectorImpl<AsmToken> &Tokens);
1247   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1248                            unsigned &RegWidth,
1249                            SmallVectorImpl<AsmToken> &Tokens);
1250   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1251                            unsigned &RegWidth,
1252                            SmallVectorImpl<AsmToken> &Tokens);
1253   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1254                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1255   bool ParseRegRange(unsigned& Num, unsigned& Width);
1256   unsigned getRegularReg(RegisterKind RegKind,
1257                          unsigned RegNum,
1258                          unsigned RegWidth,
1259                          SMLoc Loc);
1260 
1261   bool isRegister();
1262   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1263   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1264   void initializeGprCountSymbol(RegisterKind RegKind);
1265   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1266                              unsigned RegWidth);
1267   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1268                     bool IsAtomic, bool IsLds = false);
1269   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1270                  bool IsGdsHardcoded);
1271 
1272 public:
1273   enum AMDGPUMatchResultTy {
1274     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1275   };
1276   enum OperandMode {
1277     OperandMode_Default,
1278     OperandMode_NSA,
1279   };
1280 
1281   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1282 
1283   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1284                const MCInstrInfo &MII,
1285                const MCTargetOptions &Options)
1286       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1287     MCAsmParserExtension::Initialize(Parser);
1288 
1289     if (getFeatureBits().none()) {
1290       // Set default features.
1291       copySTI().ToggleFeature("southern-islands");
1292     }
1293 
1294     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1295 
1296     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1301       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1302       MCContext &Ctx = getContext();
1303       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1304         MCSymbol *Sym =
1305             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1306         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1308         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311       } else {
1312         MCSymbol *Sym =
1313             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1314         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1315         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1316         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1317         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1318         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1319       }
1320       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1321         initializeGprCountSymbol(IS_VGPR);
1322         initializeGprCountSymbol(IS_SGPR);
1323       } else
1324         KernelScope.initialize(getContext());
1325     }
1326   }
1327 
1328   bool hasMIMG_R128() const {
1329     return AMDGPU::hasMIMG_R128(getSTI());
1330   }
1331 
1332   bool hasPackedD16() const {
1333     return AMDGPU::hasPackedD16(getSTI());
1334   }
1335 
1336   bool hasGFX10A16() const {
1337     return AMDGPU::hasGFX10A16(getSTI());
1338   }
1339 
1340   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1341 
1342   bool isSI() const {
1343     return AMDGPU::isSI(getSTI());
1344   }
1345 
1346   bool isCI() const {
1347     return AMDGPU::isCI(getSTI());
1348   }
1349 
1350   bool isVI() const {
1351     return AMDGPU::isVI(getSTI());
1352   }
1353 
1354   bool isGFX9() const {
1355     return AMDGPU::isGFX9(getSTI());
1356   }
1357 
1358   bool isGFX90A() const {
1359     return AMDGPU::isGFX90A(getSTI());
1360   }
1361 
1362   bool isGFX9Plus() const {
1363     return AMDGPU::isGFX9Plus(getSTI());
1364   }
1365 
1366   bool isGFX10() const {
1367     return AMDGPU::isGFX10(getSTI());
1368   }
1369 
1370   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1371 
1372   bool isGFX10_BEncoding() const {
1373     return AMDGPU::isGFX10_BEncoding(getSTI());
1374   }
1375 
1376   bool hasInv2PiInlineImm() const {
1377     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1378   }
1379 
1380   bool hasFlatOffsets() const {
1381     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1382   }
1383 
1384   bool hasArchitectedFlatScratch() const {
1385     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1386   }
1387 
1388   bool hasSGPR102_SGPR103() const {
1389     return !isVI() && !isGFX9();
1390   }
1391 
1392   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1393 
1394   bool hasIntClamp() const {
1395     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1396   }
1397 
1398   AMDGPUTargetStreamer &getTargetStreamer() {
1399     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1400     return static_cast<AMDGPUTargetStreamer &>(TS);
1401   }
1402 
1403   const MCRegisterInfo *getMRI() const {
1404     // We need this const_cast because for some reason getContext() is not const
1405     // in MCAsmParser.
1406     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1407   }
1408 
1409   const MCInstrInfo *getMII() const {
1410     return &MII;
1411   }
1412 
1413   const FeatureBitset &getFeatureBits() const {
1414     return getSTI().getFeatureBits();
1415   }
1416 
1417   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1418   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1419   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1420 
1421   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1422   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1423   bool isForcedDPP() const { return ForcedDPP; }
1424   bool isForcedSDWA() const { return ForcedSDWA; }
1425   ArrayRef<unsigned> getMatchedVariants() const;
1426   StringRef getMatchedVariantName() const;
1427 
1428   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1429   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1430                      bool RestoreOnFailure);
1431   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1432   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1433                                         SMLoc &EndLoc) override;
1434   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1435   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1436                                       unsigned Kind) override;
1437   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1438                                OperandVector &Operands, MCStreamer &Out,
1439                                uint64_t &ErrorInfo,
1440                                bool MatchingInlineAsm) override;
1441   bool ParseDirective(AsmToken DirectiveID) override;
1442   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1443                                     OperandMode Mode = OperandMode_Default);
1444   StringRef parseMnemonicSuffix(StringRef Name);
1445   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1446                         SMLoc NameLoc, OperandVector &Operands) override;
1447   //bool ProcessInstruction(MCInst &Inst);
1448 
1449   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1450 
1451   OperandMatchResultTy
1452   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1453                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1454                      bool (*ConvertResult)(int64_t &) = nullptr);
1455 
1456   OperandMatchResultTy
1457   parseOperandArrayWithPrefix(const char *Prefix,
1458                               OperandVector &Operands,
1459                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1460                               bool (*ConvertResult)(int64_t&) = nullptr);
1461 
1462   OperandMatchResultTy
1463   parseNamedBit(StringRef Name, OperandVector &Operands,
1464                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1465   OperandMatchResultTy parseCPol(OperandVector &Operands);
1466   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1467                                              StringRef &Value,
1468                                              SMLoc &StringLoc);
1469 
1470   bool isModifier();
1471   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1472   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1473   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1474   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1475   bool parseSP3NegModifier();
1476   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1477   OperandMatchResultTy parseReg(OperandVector &Operands);
1478   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1479   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1480   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1481   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1482   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1483   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1484   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1485   OperandMatchResultTy parseUfmt(int64_t &Format);
1486   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1487   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1488   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1489   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1490   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1491   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1492   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1493 
1494   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1495   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1496   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1497   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1498 
1499   bool parseCnt(int64_t &IntVal);
1500   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1501   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1502 
1503 private:
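  // Result of parsing one field of a structured operand such as hwreg(...)
  // or sendmsg(...): the value, its source location, whether it was spelled
  // symbolically, and whether it was explicitly specified.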
1504   struct OperandInfoTy {
1505     SMLoc Loc;
1506     int64_t Id;
1507     bool IsSymbolic = false;
1508     bool IsDefined = false;
1509 
1510     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1511   };
1512 
1513   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1514   bool validateSendMsg(const OperandInfoTy &Msg,
1515                        const OperandInfoTy &Op,
1516                        const OperandInfoTy &Stream);
1517 
1518   bool parseHwregBody(OperandInfoTy &HwReg,
1519                       OperandInfoTy &Offset,
1520                       OperandInfoTy &Width);
1521   bool validateHwreg(const OperandInfoTy &HwReg,
1522                      const OperandInfoTy &Offset,
1523                      const OperandInfoTy &Width);
1524 
1525   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1526   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1527 
1528   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1529                       const OperandVector &Operands) const;
1530   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1531   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1532   SMLoc getLitLoc(const OperandVector &Operands) const;
1533   SMLoc getConstLoc(const OperandVector &Operands) const;
1534 
1535   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1536   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1537   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1538   bool validateSOPLiteral(const MCInst &Inst) const;
1539   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1540   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1541   bool validateIntClampSupported(const MCInst &Inst);
1542   bool validateMIMGAtomicDMask(const MCInst &Inst);
1543   bool validateMIMGGatherDMask(const MCInst &Inst);
1544   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1545   bool validateMIMGDataSize(const MCInst &Inst);
1546   bool validateMIMGAddrSize(const MCInst &Inst);
1547   bool validateMIMGD16(const MCInst &Inst);
1548   bool validateMIMGDim(const MCInst &Inst);
1549   bool validateMIMGMSAA(const MCInst &Inst);
1550   bool validateOpSel(const MCInst &Inst);
1551   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1552   bool validateVccOperand(unsigned Reg) const;
1553   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1554   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1555   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1556   bool validateAGPRLdSt(const MCInst &Inst) const;
1557   bool validateVGPRAlign(const MCInst &Inst) const;
1558   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1559   bool validateDivScale(const MCInst &Inst);
1560   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1561                              const SMLoc &IDLoc);
1562   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1563   unsigned getConstantBusLimit(unsigned Opcode) const;
1564   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1565   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1566   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1567 
1568   bool isSupportedMnemo(StringRef Mnemo,
1569                         const FeatureBitset &FBS);
1570   bool isSupportedMnemo(StringRef Mnemo,
1571                         const FeatureBitset &FBS,
1572                         ArrayRef<unsigned> Variants);
1573   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1574 
1575   bool isId(const StringRef Id) const;
1576   bool isId(const AsmToken &Token, const StringRef Id) const;
1577   bool isToken(const AsmToken::TokenKind Kind) const;
1578   bool trySkipId(const StringRef Id);
1579   bool trySkipId(const StringRef Pref, const StringRef Id);
1580   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1581   bool trySkipToken(const AsmToken::TokenKind Kind);
1582   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1583   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1584   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1585 
1586   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1587   AsmToken::TokenKind getTokenKind() const;
1588   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1589   bool parseExpr(OperandVector &Operands);
1590   StringRef getTokenStr() const;
1591   AsmToken peekToken();
1592   AsmToken getToken() const;
1593   SMLoc getLoc() const;
1594   void lex();
1595 
1596 public:
1597   void onBeginOfFile() override;
1598 
1599   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1600   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1601 
1602   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1603   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1604   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1605   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1606   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1607   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1608 
1609   bool parseSwizzleOperand(int64_t &Op,
1610                            const unsigned MinVal,
1611                            const unsigned MaxVal,
1612                            const StringRef ErrMsg,
1613                            SMLoc &Loc);
1614   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1615                             const unsigned MinVal,
1616                             const unsigned MaxVal,
1617                             const StringRef ErrMsg);
1618   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1619   bool parseSwizzleOffset(int64_t &Imm);
1620   bool parseSwizzleMacro(int64_t &Imm);
1621   bool parseSwizzleQuadPerm(int64_t &Imm);
1622   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1623   bool parseSwizzleBroadcast(int64_t &Imm);
1624   bool parseSwizzleSwap(int64_t &Imm);
1625   bool parseSwizzleReverse(int64_t &Imm);
1626 
1627   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1628   int64_t parseGPRIdxMacro();
1629 
1630   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1631   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1632   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1633   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1634 
1635   AMDGPUOperand::Ptr defaultCPol() const;
1636 
1637   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1638   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1639   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1640   AMDGPUOperand::Ptr defaultFlatOffset() const;
1641 
1642   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1643 
1644   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1645                OptionalImmIndexMap &OptionalIdx);
1646   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1647   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1648   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1649   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1650                 OptionalImmIndexMap &OptionalIdx);
1651 
1652   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1653 
1654   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1655                bool IsAtomic = false);
1656   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1657   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1658 
1659   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1660 
1661   bool parseDimId(unsigned &Encoding);
1662   OperandMatchResultTy parseDim(OperandVector &Operands);
1663   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1664   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1665   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1666   int64_t parseDPPCtrlSel(StringRef Ctrl);
1667   int64_t parseDPPCtrlPerm();
1668   AMDGPUOperand::Ptr defaultRowMask() const;
1669   AMDGPUOperand::Ptr defaultBankMask() const;
1670   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1671   AMDGPUOperand::Ptr defaultFI() const;
1672   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1673   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1674 
1675   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1676                                     AMDGPUOperand::ImmTy Type);
1677   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1678   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1679   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1680   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1681   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1682   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1683   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1684                uint64_t BasicInstType,
1685                bool SkipDstVcc = false,
1686                bool SkipSrcVcc = false);
1687 
1688   AMDGPUOperand::Ptr defaultBLGP() const;
1689   AMDGPUOperand::Ptr defaultCBSZ() const;
1690   AMDGPUOperand::Ptr defaultABID() const;
1691 
1692   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1693   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1694 };
1695 
1696 struct OptionalOperand {
1697   const char *Name;
1698   AMDGPUOperand::ImmTy Type;
1699   bool IsBit;
1700   bool (*ConvertResult)(int64_t&);
1701 };
1702 
1703 } // end anonymous namespace
1704 
1705 // May be called with integer type with equivalent bitwidth.
1706 static const fltSemantics *getFltSemantics(unsigned Size) {
1707   switch (Size) {
1708   case 4:
1709     return &APFloat::IEEEsingle();
1710   case 8:
1711     return &APFloat::IEEEdouble();
1712   case 2:
1713     return &APFloat::IEEEhalf();
1714   default:
1715     llvm_unreachable("unsupported fp type");
1716   }
1717 }
1718 
1719 static const fltSemantics *getFltSemantics(MVT VT) {
1720   return getFltSemantics(VT.getSizeInBits() / 8);
1721 }
1722 
1723 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1724   switch (OperandType) {
1725   case AMDGPU::OPERAND_REG_IMM_INT32:
1726   case AMDGPU::OPERAND_REG_IMM_FP32:
1727   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1728   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1729   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1730   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1731   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1732   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1733   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1734   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1735   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1736   case AMDGPU::OPERAND_KIMM32:
1737     return &APFloat::IEEEsingle();
1738   case AMDGPU::OPERAND_REG_IMM_INT64:
1739   case AMDGPU::OPERAND_REG_IMM_FP64:
1740   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1741   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1742   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1743     return &APFloat::IEEEdouble();
1744   case AMDGPU::OPERAND_REG_IMM_INT16:
1745   case AMDGPU::OPERAND_REG_IMM_FP16:
1746   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1747   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1748   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1749   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1750   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1751   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1752   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1753   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1754   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1755   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1756   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1757   case AMDGPU::OPERAND_KIMM16:
1758     return &APFloat::IEEEhalf();
1759   default:
1760     llvm_unreachable("unsupported fp type");
1761   }
1762 }
1763 
1764 //===----------------------------------------------------------------------===//
1765 // Operand
1766 //===----------------------------------------------------------------------===//
1767 
1768 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1769   bool Lost;
1770 
  // Convert literal to the floating-point type matching VT
1772   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1773                                                APFloat::rmNearestTiesToEven,
1774                                                &Lost);
  // We allow precision loss but not overflow or underflow
1776   if (Status != APFloat::opOK &&
1777       Lost &&
1778       ((Status & APFloat::opOverflow)  != 0 ||
1779        (Status & APFloat::opUnderflow) != 0)) {
1780     return false;
1781   }
1782 
1783   return true;
1784 }
1785 
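// Check that truncating Val to Size bits is lossless, i.e. Val fits in Size
// bits as either an unsigned or a signed integer. For illustration,
// isSafeTruncation(-1, 16) and isSafeTruncation(0xFFFF, 16) both hold, while
// isSafeTruncation(0x10000, 16) does not.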
1786 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1787   return isUIntN(Size, Val) || isIntN(Size, Val);
1788 }
1789 
1790 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1791   if (VT.getScalarType() == MVT::i16) {
1792     // FP immediate values are broken.
1793     return isInlinableIntLiteral(Val);
1794   }
1795 
1796   // f16/v2f16 operands work correctly for all values.
1797   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1798 }
1799 
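// Check whether this parsed immediate can be encoded as an inline constant of
// the given type. For illustration, with a 32-bit operand the integer 64 and
// the fp literal 0.5 are inlinable, whereas 65 or 0.3 must be emitted as a
// 32-bit literal constant.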
1800 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1801 
1802   // This is a hack to enable named inline values like
1803   // shared_base with both 32-bit and 64-bit operands.
1804   // Note that these values are defined as
1805   // 32-bit operands only.
1806   if (isInlineValue()) {
1807     return true;
1808   }
1809 
1810   if (!isImmTy(ImmTyNone)) {
1811     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1812     return false;
1813   }
1814   // TODO: We should avoid using host float here. It would be better to
1815   // check the float bit values which is what a few other places do.
1816   // We've had bot failures before due to weird NaN support on mips hosts.
1817 
1818   APInt Literal(64, Imm.Val);
1819 
  if (Imm.IsFPImm) { // We got an fp literal token
1821     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1822       return AMDGPU::isInlinableLiteral64(Imm.Val,
1823                                           AsmParser->hasInv2PiInlineImm());
1824     }
1825 
1826     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1827     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1828       return false;
1829 
1830     if (type.getScalarSizeInBits() == 16) {
1831       return isInlineableLiteralOp16(
1832         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1833         type, AsmParser->hasInv2PiInlineImm());
1834     }
1835 
1836     // Check if single precision literal is inlinable
1837     return AMDGPU::isInlinableLiteral32(
1838       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1839       AsmParser->hasInv2PiInlineImm());
1840   }
1841 
  // We got an int literal token.
1843   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1844     return AMDGPU::isInlinableLiteral64(Imm.Val,
1845                                         AsmParser->hasInv2PiInlineImm());
1846   }
1847 
1848   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1849     return false;
1850   }
1851 
1852   if (type.getScalarSizeInBits() == 16) {
1853     return isInlineableLiteralOp16(
1854       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1855       type, AsmParser->hasInv2PiInlineImm());
1856   }
1857 
1858   return AMDGPU::isInlinableLiteral32(
1859     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1860     AsmParser->hasInv2PiInlineImm());
1861 }
1862 
1863 bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal
1865   if (!isImmTy(ImmTyNone)) {
1866     return false;
1867   }
1868 
1869   if (!Imm.IsFPImm) {
    // We got an int literal token.
1871 
1872     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3, because of integer truncation.
      // To avoid ambiguity, disallow these cases.
1876       return false;
1877     }
1878 
1879     unsigned Size = type.getSizeInBits();
1880     if (Size == 64)
1881       Size = 32;
1882 
1883     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1884     // types.
1885     return isSafeTruncation(Imm.Val, Size);
1886   }
1887 
  // We got an fp literal token
1889   if (type == MVT::f64) { // Expected 64-bit fp operand
    // Only the high 32 bits of such a literal can be encoded; the low 32 bits
    // will be set to zero. We accept these literals anyway.
1891     return true;
1892   }
1893 
1894   if (type == MVT::i64) { // Expected 64-bit int operand
1895     // We don't allow fp literals in 64-bit integer instructions. It is
1896     // unclear how we should encode them.
1897     return false;
1898   }
1899 
1900   // We allow fp literals with f16x2 operands assuming that the specified
1901   // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
1903   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1904                      (type == MVT::v2i16)? MVT::i16 :
1905                      (type == MVT::v2f32)? MVT::f32 : type;
1906 
1907   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1908   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1909 }
1910 
1911 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1912   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1913 }
1914 
1915 bool AMDGPUOperand::isVRegWithInputMods() const {
1916   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1917          // GFX90A allows DPP on 64-bit operands.
1918          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1919           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1920 }
1921 
1922 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1923   if (AsmParser->isVI())
1924     return isVReg32();
1925   else if (AsmParser->isGFX9Plus())
1926     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1927   else
1928     return false;
1929 }
1930 
1931 bool AMDGPUOperand::isSDWAFP16Operand() const {
1932   return isSDWAOperand(MVT::f16);
1933 }
1934 
1935 bool AMDGPUOperand::isSDWAFP32Operand() const {
1936   return isSDWAOperand(MVT::f32);
1937 }
1938 
1939 bool AMDGPUOperand::isSDWAInt16Operand() const {
1940   return isSDWAOperand(MVT::i16);
1941 }
1942 
1943 bool AMDGPUOperand::isSDWAInt32Operand() const {
1944   return isSDWAOperand(MVT::i32);
1945 }
1946 
1947 bool AMDGPUOperand::isBoolReg() const {
1948   auto FB = AsmParser->getFeatureBits();
1949   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1950                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1951 }
1952 
1953 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1954 {
1955   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1956   assert(Size == 2 || Size == 4 || Size == 8);
1957 
1958   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1959 
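  // The sign bit is the top bit of the operand, e.g. bit 31 for a 4-byte
  // operand: abs clears it and neg flips it, so (for illustration) abs turns
  // 0xBF800000 (-1.0f) into 0x3F800000 (1.0f).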
1960   if (Imm.Mods.Abs) {
1961     Val &= ~FpSignMask;
1962   }
1963   if (Imm.Mods.Neg) {
1964     Val ^= FpSignMask;
1965   }
1966 
1967   return Val;
1968 }
1969 
1970 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1971   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1972                              Inst.getNumOperands())) {
1973     addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1976   } else {
1977     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1978     Inst.addOperand(MCOperand::createImm(Imm.Val));
1979     setImmKindNone();
1980   }
1981 }
1982 
1983 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1984   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1985   auto OpNum = Inst.getNumOperands();
1986   // Check that this operand accepts literals
1987   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1988 
1989   if (ApplyModifiers) {
1990     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1991     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1992     Val = applyInputFPModifiers(Val, Size);
1993   }
1994 
1995   APInt Literal(64, Val);
1996   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1997 
  if (Imm.IsFPImm) { // We got an fp literal token
1999     switch (OpTy) {
2000     case AMDGPU::OPERAND_REG_IMM_INT64:
2001     case AMDGPU::OPERAND_REG_IMM_FP64:
2002     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2003     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2004     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2005       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2006                                        AsmParser->hasInv2PiInlineImm())) {
2007         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2008         setImmKindConst();
2009         return;
2010       }
2011 
2012       // Non-inlineable
2013       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check whether the low 32 bits are zero
2015         if (Literal.getLoBits(32) != 0) {
2016           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2017           "Can't encode literal as exact 64-bit floating-point operand. "
2018           "Low 32-bits will be set to zero");
2019         }
2020 
2021         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2022         setImmKindLiteral();
2023         return;
2024       }
2025 
2026       // We don't allow fp literals in 64-bit integer instructions. It is
2027       // unclear how we should encode them. This case should be checked earlier
2028       // in predicate methods (isLiteralImm())
2029       llvm_unreachable("fp literal in 64-bit integer instruction.");
2030 
2031     case AMDGPU::OPERAND_REG_IMM_INT32:
2032     case AMDGPU::OPERAND_REG_IMM_FP32:
2033     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2034     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2035     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2036     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2037     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2038     case AMDGPU::OPERAND_REG_IMM_INT16:
2039     case AMDGPU::OPERAND_REG_IMM_FP16:
2040     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2041     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2042     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2043     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2044     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2045     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2046     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2047     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2048     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2049     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2050     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2051     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2052     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2053     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2054     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2055     case AMDGPU::OPERAND_KIMM32:
2056     case AMDGPU::OPERAND_KIMM16: {
      bool Lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the operand's floating-point format
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &Lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()
2064 
2065       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2066       Inst.addOperand(MCOperand::createImm(ImmVal));
2067       setImmKindLiteral();
2068       return;
2069     }
2070     default:
2071       llvm_unreachable("invalid operand size");
2072     }
2073 
2074     return;
2075   }
2076 
  // We got an int literal token.
  // Only sign-extend inline immediates.
2079   switch (OpTy) {
2080   case AMDGPU::OPERAND_REG_IMM_INT32:
2081   case AMDGPU::OPERAND_REG_IMM_FP32:
2082   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2083   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2084   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2085   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2086   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2087   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2088   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2089   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2090   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2091   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2092   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2093     if (isSafeTruncation(Val, 32) &&
2094         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2095                                      AsmParser->hasInv2PiInlineImm())) {
2096       Inst.addOperand(MCOperand::createImm(Val));
2097       setImmKindConst();
2098       return;
2099     }
2100 
2101     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2102     setImmKindLiteral();
2103     return;
2104 
2105   case AMDGPU::OPERAND_REG_IMM_INT64:
2106   case AMDGPU::OPERAND_REG_IMM_FP64:
2107   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2108   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2109   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2110     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2111       Inst.addOperand(MCOperand::createImm(Val));
2112       setImmKindConst();
2113       return;
2114     }
2115 
2116     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2117     setImmKindLiteral();
2118     return;
2119 
2120   case AMDGPU::OPERAND_REG_IMM_INT16:
2121   case AMDGPU::OPERAND_REG_IMM_FP16:
2122   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2123   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2124   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2125   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2126   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2127     if (isSafeTruncation(Val, 16) &&
2128         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2129                                      AsmParser->hasInv2PiInlineImm())) {
2130       Inst.addOperand(MCOperand::createImm(Val));
2131       setImmKindConst();
2132       return;
2133     }
2134 
2135     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2136     setImmKindLiteral();
2137     return;
2138 
2139   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2140   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2141   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2142   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2143     assert(isSafeTruncation(Val, 16));
2144     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2145                                         AsmParser->hasInv2PiInlineImm()));
2146 
2147     Inst.addOperand(MCOperand::createImm(Val));
2148     return;
2149   }
2150   case AMDGPU::OPERAND_KIMM32:
2151     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2152     setImmKindNone();
2153     return;
2154   case AMDGPU::OPERAND_KIMM16:
2155     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2156     setImmKindNone();
2157     return;
2158   default:
2159     llvm_unreachable("invalid operand size");
2160   }
2161 }
2162 
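// Add a KImm (literal encoded in the instruction) floating-point operand of
// the given bit width. Integer tokens are truncated; fp tokens are converted
// from double to the target format, so (for illustration)
// addKImmFPOperands<16> encodes the token 1.0 as 0x3C00.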
2163 template <unsigned Bitwidth>
2164 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2165   APInt Literal(64, Imm.Val);
2166   setImmKindNone();
2167 
2168   if (!Imm.IsFPImm) {
    // We got an int literal token.
2170     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2171     return;
2172   }
2173 
2174   bool Lost;
2175   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2176   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2177                     APFloat::rmNearestTiesToEven, &Lost);
2178   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2179 }
2180 
2181 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2182   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2183 }
2184 
2185 static bool isInlineValue(unsigned Reg) {
2186   switch (Reg) {
2187   case AMDGPU::SRC_SHARED_BASE:
2188   case AMDGPU::SRC_SHARED_LIMIT:
2189   case AMDGPU::SRC_PRIVATE_BASE:
2190   case AMDGPU::SRC_PRIVATE_LIMIT:
2191   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2192     return true;
2193   case AMDGPU::SRC_VCCZ:
2194   case AMDGPU::SRC_EXECZ:
2195   case AMDGPU::SRC_SCC:
2196     return true;
2197   case AMDGPU::SGPR_NULL:
2198     return true;
2199   default:
2200     return false;
2201   }
2202 }
2203 
2204 bool AMDGPUOperand::isInlineValue() const {
2205   return isRegKind() && ::isInlineValue(getReg());
2206 }
2207 
2208 //===----------------------------------------------------------------------===//
2209 // AsmParser
2210 //===----------------------------------------------------------------------===//
2211 
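// Map a register kind and width (in 32-bit registers) to a register class ID,
// or -1 if no class of that width exists. For illustration, (IS_VGPR, 4)
// yields VReg_128RegClassID, the class backing v[N:N+3] operands.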
2212 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2213   if (Is == IS_VGPR) {
2214     switch (RegWidth) {
2215       default: return -1;
2216       case 1: return AMDGPU::VGPR_32RegClassID;
2217       case 2: return AMDGPU::VReg_64RegClassID;
2218       case 3: return AMDGPU::VReg_96RegClassID;
2219       case 4: return AMDGPU::VReg_128RegClassID;
2220       case 5: return AMDGPU::VReg_160RegClassID;
2221       case 6: return AMDGPU::VReg_192RegClassID;
2222       case 7: return AMDGPU::VReg_224RegClassID;
2223       case 8: return AMDGPU::VReg_256RegClassID;
2224       case 16: return AMDGPU::VReg_512RegClassID;
2225       case 32: return AMDGPU::VReg_1024RegClassID;
2226     }
2227   } else if (Is == IS_TTMP) {
2228     switch (RegWidth) {
2229       default: return -1;
2230       case 1: return AMDGPU::TTMP_32RegClassID;
2231       case 2: return AMDGPU::TTMP_64RegClassID;
2232       case 4: return AMDGPU::TTMP_128RegClassID;
2233       case 8: return AMDGPU::TTMP_256RegClassID;
2234       case 16: return AMDGPU::TTMP_512RegClassID;
2235     }
2236   } else if (Is == IS_SGPR) {
2237     switch (RegWidth) {
2238       default: return -1;
2239       case 1: return AMDGPU::SGPR_32RegClassID;
2240       case 2: return AMDGPU::SGPR_64RegClassID;
2241       case 3: return AMDGPU::SGPR_96RegClassID;
2242       case 4: return AMDGPU::SGPR_128RegClassID;
2243       case 5: return AMDGPU::SGPR_160RegClassID;
2244       case 6: return AMDGPU::SGPR_192RegClassID;
2245       case 7: return AMDGPU::SGPR_224RegClassID;
2246       case 8: return AMDGPU::SGPR_256RegClassID;
2247       case 16: return AMDGPU::SGPR_512RegClassID;
2248     }
2249   } else if (Is == IS_AGPR) {
2250     switch (RegWidth) {
2251       default: return -1;
2252       case 1: return AMDGPU::AGPR_32RegClassID;
2253       case 2: return AMDGPU::AReg_64RegClassID;
2254       case 3: return AMDGPU::AReg_96RegClassID;
2255       case 4: return AMDGPU::AReg_128RegClassID;
2256       case 5: return AMDGPU::AReg_160RegClassID;
2257       case 6: return AMDGPU::AReg_192RegClassID;
2258       case 7: return AMDGPU::AReg_224RegClassID;
2259       case 8: return AMDGPU::AReg_256RegClassID;
2260       case 16: return AMDGPU::AReg_512RegClassID;
2261       case 32: return AMDGPU::AReg_1024RegClassID;
2262     }
2263   }
2264   return -1;
2265 }
2266 
2267 static unsigned getSpecialRegForName(StringRef RegName) {
2268   return StringSwitch<unsigned>(RegName)
2269     .Case("exec", AMDGPU::EXEC)
2270     .Case("vcc", AMDGPU::VCC)
2271     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2272     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2273     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2274     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2275     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2276     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2277     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2278     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2279     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2280     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2281     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2282     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2283     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2284     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2285     .Case("m0", AMDGPU::M0)
2286     .Case("vccz", AMDGPU::SRC_VCCZ)
2287     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2288     .Case("execz", AMDGPU::SRC_EXECZ)
2289     .Case("src_execz", AMDGPU::SRC_EXECZ)
2290     .Case("scc", AMDGPU::SRC_SCC)
2291     .Case("src_scc", AMDGPU::SRC_SCC)
2292     .Case("tba", AMDGPU::TBA)
2293     .Case("tma", AMDGPU::TMA)
2294     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2295     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2296     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2297     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2298     .Case("vcc_lo", AMDGPU::VCC_LO)
2299     .Case("vcc_hi", AMDGPU::VCC_HI)
2300     .Case("exec_lo", AMDGPU::EXEC_LO)
2301     .Case("exec_hi", AMDGPU::EXEC_HI)
2302     .Case("tma_lo", AMDGPU::TMA_LO)
2303     .Case("tma_hi", AMDGPU::TMA_HI)
2304     .Case("tba_lo", AMDGPU::TBA_LO)
2305     .Case("tba_hi", AMDGPU::TBA_HI)
2306     .Case("pc", AMDGPU::PC_REG)
2307     .Case("null", AMDGPU::SGPR_NULL)
2308     .Default(AMDGPU::NoRegister);
2309 }
2310 
2311 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2312                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2313   auto R = parseRegister();
2314   if (!R) return true;
2315   assert(R->isReg());
2316   RegNo = R->getReg();
2317   StartLoc = R->getStartLoc();
2318   EndLoc = R->getEndLoc();
2319   return false;
2320 }
2321 
2322 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2323                                     SMLoc &EndLoc) {
2324   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2325 }
2326 
2327 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2328                                                        SMLoc &StartLoc,
2329                                                        SMLoc &EndLoc) {
2330   bool Result =
2331       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2332   bool PendingErrors = getParser().hasPendingError();
2333   getParser().clearPendingErrors();
2334   if (PendingErrors)
2335     return MatchOperand_ParseFail;
2336   if (Result)
2337     return MatchOperand_NoMatch;
2338   return MatchOperand_Success;
2339 }
2340 
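// Fold one more register into a register list being parsed. Special register
// pairs such as [exec_lo, exec_hi] collapse into their 64-bit alias (EXEC);
// regular registers must have consecutive indices, so (for illustration)
// [v0, v1, v2] is accepted while [v0, v2] is rejected.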
2341 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2342                                             RegisterKind RegKind, unsigned Reg1,
2343                                             SMLoc Loc) {
2344   switch (RegKind) {
2345   case IS_SPECIAL:
2346     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2347       Reg = AMDGPU::EXEC;
2348       RegWidth = 2;
2349       return true;
2350     }
2351     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2352       Reg = AMDGPU::FLAT_SCR;
2353       RegWidth = 2;
2354       return true;
2355     }
2356     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2357       Reg = AMDGPU::XNACK_MASK;
2358       RegWidth = 2;
2359       return true;
2360     }
2361     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2362       Reg = AMDGPU::VCC;
2363       RegWidth = 2;
2364       return true;
2365     }
2366     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2367       Reg = AMDGPU::TBA;
2368       RegWidth = 2;
2369       return true;
2370     }
2371     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2372       Reg = AMDGPU::TMA;
2373       RegWidth = 2;
2374       return true;
2375     }
2376     Error(Loc, "register does not fit in the list");
2377     return false;
2378   case IS_VGPR:
2379   case IS_SGPR:
2380   case IS_AGPR:
2381   case IS_TTMP:
2382     if (Reg1 != Reg + RegWidth) {
2383       Error(Loc, "registers in a list must have consecutive indices");
2384       return false;
2385     }
2386     RegWidth++;
2387     return true;
2388   default:
2389     llvm_unreachable("unexpected register kind");
2390   }
2391 }
2392 
2393 struct RegInfo {
2394   StringLiteral Name;
2395   RegisterKind Kind;
2396 };
2397 
2398 static constexpr RegInfo RegularRegisters[] = {
2399   {{"v"},    IS_VGPR},
2400   {{"s"},    IS_SGPR},
2401   {{"ttmp"}, IS_TTMP},
2402   {{"acc"},  IS_AGPR},
2403   {{"a"},    IS_AGPR},
2404 };
2405 
2406 static bool isRegularReg(RegisterKind Kind) {
2407   return Kind == IS_VGPR ||
2408          Kind == IS_SGPR ||
2409          Kind == IS_TTMP ||
2410          Kind == IS_AGPR;
2411 }
2412 
2413 static const RegInfo* getRegularRegInfo(StringRef Str) {
2414   for (const RegInfo &Reg : RegularRegisters)
2415     if (Str.startswith(Reg.Name))
2416       return &Reg;
2417   return nullptr;
2418 }
2419 
2420 static bool getRegNum(StringRef Str, unsigned& Num) {
2421   return !Str.getAsInteger(10, Num);
2422 }
2423 
2424 bool
2425 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2426                             const AsmToken &NextToken) const {
2427 
2428   // A list of consecutive registers: [s0,s1,s2,s3]
2429   if (Token.is(AsmToken::LBrac))
2430     return true;
2431 
2432   if (!Token.is(AsmToken::Identifier))
2433     return false;
2434 
2435   // A single register like s0 or a range of registers like s[0:1]
2436 
2437   StringRef Str = Token.getString();
2438   const RegInfo *Reg = getRegularRegInfo(Str);
2439   if (Reg) {
2440     StringRef RegName = Reg->Name;
2441     StringRef RegSuffix = Str.substr(RegName.size());
2442     if (!RegSuffix.empty()) {
2443       unsigned Num;
2444       // A single register with an index: rXX
2445       if (getRegNum(RegSuffix, Num))
2446         return true;
2447     } else {
2448       // A range of registers: r[XX:YY].
2449       if (NextToken.is(AsmToken::LBrac))
2450         return true;
2451     }
2452   }
2453 
2454   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2455 }
2456 
2457 bool
2458 AMDGPUAsmParser::isRegister()
2459 {
2460   return isRegister(getToken(), peekToken());
2461 }
2462 
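// Resolve a regular (v/s/ttmp/a) register from its starting index and width.
// SGPR and TTMP tuples must start on an index aligned to min(width, 4), so
// (for illustration) s[4:7] resolves to register index 1 of SGPR_128 while
// s[2:5] is rejected with "invalid register alignment".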
2463 unsigned
2464 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2465                                unsigned RegNum,
2466                                unsigned RegWidth,
2467                                SMLoc Loc) {
2468 
2469   assert(isRegularReg(RegKind));
2470 
2471   unsigned AlignSize = 1;
2472   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2473     // SGPR and TTMP registers must be aligned.
2474     // Max required alignment is 4 dwords.
2475     AlignSize = std::min(RegWidth, 4u);
2476   }
2477 
2478   if (RegNum % AlignSize != 0) {
2479     Error(Loc, "invalid register alignment");
2480     return AMDGPU::NoRegister;
2481   }
2482 
2483   unsigned RegIdx = RegNum / AlignSize;
2484   int RCID = getRegClass(RegKind, RegWidth);
2485   if (RCID == -1) {
2486     Error(Loc, "invalid or unsupported register size");
2487     return AMDGPU::NoRegister;
2488   }
2489 
2490   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2491   const MCRegisterClass RC = TRI->getRegClass(RCID);
2492   if (RegIdx >= RC.getNumRegs()) {
2493     Error(Loc, "register index is out of range");
2494     return AMDGPU::NoRegister;
2495   }
2496 
2497   return RC.getRegister(RegIdx);
2498 }
2499 
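// Parse a bracketed register index or range such as "[8]" or "[8:11]",
// returning the first index and the width in registers (1 and 4 respectively
// for these examples).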
2500 bool
2501 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2502   int64_t RegLo, RegHi;
2503   if (!skipToken(AsmToken::LBrac, "missing register index"))
2504     return false;
2505 
2506   SMLoc FirstIdxLoc = getLoc();
2507   SMLoc SecondIdxLoc;
2508 
2509   if (!parseExpr(RegLo))
2510     return false;
2511 
2512   if (trySkipToken(AsmToken::Colon)) {
2513     SecondIdxLoc = getLoc();
2514     if (!parseExpr(RegHi))
2515       return false;
2516   } else {
2517     RegHi = RegLo;
2518   }
2519 
2520   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2521     return false;
2522 
2523   if (!isUInt<32>(RegLo)) {
2524     Error(FirstIdxLoc, "invalid register index");
2525     return false;
2526   }
2527 
2528   if (!isUInt<32>(RegHi)) {
2529     Error(SecondIdxLoc, "invalid register index");
2530     return false;
2531   }
2532 
2533   if (RegLo > RegHi) {
2534     Error(FirstIdxLoc, "first register index should not exceed second index");
2535     return false;
2536   }
2537 
2538   Num = static_cast<unsigned>(RegLo);
2539   Width = (RegHi - RegLo) + 1;
2540   return true;
2541 }
2542 
2543 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2544                                           unsigned &RegNum, unsigned &RegWidth,
2545                                           SmallVectorImpl<AsmToken> &Tokens) {
2546   assert(isToken(AsmToken::Identifier));
2547   unsigned Reg = getSpecialRegForName(getTokenStr());
2548   if (Reg) {
2549     RegNum = 0;
2550     RegWidth = 1;
2551     RegKind = IS_SPECIAL;
2552     Tokens.push_back(getToken());
2553     lex(); // skip register name
2554   }
2555   return Reg;
2556 }
2557 
2558 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2559                                           unsigned &RegNum, unsigned &RegWidth,
2560                                           SmallVectorImpl<AsmToken> &Tokens) {
2561   assert(isToken(AsmToken::Identifier));
2562   StringRef RegName = getTokenStr();
2563   auto Loc = getLoc();
2564 
2565   const RegInfo *RI = getRegularRegInfo(RegName);
2566   if (!RI) {
2567     Error(Loc, "invalid register name");
2568     return AMDGPU::NoRegister;
2569   }
2570 
2571   Tokens.push_back(getToken());
2572   lex(); // skip register name
2573 
2574   RegKind = RI->Kind;
2575   StringRef RegSuffix = RegName.substr(RI->Name.size());
2576   if (!RegSuffix.empty()) {
2577     // Single 32-bit register: vXX.
2578     if (!getRegNum(RegSuffix, RegNum)) {
2579       Error(Loc, "invalid register index");
2580       return AMDGPU::NoRegister;
2581     }
2582     RegWidth = 1;
2583   } else {
2584     // Range of registers: v[XX:YY]. ":YY" is optional.
2585     if (!ParseRegRange(RegNum, RegWidth))
2586       return AMDGPU::NoRegister;
2587   }
2588 
2589   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2590 }
2591 
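// Parse a bracketed register list such as [s0,s1,s2,s3]: the elements must be
// single 32-bit registers of the same kind, and the list folds into one
// register tuple (here an SGPR_128 starting at s0).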
2592 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2593                                        unsigned &RegWidth,
2594                                        SmallVectorImpl<AsmToken> &Tokens) {
2595   unsigned Reg = AMDGPU::NoRegister;
2596   auto ListLoc = getLoc();
2597 
2598   if (!skipToken(AsmToken::LBrac,
2599                  "expected a register or a list of registers")) {
2600     return AMDGPU::NoRegister;
2601   }
2602 
2603   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2604 
2605   auto Loc = getLoc();
2606   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2607     return AMDGPU::NoRegister;
2608   if (RegWidth != 1) {
2609     Error(Loc, "expected a single 32-bit register");
2610     return AMDGPU::NoRegister;
2611   }
2612 
2613   for (; trySkipToken(AsmToken::Comma); ) {
2614     RegisterKind NextRegKind;
2615     unsigned NextReg, NextRegNum, NextRegWidth;
2616     Loc = getLoc();
2617 
2618     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2619                              NextRegNum, NextRegWidth,
2620                              Tokens)) {
2621       return AMDGPU::NoRegister;
2622     }
2623     if (NextRegWidth != 1) {
2624       Error(Loc, "expected a single 32-bit register");
2625       return AMDGPU::NoRegister;
2626     }
2627     if (NextRegKind != RegKind) {
2628       Error(Loc, "registers in a list must be of the same kind");
2629       return AMDGPU::NoRegister;
2630     }
2631     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2632       return AMDGPU::NoRegister;
2633   }
2634 
2635   if (!skipToken(AsmToken::RBrac,
2636                  "expected a comma or a closing square bracket")) {
2637     return AMDGPU::NoRegister;
2638   }
2639 
2640   if (isRegularReg(RegKind))
2641     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2642 
2643   return Reg;
2644 }
2645 
2646 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2647                                           unsigned &RegNum, unsigned &RegWidth,
2648                                           SmallVectorImpl<AsmToken> &Tokens) {
2649   auto Loc = getLoc();
2650   Reg = AMDGPU::NoRegister;
2651 
2652   if (isToken(AsmToken::Identifier)) {
2653     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2654     if (Reg == AMDGPU::NoRegister)
2655       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2656   } else {
2657     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2658   }
2659 
2660   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2661   if (Reg == AMDGPU::NoRegister) {
2662     assert(Parser.hasPendingError());
2663     return false;
2664   }
2665 
2666   if (!subtargetHasRegister(*TRI, Reg)) {
2667     if (Reg == AMDGPU::SGPR_NULL) {
2668       Error(Loc, "'null' operand is not supported on this GPU");
2669     } else {
2670       Error(Loc, "register not available on this GPU");
2671     }
2672     return false;
2673   }
2674 
2675   return true;
2676 }
2677 
2678 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2679                                           unsigned &RegNum, unsigned &RegWidth,
2680                                           bool RestoreOnFailure /*=false*/) {
2681   Reg = AMDGPU::NoRegister;
2682 
2683   SmallVector<AsmToken, 1> Tokens;
2684   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2685     if (RestoreOnFailure) {
2686       while (!Tokens.empty()) {
2687         getLexer().UnLex(Tokens.pop_back_val());
2688       }
2689     }
2690     return true;
2691   }
2692   return false;
2693 }
2694 
2695 Optional<StringRef>
2696 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2697   switch (RegKind) {
2698   case IS_VGPR:
2699     return StringRef(".amdgcn.next_free_vgpr");
2700   case IS_SGPR:
2701     return StringRef(".amdgcn.next_free_sgpr");
2702   default:
2703     return None;
2704   }
2705 }
2706 
2707 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2708   auto SymbolName = getGprCountSymbolName(RegKind);
2709   assert(SymbolName && "initializing invalid register kind");
2710   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2711   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2712 }
2713 
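// Keep the .amdgcn.next_free_{v,s}gpr symbols one past the highest register
// index seen so far; e.g. a use of v[6:7] raises .amdgcn.next_free_vgpr to at
// least 8.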
2714 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2715                                             unsigned DwordRegIndex,
2716                                             unsigned RegWidth) {
2717   // Symbols are only defined for GCN targets
2718   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2719     return true;
2720 
2721   auto SymbolName = getGprCountSymbolName(RegKind);
2722   if (!SymbolName)
2723     return true;
2724   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2725 
2726   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2727   int64_t OldCount;
2728 
2729   if (!Sym->isVariable())
2730     return !Error(getLoc(),
2731                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2732   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2733     return !Error(
2734         getLoc(),
2735         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2736 
2737   if (OldCount <= NewMax)
2738     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2739 
2740   return true;
2741 }
2742 
2743 std::unique_ptr<AMDGPUOperand>
2744 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2745   const auto &Tok = getToken();
2746   SMLoc StartLoc = Tok.getLoc();
2747   SMLoc EndLoc = Tok.getEndLoc();
2748   RegisterKind RegKind;
2749   unsigned Reg, RegNum, RegWidth;
2750 
2751   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2752     return nullptr;
2753   }
2754   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2755     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2756       return nullptr;
2757   } else
2758     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2759   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2760 }
2761 
2762 OperandMatchResultTy
2763 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2764   // TODO: add syntactic sugar for 1/(2*PI)
2765 
2766   assert(!isRegister());
2767   assert(!isModifier());
2768 
2769   const auto& Tok = getToken();
2770   const auto& NextTok = peekToken();
2771   bool IsReal = Tok.is(AsmToken::Real);
2772   SMLoc S = getLoc();
2773   bool Negate = false;
2774 
2775   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2776     lex();
2777     IsReal = true;
2778     Negate = true;
2779   }
2780 
2781   if (IsReal) {
    // Floating-point expressions are not supported.
    // Only floating-point literals with an optional
    // sign are allowed.
2785 
2786     StringRef Num = getTokenStr();
2787     lex();
2788 
2789     APFloat RealVal(APFloat::IEEEdouble());
2790     auto roundMode = APFloat::rmNearestTiesToEven;
2791     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2792       return MatchOperand_ParseFail;
2793     }
2794     if (Negate)
2795       RealVal.changeSign();
2796 
2797     Operands.push_back(
2798       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2799                                AMDGPUOperand::ImmTyNone, true));
2800 
2801     return MatchOperand_Success;
2802 
2803   } else {
2804     int64_t IntVal;
2805     const MCExpr *Expr;
2806     SMLoc S = getLoc();
2807 
2808     if (HasSP3AbsModifier) {
      // This is a workaround for handling expressions
      // as arguments of the SP3 'abs' modifier, for example:
      //     |1.0|
      //     |-1|
      //     |1+x|
      // This syntax is not compatible with the syntax of standard
      // MC expressions (due to the trailing '|').
2816       SMLoc EndLoc;
2817       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2818         return MatchOperand_ParseFail;
2819     } else {
2820       if (Parser.parseExpression(Expr))
2821         return MatchOperand_ParseFail;
2822     }
2823 
2824     if (Expr->evaluateAsAbsolute(IntVal)) {
2825       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2826     } else {
2827       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2828     }
2829 
2830     return MatchOperand_Success;
2831   }
2832 
2833   return MatchOperand_NoMatch;
2834 }
2835 
2836 OperandMatchResultTy
2837 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2838   if (!isRegister())
2839     return MatchOperand_NoMatch;
2840 
2841   if (auto R = parseRegister()) {
2842     assert(R->isReg());
2843     Operands.push_back(std::move(R));
2844     return MatchOperand_Success;
2845   }
2846   return MatchOperand_ParseFail;
2847 }
2848 
2849 OperandMatchResultTy
2850 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2851   auto res = parseReg(Operands);
2852   if (res != MatchOperand_NoMatch) {
2853     return res;
2854   } else if (isModifier()) {
2855     return MatchOperand_NoMatch;
2856   } else {
2857     return parseImm(Operands, HasSP3AbsMod);
2858   }
2859 }
2860 
2861 bool
2862 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2863   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2864     const auto &str = Token.getString();
2865     return str == "abs" || str == "neg" || str == "sext";
2866   }
2867   return false;
2868 }
2869 
2870 bool
2871 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2872   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2873 }
2874 
2875 bool
2876 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2877   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2878 }
2879 
2880 bool
2881 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2882   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2883 }
2884 
2885 // Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
2887 // avoid parsing these modifiers as expressions. Currently
2888 // recognized sequences are:
2889 //   |...|
2890 //   abs(...)
2891 //   neg(...)
2892 //   sext(...)
2893 //   -reg
2894 //   -|...|
2895 //   -abs(...)
2896 //   name:...
2897 // Note that simple opcode modifiers like 'gds' may be parsed as
2898 // expressions; this is a special case. See getExpressionAsToken.
2899 //
2900 bool
2901 AMDGPUAsmParser::isModifier() {
2902 
2903   AsmToken Tok = getToken();
2904   AsmToken NextToken[2];
2905   peekTokens(NextToken);
2906 
2907   return isOperandModifier(Tok, NextToken[0]) ||
2908          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2909          isOpcodeModifierWithVal(Tok, NextToken[0]);
2910 }
2911 
2912 // Check if the current token is an SP3 'neg' modifier.
2913 // Currently this modifier is allowed in the following context:
2914 //
2915 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2916 // 2. Before an 'abs' modifier: -abs(...)
2917 // 3. Before an SP3 'abs' modifier: -|...|
2918 //
2919 // In all other cases "-" is handled as a part
2920 // of an expression that follows the sign.
2921 //
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2933 //
2934 bool
2935 AMDGPUAsmParser::parseSP3NegModifier() {
2936 
2937   AsmToken NextToken[2];
2938   peekTokens(NextToken);
2939 
2940   if (isToken(AsmToken::Minus) &&
2941       (isRegister(NextToken[0], NextToken[1]) ||
2942        NextToken[0].is(AsmToken::Pipe) ||
2943        isId(NextToken[0], "abs"))) {
2944     lex();
2945     return true;
2946   }
2947 
2948   return false;
2949 }
2950 
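// Parse a register or immediate with optional floating-point input modifiers.
// Accepted spellings include (for illustration) "v0", "-v0", "|v0|",
// "abs(v0)", "neg(v0)", "-|v0|", "-abs(v0)" and "neg(abs(v0))"; doubling up
// the same modifier, e.g. "--1", is rejected.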
2951 OperandMatchResultTy
2952 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2953                                               bool AllowImm) {
2954   bool Neg, SP3Neg;
2955   bool Abs, SP3Abs;
2956   SMLoc Loc;
2957 
2958   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2959   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2960     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2961     return MatchOperand_ParseFail;
2962   }
2963 
2964   SP3Neg = parseSP3NegModifier();
2965 
2966   Loc = getLoc();
2967   Neg = trySkipId("neg");
2968   if (Neg && SP3Neg) {
2969     Error(Loc, "expected register or immediate");
2970     return MatchOperand_ParseFail;
2971   }
2972   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2973     return MatchOperand_ParseFail;
2974 
2975   Abs = trySkipId("abs");
2976   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2977     return MatchOperand_ParseFail;
2978 
2979   Loc = getLoc();
2980   SP3Abs = trySkipToken(AsmToken::Pipe);
2981   if (Abs && SP3Abs) {
2982     Error(Loc, "expected register or immediate");
2983     return MatchOperand_ParseFail;
2984   }
2985 
2986   OperandMatchResultTy Res;
2987   if (AllowImm) {
2988     Res = parseRegOrImm(Operands, SP3Abs);
2989   } else {
2990     Res = parseReg(Operands);
2991   }
2992   if (Res != MatchOperand_Success) {
2993     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2994   }
2995 
2996   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2997     return MatchOperand_ParseFail;
2998   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2999     return MatchOperand_ParseFail;
3000   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3001     return MatchOperand_ParseFail;
3002 
3003   AMDGPUOperand::Modifiers Mods;
3004   Mods.Abs = Abs || SP3Abs;
3005   Mods.Neg = Neg || SP3Neg;
3006 
3007   if (Mods.hasFPModifiers()) {
3008     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3009     if (Op.isExpr()) {
3010       Error(Op.getStartLoc(), "expected an absolute expression");
3011       return MatchOperand_ParseFail;
3012     }
3013     Op.setModifiers(Mods);
3014   }
3015   return MatchOperand_Success;
3016 }
3017 
3018 OperandMatchResultTy
3019 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3020                                                bool AllowImm) {
3021   bool Sext = trySkipId("sext");
3022   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3023     return MatchOperand_ParseFail;
3024 
3025   OperandMatchResultTy Res;
3026   if (AllowImm) {
3027     Res = parseRegOrImm(Operands);
3028   } else {
3029     Res = parseReg(Operands);
3030   }
3031   if (Res != MatchOperand_Success) {
3032     return Sext? MatchOperand_ParseFail : Res;
3033   }
3034 
3035   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3036     return MatchOperand_ParseFail;
3037 
3038   AMDGPUOperand::Modifiers Mods;
3039   Mods.Sext = Sext;
3040 
3041   if (Mods.hasIntModifiers()) {
3042     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3043     if (Op.isExpr()) {
3044       Error(Op.getStartLoc(), "expected an absolute expression");
3045       return MatchOperand_ParseFail;
3046     }
3047     Op.setModifiers(Mods);
3048   }
3049 
3050   return MatchOperand_Success;
3051 }
3052 
3053 OperandMatchResultTy
3054 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3055   return parseRegOrImmWithFPInputMods(Operands, false);
3056 }
3057 
3058 OperandMatchResultTy
3059 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3060   return parseRegOrImmWithIntInputMods(Operands, false);
3061 }
3062 
3063 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3064   auto Loc = getLoc();
3065   if (trySkipId("off")) {
3066     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3067                                                 AMDGPUOperand::ImmTyOff, false));
3068     return MatchOperand_Success;
3069   }
3070 
3071   if (!isRegister())
3072     return MatchOperand_NoMatch;
3073 
3074   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3075   if (Reg) {
3076     Operands.push_back(std::move(Reg));
3077     return MatchOperand_Success;
3078   }
3079 
3080   return MatchOperand_ParseFail;
3081 
3082 }
3083 
3084 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3085   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3086 
3087   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3088       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3089       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3090       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3091     return Match_InvalidOperand;
3092 
3093   if ((TSFlags & SIInstrFlags::VOP3) &&
3094       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3095       getForcedEncodingSize() != 64)
3096     return Match_PreferE32;
3097 
3098   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3099       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD
3101     auto OpNum =
3102         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3103     const auto &Op = Inst.getOperand(OpNum);
3104     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3105       return Match_InvalidOperand;
3106     }
3107   }
3108 
3109   return Match_Success;
3110 }
3111 
3112 static ArrayRef<unsigned> getAllVariants() {
3113   static const unsigned Variants[] = {
3114     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3115     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3116   };
3117 
3118   return makeArrayRef(Variants);
3119 }
3120 
3121 // What asm variants we should check
3122 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3123   if (getForcedEncodingSize() == 32) {
3124     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3125     return makeArrayRef(Variants);
3126   }
3127 
3128   if (isForcedVOP3()) {
3129     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3130     return makeArrayRef(Variants);
3131   }
3132 
3133   if (isForcedSDWA()) {
3134     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3135                                         AMDGPUAsmVariants::SDWA9};
3136     return makeArrayRef(Variants);
3137   }
3138 
3139   if (isForcedDPP()) {
3140     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3141     return makeArrayRef(Variants);
3142   }
3143 
3144   return getAllVariants();
3145 }
3146 
3147 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3148   if (getForcedEncodingSize() == 32)
3149     return "e32";
3150 
3151   if (isForcedVOP3())
3152     return "e64";
3153 
3154   if (isForcedSDWA())
3155     return "sdwa";
3156 
3157   if (isForcedDPP())
3158     return "dpp";
3159 
3160   return "";
3161 }
3162 
3163 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3164   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3165   const unsigned Num = Desc.getNumImplicitUses();
3166   for (unsigned i = 0; i < Num; ++i) {
3167     unsigned Reg = Desc.ImplicitUses[i];
3168     switch (Reg) {
3169     case AMDGPU::FLAT_SCR:
3170     case AMDGPU::VCC:
3171     case AMDGPU::VCC_LO:
3172     case AMDGPU::VCC_HI:
3173     case AMDGPU::M0:
3174       return Reg;
3175     default:
3176       break;
3177     }
3178   }
3179   return AMDGPU::NoRegister;
3180 }
3181 
3182 // NB: This code is correct only when used to check constant
3183 // bus limitations because GFX7 supports no f16 inline constants.
3184 // Note that there are no cases when a GFX7 opcode violates
3185 // constant bus limitations due to the use of an f16 constant.
3186 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3187                                        unsigned OpIdx) const {
3188   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3189 
3190   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3191     return false;
3192   }
3193 
3194   const MCOperand &MO = Inst.getOperand(OpIdx);
3195 
3196   int64_t Val = MO.getImm();
3197   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3198 
3199   switch (OpSize) { // expected operand size
3200   case 8:
3201     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3202   case 4:
3203     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3204   case 2: {
3205     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3206     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3207         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3208         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3209       return AMDGPU::isInlinableIntLiteral(Val);
3210 
3211     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3212         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3213         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3214       return AMDGPU::isInlinableIntLiteralV216(Val);
3215 
3216     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3217         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3218         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3219       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3220 
3221     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3222   }
3223   default:
3224     llvm_unreachable("invalid operand size");
3225   }
3226 }
3227 
3228 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3229   if (!isGFX10Plus())
3230     return 1;
3231 
3232   switch (Opcode) {
3233   // 64-bit shift instructions can use only one scalar value input
3234   case AMDGPU::V_LSHLREV_B64_e64:
3235   case AMDGPU::V_LSHLREV_B64_gfx10:
3236   case AMDGPU::V_LSHRREV_B64_e64:
3237   case AMDGPU::V_LSHRREV_B64_gfx10:
3238   case AMDGPU::V_ASHRREV_I64_e64:
3239   case AMDGPU::V_ASHRREV_I64_gfx10:
3240   case AMDGPU::V_LSHL_B64_e64:
3241   case AMDGPU::V_LSHR_B64_e64:
3242   case AMDGPU::V_ASHR_I64_e64:
3243     return 1;
3244   default:
3245     return 2;
3246   }
3247 }
3248 
3249 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3250   const MCOperand &MO = Inst.getOperand(OpIdx);
3251   if (MO.isImm()) {
3252     return !isInlineConstant(Inst, OpIdx);
3253   } else if (MO.isReg()) {
3254     auto Reg = MO.getReg();
3255     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3256     auto PReg = mc2PseudoReg(Reg);
3257     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3258   } else {
3259     return true;
3260   }
3261 }
3262 
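// A VALU instruction may read only a limited number of distinct scalar values
// (SGPRs, implicit VCC/M0/FLAT_SCRATCH reads, and literals) through the
// constant bus: one on pre-GFX10 targets and usually two on GFX10+ (see
// getConstantBusLimit). As a rough illustrative example,
// "v_add_f32_e64 v0, s1, s2" reads two different SGPRs, so it is rejected on
// pre-GFX10 targets but accepted on GFX10.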
3263 bool
3264 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3265                                                 const OperandVector &Operands) {
3266   const unsigned Opcode = Inst.getOpcode();
3267   const MCInstrDesc &Desc = MII.get(Opcode);
3268   unsigned LastSGPR = AMDGPU::NoRegister;
3269   unsigned ConstantBusUseCount = 0;
3270   unsigned NumLiterals = 0;
3271   unsigned LiteralSize;
3272 
3273   if (Desc.TSFlags &
3274       (SIInstrFlags::VOPC |
3275        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3276        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3277        SIInstrFlags::SDWA)) {
3278     // Check special imm operands (used by madmk, etc)
3279     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3280       ++NumLiterals;
3281       LiteralSize = 4;
3282     }
3283 
3284     SmallDenseSet<unsigned> SGPRsUsed;
3285     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3286     if (SGPRUsed != AMDGPU::NoRegister) {
3287       SGPRsUsed.insert(SGPRUsed);
3288       ++ConstantBusUseCount;
3289     }
3290 
3291     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3292     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3293     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3294 
3295     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3296 
3297     for (int OpIdx : OpIndices) {
3298       if (OpIdx == -1) break;
3299 
3300       const MCOperand &MO = Inst.getOperand(OpIdx);
3301       if (usesConstantBus(Inst, OpIdx)) {
3302         if (MO.isReg()) {
3303           LastSGPR = mc2PseudoReg(MO.getReg());
3304           // Pairs of registers with partial intersections like these
3305           //   s0, s[0:1]
3306           //   flat_scratch_lo, flat_scratch
3307           //   flat_scratch_lo, flat_scratch_hi
3308           // are theoretically valid but they are disabled anyway.
3309           // Note that this code mimics SIInstrInfo::verifyInstruction
3310           if (!SGPRsUsed.count(LastSGPR)) {
3311             SGPRsUsed.insert(LastSGPR);
3312             ++ConstantBusUseCount;
3313           }
3314         } else { // Expression or a literal
3315 
3316           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3317             continue; // special operand like VINTERP attr_chan
3318 
3319           // An instruction may use only one literal.
3320           // This has been validated on the previous step.
3321           // See validateVOPLiteral.
3322           // This literal may be used as more than one operand.
3323           // If all these operands are of the same size,
3324           // this literal counts as one scalar value.
3325           // Otherwise it counts as 2 scalar values.
3326           // See "GFX10 Shader Programming", section 3.6.2.3.
3327 
3328           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3329           if (Size < 4) Size = 4;
3330 
3331           if (NumLiterals == 0) {
3332             NumLiterals = 1;
3333             LiteralSize = Size;
3334           } else if (LiteralSize != Size) {
3335             NumLiterals = 2;
3336           }
3337         }
3338       }
3339     }
3340   }
3341   ConstantBusUseCount += NumLiterals;
3342 
3343   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3344     return true;
3345 
3346   SMLoc LitLoc = getLitLoc(Operands);
3347   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3348   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3349   Error(Loc, "invalid operand (violates constant bus restrictions)");
3350   return false;
3351 }
3352 
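// Opcodes whose vdst is marked earlyclobber (e.g. the MQSAD family) must not
// have the destination overlap any source register. As an illustrative
// example, "v_mqsad_u32_u8 v[0:3], v[0:1], v2, v[4:7]" would be rejected
// because v[0:3] overlaps the v[0:1] source.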
3353 bool
3354 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3355                                                  const OperandVector &Operands) {
3356   const unsigned Opcode = Inst.getOpcode();
3357   const MCInstrDesc &Desc = MII.get(Opcode);
3358 
3359   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3360   if (DstIdx == -1 ||
3361       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3362     return true;
3363   }
3364 
3365   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3366 
3367   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3368   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3369   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3370 
3371   assert(DstIdx != -1);
3372   const MCOperand &Dst = Inst.getOperand(DstIdx);
3373   assert(Dst.isReg());
3374   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3375 
3376   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3377 
3378   for (int SrcIdx : SrcIndices) {
3379     if (SrcIdx == -1) break;
3380     const MCOperand &Src = Inst.getOperand(SrcIdx);
3381     if (Src.isReg()) {
3382       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3383       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3384         Error(getRegLoc(SrcReg, Operands),
3385           "destination must be different than all sources");
3386         return false;
3387       }
3388     }
3389   }
3390 
3391   return true;
3392 }
3393 
3394 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3395 
3396   const unsigned Opc = Inst.getOpcode();
3397   const MCInstrDesc &Desc = MII.get(Opc);
3398 
3399   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3400     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3401     assert(ClampIdx != -1);
3402     return Inst.getOperand(ClampIdx).getImm() == 0;
3403   }
3404 
3405   return true;
3406 }
3407 
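// The vdata size of a MIMG instruction must match the number of enabled dmask
// bits, plus one register if tfe is set, halved for packed d16. As an
// illustrative example, dmask:0x7 needs a 3-register vdata tuple such as
// v[0:2], or a 4-register tuple when tfe is also specified.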
3408 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3409 
3410   const unsigned Opc = Inst.getOpcode();
3411   const MCInstrDesc &Desc = MII.get(Opc);
3412 
3413   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3414     return true;
3415 
3416   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3417   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3418   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3419 
3420   assert(VDataIdx != -1);
3421 
3422   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3423     return true;
3424 
3425   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3426   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3427   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3428   if (DMask == 0)
3429     DMask = 1;
3430 
3431   unsigned DataSize =
3432     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3433   if (hasPackedD16()) {
3434     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3435     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3436       DataSize = (DataSize + 1) / 2;
3437   }
3438 
3439   return (VDataSize / 4) == DataSize + TFESize;
3440 }
3441 
3442 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3443   const unsigned Opc = Inst.getOpcode();
3444   const MCInstrDesc &Desc = MII.get(Opc);
3445 
3446   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3447     return true;
3448 
3449   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3450 
3451   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3452       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3453   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3454   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3455   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3456   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3457 
3458   assert(VAddr0Idx != -1);
3459   assert(SrsrcIdx != -1);
3460   assert(SrsrcIdx > VAddr0Idx);
3461 
3462   if (DimIdx == -1)
3463     return true; // intersect_ray
3464 
3465   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3466   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3467   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3468   unsigned ActualAddrSize =
3469       IsNSA ? SrsrcIdx - VAddr0Idx
3470             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3471   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3472 
3473   unsigned ExpectedAddrSize =
3474       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3475 
3476   if (!IsNSA) {
3477     if (ExpectedAddrSize > 8)
3478       ExpectedAddrSize = 16;
3479 
3480     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3481     // This provides backward compatibility for assembly created
3482     // before 160b/192b/224b types were directly supported.
3483     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3484       return true;
3485   }
3486 
3487   return ActualAddrSize == ExpectedAddrSize;
3488 }
3489 
3490 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3491 
3492   const unsigned Opc = Inst.getOpcode();
3493   const MCInstrDesc &Desc = MII.get(Opc);
3494 
3495   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3496     return true;
3497   if (!Desc.mayLoad() || !Desc.mayStore())
3498     return true; // Not atomic
3499 
3500   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3501   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3502 
3503   // This is an incomplete check because image_atomic_cmpswap
3504   // may only use 0x3 and 0xf while other atomic operations
3505   // may use 0x1 and 0x3. However these limitations are
3506   // verified when we check that dmask matches dst size.
3507   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3508 }
3509 
3510 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3511 
3512   const unsigned Opc = Inst.getOpcode();
3513   const MCInstrDesc &Desc = MII.get(Opc);
3514 
3515   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3516     return true;
3517 
3518   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3519   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3520 
3521   // GATHER4 instructions use dmask in a different fashion compared to
3522   // other MIMG instructions. The only useful DMASK values are
3523   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3524   // (red,red,red,red) etc.) The ISA document doesn't mention
3525   // this.
3526   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3527 }
3528 
3529 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3530   const unsigned Opc = Inst.getOpcode();
3531   const MCInstrDesc &Desc = MII.get(Opc);
3532 
3533   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3534     return true;
3535 
3536   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3537   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3538       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3539 
3540   if (!BaseOpcode->MSAA)
3541     return true;
3542 
3543   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3544   assert(DimIdx != -1);
3545 
3546   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3547   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3548 
3549   return DimInfo->MSAA;
3550 }
3551 
3552 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) {
3554   switch (Opcode) {
3555   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3556   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3557   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3558     return true;
3559   default:
3560     return false;
3561   }
3562 }
3563 
3564 // movrels* opcodes should only allow VGPRs as src0.
3565 // This is specified in the .td description for vop1/vop3,
3566 // but sdwa is handled differently. See isSDWAOperand.
3567 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3568                                       const OperandVector &Operands) {
3569 
3570   const unsigned Opc = Inst.getOpcode();
3571   const MCInstrDesc &Desc = MII.get(Opc);
3572 
3573   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3574     return true;
3575 
3576   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3577   assert(Src0Idx != -1);
3578 
3579   SMLoc ErrLoc;
3580   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3581   if (Src0.isReg()) {
3582     auto Reg = mc2PseudoReg(Src0.getReg());
3583     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3584     if (!isSGPR(Reg, TRI))
3585       return true;
3586     ErrLoc = getRegLoc(Reg, Operands);
3587   } else {
3588     ErrLoc = getConstLoc(Operands);
3589   }
3590 
3591   Error(ErrLoc, "source operand must be a VGPR");
3592   return false;
3593 }
3594 
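// v_accvgpr_write_b32 accepts only a VGPR or an inline constant as src0.
// Illustrative example: "v_accvgpr_write_b32 a0, v1" is accepted, while
// "v_accvgpr_write_b32 a0, s1" is diagnosed below.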
3595 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3596                                           const OperandVector &Operands) {
3597 
3598   const unsigned Opc = Inst.getOpcode();
3599 
3600   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3601     return true;
3602 
3603   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3604   assert(Src0Idx != -1);
3605 
3606   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3607   if (!Src0.isReg())
3608     return true;
3609 
3610   auto Reg = mc2PseudoReg(Src0.getReg());
3611   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3612   if (isSGPR(Reg, TRI)) {
3613     Error(getRegLoc(Reg, Operands),
3614           "source operand must be either a VGPR or an inline constant");
3615     return false;
3616   }
3617 
3618   return true;
3619 }
3620 
3621 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3622                                    const OperandVector &Operands) {
3623   const unsigned Opc = Inst.getOpcode();
3624   const MCInstrDesc &Desc = MII.get(Opc);
3625 
3626   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3627     return true;
3628 
3629   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3630   if (Src2Idx == -1)
3631     return true;
3632 
3633   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3634   if (!Src2.isReg())
3635     return true;
3636 
3637   MCRegister Src2Reg = Src2.getReg();
3638   MCRegister DstReg = Inst.getOperand(0).getReg();
3639   if (Src2Reg == DstReg)
3640     return true;
3641 
3642   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3643   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3644     return true;
3645 
3646   if (isRegIntersect(Src2Reg, DstReg, TRI)) {
3647     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3648           "source 2 operand must not partially overlap with dst");
3649     return false;
3650   }
3651 
3652   return true;
3653 }
3654 
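// The VOP3B div_scale opcodes do not accept ABS source modifiers. Illustrative
// example: "v_div_scale_f32 v0, vcc, |v1|, v2, v3" is rejected, while the same
// instruction without the |...| modifier is accepted.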
3655 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3656   switch (Inst.getOpcode()) {
3657   default:
3658     return true;
3659   case V_DIV_SCALE_F32_gfx6_gfx7:
3660   case V_DIV_SCALE_F32_vi:
3661   case V_DIV_SCALE_F32_gfx10:
3662   case V_DIV_SCALE_F64_gfx6_gfx7:
3663   case V_DIV_SCALE_F64_vi:
3664   case V_DIV_SCALE_F64_gfx10:
3665     break;
3666   }
3667 
3668   // TODO: Check that src0 = src1 or src2.
3669 
3670   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3671                     AMDGPU::OpName::src1_modifiers,
3672                     AMDGPU::OpName::src2_modifiers}) {
3673     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3674             .getImm() &
3675         SISrcMods::ABS) {
3676       return false;
3677     }
3678   }
3679 
3680   return true;
3681 }
3682 
3683 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3684 
3685   const unsigned Opc = Inst.getOpcode();
3686   const MCInstrDesc &Desc = MII.get(Opc);
3687 
3688   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3689     return true;
3690 
3691   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3692   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3693     if (isCI() || isSI())
3694       return false;
3695   }
3696 
3697   return true;
3698 }
3699 
3700 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3701   const unsigned Opc = Inst.getOpcode();
3702   const MCInstrDesc &Desc = MII.get(Opc);
3703 
3704   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3705     return true;
3706 
3707   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3708   if (DimIdx < 0)
3709     return true;
3710 
3711   long Imm = Inst.getOperand(DimIdx).getImm();
3712   if (Imm < 0 || Imm >= 8)
3713     return false;
3714 
3715   return true;
3716 }
3717 
3718 static bool IsRevOpcode(const unsigned Opcode) {
3720   switch (Opcode) {
3721   case AMDGPU::V_SUBREV_F32_e32:
3722   case AMDGPU::V_SUBREV_F32_e64:
3723   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3724   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3725   case AMDGPU::V_SUBREV_F32_e32_vi:
3726   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3727   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3728   case AMDGPU::V_SUBREV_F32_e64_vi:
3729 
3730   case AMDGPU::V_SUBREV_CO_U32_e32:
3731   case AMDGPU::V_SUBREV_CO_U32_e64:
3732   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3733   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3734 
3735   case AMDGPU::V_SUBBREV_U32_e32:
3736   case AMDGPU::V_SUBBREV_U32_e64:
3737   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3738   case AMDGPU::V_SUBBREV_U32_e32_vi:
3739   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3740   case AMDGPU::V_SUBBREV_U32_e64_vi:
3741 
3742   case AMDGPU::V_SUBREV_U32_e32:
3743   case AMDGPU::V_SUBREV_U32_e64:
3744   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3745   case AMDGPU::V_SUBREV_U32_e32_vi:
3746   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3747   case AMDGPU::V_SUBREV_U32_e64_vi:
3748 
3749   case AMDGPU::V_SUBREV_F16_e32:
3750   case AMDGPU::V_SUBREV_F16_e64:
3751   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3752   case AMDGPU::V_SUBREV_F16_e32_vi:
3753   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3754   case AMDGPU::V_SUBREV_F16_e64_vi:
3755 
3756   case AMDGPU::V_SUBREV_U16_e32:
3757   case AMDGPU::V_SUBREV_U16_e64:
3758   case AMDGPU::V_SUBREV_U16_e32_vi:
3759   case AMDGPU::V_SUBREV_U16_e64_vi:
3760 
3761   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3762   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3763   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3764 
3765   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3766   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3767 
3768   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3769   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3770 
3771   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3772   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3773 
3774   case AMDGPU::V_LSHRREV_B32_e32:
3775   case AMDGPU::V_LSHRREV_B32_e64:
3776   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3777   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3778   case AMDGPU::V_LSHRREV_B32_e32_vi:
3779   case AMDGPU::V_LSHRREV_B32_e64_vi:
3780   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3781   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3782 
3783   case AMDGPU::V_ASHRREV_I32_e32:
3784   case AMDGPU::V_ASHRREV_I32_e64:
3785   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3786   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3787   case AMDGPU::V_ASHRREV_I32_e32_vi:
3788   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3789   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3790   case AMDGPU::V_ASHRREV_I32_e64_vi:
3791 
3792   case AMDGPU::V_LSHLREV_B32_e32:
3793   case AMDGPU::V_LSHLREV_B32_e64:
3794   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3795   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3796   case AMDGPU::V_LSHLREV_B32_e32_vi:
3797   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3798   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3799   case AMDGPU::V_LSHLREV_B32_e64_vi:
3800 
3801   case AMDGPU::V_LSHLREV_B16_e32:
3802   case AMDGPU::V_LSHLREV_B16_e64:
3803   case AMDGPU::V_LSHLREV_B16_e32_vi:
3804   case AMDGPU::V_LSHLREV_B16_e64_vi:
3805   case AMDGPU::V_LSHLREV_B16_gfx10:
3806 
3807   case AMDGPU::V_LSHRREV_B16_e32:
3808   case AMDGPU::V_LSHRREV_B16_e64:
3809   case AMDGPU::V_LSHRREV_B16_e32_vi:
3810   case AMDGPU::V_LSHRREV_B16_e64_vi:
3811   case AMDGPU::V_LSHRREV_B16_gfx10:
3812 
3813   case AMDGPU::V_ASHRREV_I16_e32:
3814   case AMDGPU::V_ASHRREV_I16_e64:
3815   case AMDGPU::V_ASHRREV_I16_e32_vi:
3816   case AMDGPU::V_ASHRREV_I16_e64_vi:
3817   case AMDGPU::V_ASHRREV_I16_gfx10:
3818 
3819   case AMDGPU::V_LSHLREV_B64_e64:
3820   case AMDGPU::V_LSHLREV_B64_gfx10:
3821   case AMDGPU::V_LSHLREV_B64_vi:
3822 
3823   case AMDGPU::V_LSHRREV_B64_e64:
3824   case AMDGPU::V_LSHRREV_B64_gfx10:
3825   case AMDGPU::V_LSHRREV_B64_vi:
3826 
3827   case AMDGPU::V_ASHRREV_I64_e64:
3828   case AMDGPU::V_ASHRREV_I64_gfx10:
3829   case AMDGPU::V_ASHRREV_I64_vi:
3830 
3831   case AMDGPU::V_PK_LSHLREV_B16:
3832   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3833   case AMDGPU::V_PK_LSHLREV_B16_vi:
3834 
3835   case AMDGPU::V_PK_LSHRREV_B16:
3836   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3837   case AMDGPU::V_PK_LSHRREV_B16_vi:
3838   case AMDGPU::V_PK_ASHRREV_I16:
3839   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3840   case AMDGPU::V_PK_ASHRREV_I16_vi:
3841     return true;
3842   default:
3843     return false;
3844   }
3845 }
3846 
3847 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3848 
3849   using namespace SIInstrFlags;
3850   const unsigned Opcode = Inst.getOpcode();
3851   const MCInstrDesc &Desc = MII.get(Opcode);
3852 
3853   // lds_direct register is defined so that it can be used
3854   // with 9-bit operands only. Ignore encodings which do not accept these.
3855   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3856   if ((Desc.TSFlags & Enc) == 0)
3857     return None;
3858 
3859   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3860     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3861     if (SrcIdx == -1)
3862       break;
3863     const auto &Src = Inst.getOperand(SrcIdx);
3864     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3865 
3866       if (isGFX90A())
3867         return StringRef("lds_direct is not supported on this GPU");
3868 
3869       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3870         return StringRef("lds_direct cannot be used with this instruction");
3871 
3872       if (SrcName != OpName::src0)
3873         return StringRef("lds_direct may be used as src0 only");
3874     }
3875   }
3876 
3877   return None;
3878 }
3879 
3880 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3881   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3882     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3883     if (Op.isFlatOffset())
3884       return Op.getStartLoc();
3885   }
3886   return getLoc();
3887 }
3888 
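// FLAT-family offsets are target dependent: the offset modifier requires a GPU
// with flat instruction offsets (roughly GFX9 onward), global/scratch forms
// take a signed offset, and plain flat takes an unsigned one. Illustrative
// example: "global_load_dword v0, v[1:2], off offset:-8" is valid where signed
// offsets are supported, but a negative offset on "flat_load_dword" is not.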
3889 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3890                                          const OperandVector &Operands) {
3891   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3892   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3893     return true;
3894 
3895   auto Opcode = Inst.getOpcode();
3896   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3897   assert(OpNum != -1);
3898 
3899   const auto &Op = Inst.getOperand(OpNum);
3900   if (!hasFlatOffsets() && Op.getImm() != 0) {
3901     Error(getFlatOffsetLoc(Operands),
3902           "flat offset modifier is not supported on this GPU");
3903     return false;
3904   }
3905 
3906   // For the FLAT segment the offset must be positive;
3907   // the MSB is ignored and forced to zero.
3908   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3909     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3910     if (!isIntN(OffsetSize, Op.getImm())) {
3911       Error(getFlatOffsetLoc(Operands),
3912             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3913       return false;
3914     }
3915   } else {
3916     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3917     if (!isUIntN(OffsetSize, Op.getImm())) {
3918       Error(getFlatOffsetLoc(Operands),
3919             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3920       return false;
3921     }
3922   }
3923 
3924   return true;
3925 }
3926 
3927 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3928   // Start with second operand because SMEM Offset cannot be dst or src0.
3929   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3930     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3931     if (Op.isSMEMOffset())
3932       return Op.getStartLoc();
3933   }
3934   return getLoc();
3935 }
3936 
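// SMEM offsets are encoded differently across targets: VI and the buffer forms
// use a 20-bit unsigned offset, later targets a 21-bit signed offset.
// Illustrative example: "s_load_dword s0, s[2:3], 0xfffff" fits the 20-bit
// range, while an out-of-range value is diagnosed below.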
3937 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3938                                          const OperandVector &Operands) {
3939   if (isCI() || isSI())
3940     return true;
3941 
3942   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3943   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3944     return true;
3945 
3946   auto Opcode = Inst.getOpcode();
3947   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3948   if (OpNum == -1)
3949     return true;
3950 
3951   const auto &Op = Inst.getOperand(OpNum);
3952   if (!Op.isImm())
3953     return true;
3954 
3955   uint64_t Offset = Op.getImm();
3956   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3957   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3958       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3959     return true;
3960 
3961   Error(getSMEMOffsetLoc(Operands),
3962         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3963                                "expected a 21-bit signed offset");
3964 
3965   return false;
3966 }
3967 
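// SOP2/SOPC instructions can encode at most one unique 32-bit literal across
// their sources. Illustrative example: "s_add_u32 s0, 0x11111111, 0x22222222"
// is rejected, but reusing the same literal for both sources counts as a
// single literal and is accepted.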
3968 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3969   unsigned Opcode = Inst.getOpcode();
3970   const MCInstrDesc &Desc = MII.get(Opcode);
3971   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3972     return true;
3973 
3974   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3975   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3976 
3977   const int OpIndices[] = { Src0Idx, Src1Idx };
3978 
3979   unsigned NumExprs = 0;
3980   unsigned NumLiterals = 0;
3981   uint32_t LiteralValue;
3982 
3983   for (int OpIdx : OpIndices) {
3984     if (OpIdx == -1) break;
3985 
3986     const MCOperand &MO = Inst.getOperand(OpIdx);
3987     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3988     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3989       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3990         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3991         if (NumLiterals == 0 || LiteralValue != Value) {
3992           LiteralValue = Value;
3993           ++NumLiterals;
3994         }
3995       } else if (MO.isExpr()) {
3996         ++NumExprs;
3997       }
3998     }
3999   }
4000 
4001   return NumLiterals + NumExprs <= 1;
4002 }
4003 
4004 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4005   const unsigned Opc = Inst.getOpcode();
4006   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4007       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4008     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4009     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4010 
4011     if (OpSel & ~3)
4012       return false;
4013   }
4014   return true;
4015 }
4016 
4017 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4018                                   const OperandVector &Operands) {
4019   const unsigned Opc = Inst.getOpcode();
4020   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4021   if (DppCtrlIdx < 0)
4022     return true;
4023   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4024 
4025   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4026     // DPP64 is supported for row_newbcast only.
4027     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4028     if (Src0Idx >= 0 &&
4029         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4030       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4031       Error(S, "64 bit dpp only supports row_newbcast");
4032       return false;
4033     }
4034   }
4035 
4036   return true;
4037 }
4038 
4039 // Check if VCC register matches wavefront size
4040 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4041   auto FB = getFeatureBits();
4042   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4043     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4044 }
4045 
4046 // Only one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
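// Illustrative example: "v_add3_u32 v0, v1, v2, 0x1234" carries a VOP3
// literal, so it is rejected on targets without FeatureVOP3Literal and
// accepted on GFX10+.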
4047 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4048                                          const OperandVector &Operands) {
4049   unsigned Opcode = Inst.getOpcode();
4050   const MCInstrDesc &Desc = MII.get(Opcode);
4051   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4052   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4053       ImmIdx == -1)
4054     return true;
4055 
4056   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4057   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4058   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4059 
4060   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4061 
4062   unsigned NumExprs = 0;
4063   unsigned NumLiterals = 0;
4064   uint32_t LiteralValue;
4065 
4066   for (int OpIdx : OpIndices) {
4067     if (OpIdx == -1)
4068       continue;
4069 
4070     const MCOperand &MO = Inst.getOperand(OpIdx);
4071     if (!MO.isImm() && !MO.isExpr())
4072       continue;
4073     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4074       continue;
4075 
4076     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4077         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4078       Error(getConstLoc(Operands),
4079             "inline constants are not allowed for this operand");
4080       return false;
4081     }
4082 
4083     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4084       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4085       if (NumLiterals == 0 || LiteralValue != Value) {
4086         LiteralValue = Value;
4087         ++NumLiterals;
4088       }
4089     } else if (MO.isExpr()) {
4090       ++NumExprs;
4091     }
4092   }
4093   NumLiterals += NumExprs;
4094 
4095   if (!NumLiterals)
4096     return true;
4097 
4098   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4099     Error(getLitLoc(Operands), "literal operands are not supported");
4100     return false;
4101   }
4102 
4103   if (NumLiterals > 1) {
4104     Error(getLitLoc(Operands), "only one literal operand is allowed");
4105     return false;
4106   }
4107 
4108   return true;
4109 }
4110 
4111 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4112 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4113                          const MCRegisterInfo *MRI) {
4114   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4115   if (OpIdx < 0)
4116     return -1;
4117 
4118   const MCOperand &Op = Inst.getOperand(OpIdx);
4119   if (!Op.isReg())
4120     return -1;
4121 
4122   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4123   auto Reg = Sub ? Sub : Op.getReg();
4124   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4125   return AGPR32.contains(Reg) ? 1 : 0;
4126 }
4127 
4128 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4129   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4130   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4131                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4132                   SIInstrFlags::DS)) == 0)
4133     return true;
4134 
4135   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4136                                                       : AMDGPU::OpName::vdata;
4137 
4138   const MCRegisterInfo *MRI = getMRI();
4139   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4140   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4141 
4142   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4143     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4144     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4145       return false;
4146   }
4147 
4148   auto FB = getFeatureBits();
4149   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4150     if (DataAreg < 0 || DstAreg < 0)
4151       return true;
4152     return DstAreg == DataAreg;
4153   }
4154 
4155   return DstAreg < 1 && DataAreg < 1;
4156 }
4157 
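// gfx90a requires multi-register VGPR/AGPR operands to start at an even
// register index. Illustrative example: a 64-bit operand may use v[2:3] but
// not v[1:2].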
4158 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4159   auto FB = getFeatureBits();
4160   if (!FB[AMDGPU::FeatureGFX90AInsts])
4161     return true;
4162 
4163   const MCRegisterInfo *MRI = getMRI();
4164   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4165   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4166   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4167     const MCOperand &Op = Inst.getOperand(I);
4168     if (!Op.isReg())
4169       continue;
4170 
4171     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4172     if (!Sub)
4173       continue;
4174 
4175     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4176       return false;
4177     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4178       return false;
4179   }
4180 
4181   return true;
4182 }
4183 
4184 // gfx90a has an undocumented limitation:
4185 // DS_GWS opcodes must use even aligned registers.
4186 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4187                                   const OperandVector &Operands) {
4188   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4189     return true;
4190 
4191   int Opc = Inst.getOpcode();
4192   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4193       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4194     return true;
4195 
4196   const MCRegisterInfo *MRI = getMRI();
4197   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4198   int Data0Pos =
4199       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4200   assert(Data0Pos != -1);
4201   auto Reg = Inst.getOperand(Data0Pos).getReg();
4202   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4203   if (RegIdx & 1) {
4204     SMLoc RegLoc = getRegLoc(Reg, Operands);
4205     Error(RegLoc, "vgpr must be even aligned");
4206     return false;
4207   }
4208 
4209   return true;
4210 }
4211 
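// Cache policy bits are constrained per instruction class: returning atomics
// must set glc, non-returning atomics must not, SMRD accepts only glc/dlc, and
// scc is rejected on gfx90a. Illustrative example: the returning form
// "flat_atomic_add v0, v[1:2], v3 glc" needs glc, while the non-returning
// "flat_atomic_add v[1:2], v3" must omit it.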
4212 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4213                                             const OperandVector &Operands,
4214                                             const SMLoc &IDLoc) {
4215   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4216                                            AMDGPU::OpName::cpol);
4217   if (CPolPos == -1)
4218     return true;
4219 
4220   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4221 
4222   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4223   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4224       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4225     Error(IDLoc, "invalid cache policy for SMRD instruction");
4226     return false;
4227   }
4228 
4229   if (isGFX90A() && (CPol & CPol::SCC)) {
4230     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4231     StringRef CStr(S.getPointer());
4232     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4233     Error(S, "scc is not supported on this GPU");
4234     return false;
4235   }
4236 
4237   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4238     return true;
4239 
4240   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4241     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4242       Error(IDLoc, "instruction must use glc");
4243       return false;
4244     }
4245   } else {
4246     if (CPol & CPol::GLC) {
4247       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4248       StringRef CStr(S.getPointer());
4249       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4250       Error(S, "instruction must not use glc");
4251       return false;
4252     }
4253   }
4254 
4255   return true;
4256 }
4257 
4258 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4259                                           const SMLoc &IDLoc,
4260                                           const OperandVector &Operands) {
4261   if (auto ErrMsg = validateLdsDirect(Inst)) {
4262     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4263     return false;
4264   }
4265   if (!validateSOPLiteral(Inst)) {
4266     Error(getLitLoc(Operands),
4267       "only one literal operand is allowed");
4268     return false;
4269   }
4270   if (!validateVOPLiteral(Inst, Operands)) {
4271     return false;
4272   }
4273   if (!validateConstantBusLimitations(Inst, Operands)) {
4274     return false;
4275   }
4276   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4277     return false;
4278   }
4279   if (!validateIntClampSupported(Inst)) {
4280     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4281       "integer clamping is not supported on this GPU");
4282     return false;
4283   }
4284   if (!validateOpSel(Inst)) {
4285     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4286       "invalid op_sel operand");
4287     return false;
4288   }
4289   if (!validateDPP(Inst, Operands)) {
4290     return false;
4291   }
4292   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
4293   if (!validateMIMGD16(Inst)) {
4294     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4295       "d16 modifier is not supported on this GPU");
4296     return false;
4297   }
4298   if (!validateMIMGDim(Inst)) {
4299     Error(IDLoc, "dim modifier is required on this GPU");
4300     return false;
4301   }
4302   if (!validateMIMGMSAA(Inst)) {
4303     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4304           "invalid dim; must be MSAA type");
4305     return false;
4306   }
4307   if (!validateMIMGDataSize(Inst)) {
4308     Error(IDLoc,
4309       "image data size does not match dmask and tfe");
4310     return false;
4311   }
4312   if (!validateMIMGAddrSize(Inst)) {
4313     Error(IDLoc,
4314       "image address size does not match dim and a16");
4315     return false;
4316   }
4317   if (!validateMIMGAtomicDMask(Inst)) {
4318     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4319       "invalid atomic image dmask");
4320     return false;
4321   }
4322   if (!validateMIMGGatherDMask(Inst)) {
4323     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4324       "invalid image_gather dmask: only one bit must be set");
4325     return false;
4326   }
4327   if (!validateMovrels(Inst, Operands)) {
4328     return false;
4329   }
4330   if (!validateFlatOffset(Inst, Operands)) {
4331     return false;
4332   }
4333   if (!validateSMEMOffset(Inst, Operands)) {
4334     return false;
4335   }
4336   if (!validateMAIAccWrite(Inst, Operands)) {
4337     return false;
4338   }
4339   if (!validateMFMA(Inst, Operands)) {
4340     return false;
4341   }
4342   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4343     return false;
4344   }
4345 
4346   if (!validateAGPRLdSt(Inst)) {
4347     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4348     ? "invalid register class: data and dst should be all VGPR or AGPR"
4349     : "invalid register class: agpr loads and stores not supported on this GPU"
4350     );
4351     return false;
4352   }
4353   if (!validateVGPRAlign(Inst)) {
4354     Error(IDLoc,
4355       "invalid register class: vgpr tuples must be 64 bit aligned");
4356     return false;
4357   }
4358   if (!validateGWS(Inst, Operands)) {
4359     return false;
4360   }
4361 
4362   if (!validateDivScale(Inst)) {
4363     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4364     return false;
4365   }
4369 
4370   return true;
4371 }
4372 
4373 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4374                                             const FeatureBitset &FBS,
4375                                             unsigned VariantID = 0);
4376 
4377 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4378                                 const FeatureBitset &AvailableFeatures,
4379                                 unsigned VariantID);
4380 
4381 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4382                                        const FeatureBitset &FBS) {
4383   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4384 }
4385 
4386 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4387                                        const FeatureBitset &FBS,
4388                                        ArrayRef<unsigned> Variants) {
4389   for (auto Variant : Variants) {
4390     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4391       return true;
4392   }
4393 
4394   return false;
4395 }
4396 
4397 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4398                                                   const SMLoc &IDLoc) {
4399   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4400 
4401   // Check if requested instruction variant is supported.
4402   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4403     return false;
4404 
4405   // This instruction is not supported.
4406   // Clear any other pending errors because they are no longer relevant.
4407   getParser().clearPendingErrors();
4408 
4409   // Requested instruction variant is not supported.
4410   // Check if any other variants are supported.
4411   StringRef VariantName = getMatchedVariantName();
4412   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4413     return Error(IDLoc,
4414                  Twine(VariantName,
4415                        " variant of this instruction is not supported"));
4416   }
4417 
4418   // Finally check if this instruction is supported on any other GPU.
4419   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4420     return Error(IDLoc, "instruction not supported on this GPU");
4421   }
4422 
4423   // Instruction not supported on any GPU. Probably a typo.
4424   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4425   return Error(IDLoc, "invalid instruction" + Suggestion);
4426 }
4427 
4428 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4429                                               OperandVector &Operands,
4430                                               MCStreamer &Out,
4431                                               uint64_t &ErrorInfo,
4432                                               bool MatchingInlineAsm) {
4433   MCInst Inst;
4434   unsigned Result = Match_Success;
4435   for (auto Variant : getMatchedVariants()) {
4436     uint64_t EI;
4437     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4438                                   Variant);
4439     // We order match statuses from least to most specific and use the most
4440     // specific status as the result:
4441     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4442     if ((R == Match_Success) ||
4443         (R == Match_PreferE32) ||
4444         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4445         (R == Match_InvalidOperand && Result != Match_MissingFeature
4446                                    && Result != Match_PreferE32) ||
4447         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4448                                    && Result != Match_MissingFeature
4449                                    && Result != Match_PreferE32)) {
4450       Result = R;
4451       ErrorInfo = EI;
4452     }
4453     if (R == Match_Success)
4454       break;
4455   }
4456 
4457   if (Result == Match_Success) {
4458     if (!validateInstruction(Inst, IDLoc, Operands)) {
4459       return true;
4460     }
4461     Inst.setLoc(IDLoc);
4462     Out.emitInstruction(Inst, getSTI());
4463     return false;
4464   }
4465 
4466   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4467   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4468     return true;
4469   }
4470 
4471   switch (Result) {
4472   default: break;
4473   case Match_MissingFeature:
4474     // It has been verified that the specified instruction
4475     // mnemonic is valid. A match was found but it requires
4476     // features which are not supported on this GPU.
4477     return Error(IDLoc, "operands are not valid for this GPU or mode");
4478 
4479   case Match_InvalidOperand: {
4480     SMLoc ErrorLoc = IDLoc;
4481     if (ErrorInfo != ~0ULL) {
4482       if (ErrorInfo >= Operands.size()) {
4483         return Error(IDLoc, "too few operands for instruction");
4484       }
4485       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4486       if (ErrorLoc == SMLoc())
4487         ErrorLoc = IDLoc;
4488     }
4489     return Error(ErrorLoc, "invalid operand for instruction");
4490   }
4491 
4492   case Match_PreferE32:
4493     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4494                         "should be encoded as e32");
4495   case Match_MnemonicFail:
4496     llvm_unreachable("Invalid instructions should have been handled already");
4497   }
4498   llvm_unreachable("Implement any new match types added!");
4499 }
4500 
4501 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4502   int64_t Tmp = -1;
4503   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4504     return true;
4505   }
4506   if (getParser().parseAbsoluteExpression(Tmp)) {
4507     return true;
4508   }
4509   Ret = static_cast<uint32_t>(Tmp);
4510   return false;
4511 }
4512 
4513 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4514                                                uint32_t &Minor) {
4515   if (ParseAsAbsoluteExpression(Major))
4516     return TokError("invalid major version");
4517 
4518   if (!trySkipToken(AsmToken::Comma))
4519     return TokError("minor version number required, comma expected");
4520 
4521   if (ParseAsAbsoluteExpression(Minor))
4522     return TokError("invalid minor version");
4523 
4524   return false;
4525 }
4526 
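// Parses the .amdgcn_target directive; the quoted target id must match the one
// in effect. Illustrative example (exact string depends on the configured
// target): .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"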
4527 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4528   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4529     return TokError("directive only supported for amdgcn architecture");
4530 
4531   std::string TargetIDDirective;
4532   SMLoc TargetStart = getTok().getLoc();
4533   if (getParser().parseEscapedString(TargetIDDirective))
4534     return true;
4535 
4536   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4537   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4538     return getParser().Error(TargetRange.Start,
4539         (Twine(".amdgcn_target directive's target id ") +
4540          Twine(TargetIDDirective) +
4541          Twine(" does not match the specified target id ") +
4542          Twine(getTargetStreamer().getTargetID()->toString())).str());
4543 
4544   return false;
4545 }
4546 
4547 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4548   return Error(Range.Start, "value out of range", Range);
4549 }
4550 
4551 bool AMDGPUAsmParser::calculateGPRBlocks(
4552     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4553     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4554     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4555     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4556   // TODO(scott.linder): These calculations are duplicated from
4557   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4558   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4559 
4560   unsigned NumVGPRs = NextFreeVGPR;
4561   unsigned NumSGPRs = NextFreeSGPR;
4562 
4563   if (Version.Major >= 10)
4564     NumSGPRs = 0;
4565   else {
4566     unsigned MaxAddressableNumSGPRs =
4567         IsaInfo::getAddressableNumSGPRs(&getSTI());
4568 
4569     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4570         NumSGPRs > MaxAddressableNumSGPRs)
4571       return OutOfRangeError(SGPRRange);
4572 
4573     NumSGPRs +=
4574         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4575 
4576     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4577         NumSGPRs > MaxAddressableNumSGPRs)
4578       return OutOfRangeError(SGPRRange);
4579 
4580     if (Features.test(FeatureSGPRInitBug))
4581       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4582   }
4583 
4584   VGPRBlocks =
4585       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4586   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4587 
4588   return false;
4589 }
4590 
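// Parses a .amdhsa_kernel block. Illustrative example (only a subset of the
// accepted directives is shown):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// Each .amdhsa_ directive may appear at most once; unspecified fields keep the
// defaults from getDefaultAmdhsaKernelDescriptor.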
4591 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4592   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4593     return TokError("directive only supported for amdgcn architecture");
4594 
4595   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4596     return TokError("directive only supported for amdhsa OS");
4597 
4598   StringRef KernelName;
4599   if (getParser().parseIdentifier(KernelName))
4600     return true;
4601 
4602   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4603 
4604   StringSet<> Seen;
4605 
4606   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4607 
4608   SMRange VGPRRange;
4609   uint64_t NextFreeVGPR = 0;
4610   uint64_t AccumOffset = 0;
4611   SMRange SGPRRange;
4612   uint64_t NextFreeSGPR = 0;
4613 
4614   // Count the number of user SGPRs implied from the enabled feature bits.
4615   unsigned ImpliedUserSGPRCount = 0;
4616 
4617   // Track if the asm explicitly contains the directive for the user SGPR
4618   // count.
4619   Optional<unsigned> ExplicitUserSGPRCount;
4620   bool ReserveVCC = true;
4621   bool ReserveFlatScr = true;
4622   Optional<bool> EnableWavefrontSize32;
4623 
4624   while (true) {
4625     while (trySkipToken(AsmToken::EndOfStatement));
4626 
4627     StringRef ID;
4628     SMRange IDRange = getTok().getLocRange();
4629     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4630       return true;
4631 
4632     if (ID == ".end_amdhsa_kernel")
4633       break;
4634 
4635     if (Seen.find(ID) != Seen.end())
4636       return TokError(".amdhsa_ directives cannot be repeated");
4637     Seen.insert(ID);
4638 
4639     SMLoc ValStart = getLoc();
4640     int64_t IVal;
4641     if (getParser().parseAbsoluteExpression(IVal))
4642       return true;
4643     SMLoc ValEnd = getLoc();
4644     SMRange ValRange = SMRange(ValStart, ValEnd);
4645 
4646     if (IVal < 0)
4647       return OutOfRangeError(ValRange);
4648 
4649     uint64_t Val = IVal;
4650 
4651 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4652   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4653     return OutOfRangeError(RANGE);                                             \
4654   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4655 
4656     if (ID == ".amdhsa_group_segment_fixed_size") {
4657       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4658         return OutOfRangeError(ValRange);
4659       KD.group_segment_fixed_size = Val;
4660     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4661       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4662         return OutOfRangeError(ValRange);
4663       KD.private_segment_fixed_size = Val;
4664     } else if (ID == ".amdhsa_kernarg_size") {
4665       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4666         return OutOfRangeError(ValRange);
4667       KD.kernarg_size = Val;
4668     } else if (ID == ".amdhsa_user_sgpr_count") {
4669       ExplicitUserSGPRCount = Val;
4670     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4671       if (hasArchitectedFlatScratch())
4672         return Error(IDRange.Start,
4673                      "directive is not supported with architected flat scratch",
4674                      IDRange);
4675       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4676                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4677                        Val, ValRange);
4678       if (Val)
4679         ImpliedUserSGPRCount += 4;
4680     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4681       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4682                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4683                        ValRange);
4684       if (Val)
4685         ImpliedUserSGPRCount += 2;
4686     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4687       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4688                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4689                        ValRange);
4690       if (Val)
4691         ImpliedUserSGPRCount += 2;
4692     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4693       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4694                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4695                        Val, ValRange);
4696       if (Val)
4697         ImpliedUserSGPRCount += 2;
4698     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4699       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4700                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4701                        ValRange);
4702       if (Val)
4703         ImpliedUserSGPRCount += 2;
4704     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4705       if (hasArchitectedFlatScratch())
4706         return Error(IDRange.Start,
4707                      "directive is not supported with architected flat scratch",
4708                      IDRange);
4709       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4710                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4711                        ValRange);
4712       if (Val)
4713         ImpliedUserSGPRCount += 2;
4714     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4715       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4716                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4717                        Val, ValRange);
4718       if (Val)
4719         ImpliedUserSGPRCount += 1;
4720     } else if (ID == ".amdhsa_wavefront_size32") {
4721       if (IVersion.Major < 10)
4722         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4723       EnableWavefrontSize32 = Val;
4724       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4725                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4726                        Val, ValRange);
4727     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4728       if (hasArchitectedFlatScratch())
4729         return Error(IDRange.Start,
4730                      "directive is not supported with architected flat scratch",
4731                      IDRange);
4732       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4733                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4734     } else if (ID == ".amdhsa_enable_private_segment") {
4735       if (!hasArchitectedFlatScratch())
4736         return Error(
4737             IDRange.Start,
4738             "directive is not supported without architected flat scratch",
4739             IDRange);
4740       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4741                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4742     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4743       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4744                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4745                        ValRange);
4746     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4747       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4748                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4749                        ValRange);
4750     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4751       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4752                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4753                        ValRange);
4754     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4755       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4756                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4757                        ValRange);
4758     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4759       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4760                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4761                        ValRange);
4762     } else if (ID == ".amdhsa_next_free_vgpr") {
4763       VGPRRange = ValRange;
4764       NextFreeVGPR = Val;
4765     } else if (ID == ".amdhsa_next_free_sgpr") {
4766       SGPRRange = ValRange;
4767       NextFreeSGPR = Val;
4768     } else if (ID == ".amdhsa_accum_offset") {
4769       if (!isGFX90A())
4770         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4771       AccumOffset = Val;
4772     } else if (ID == ".amdhsa_reserve_vcc") {
4773       if (!isUInt<1>(Val))
4774         return OutOfRangeError(ValRange);
4775       ReserveVCC = Val;
4776     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4777       if (IVersion.Major < 7)
4778         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4779       if (hasArchitectedFlatScratch())
4780         return Error(IDRange.Start,
4781                      "directive is not supported with architected flat scratch",
4782                      IDRange);
4783       if (!isUInt<1>(Val))
4784         return OutOfRangeError(ValRange);
4785       ReserveFlatScr = Val;
4786     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4787       if (IVersion.Major < 8)
4788         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4789       if (!isUInt<1>(Val))
4790         return OutOfRangeError(ValRange);
4791       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4792         return Error(IDRange.Start,
4793                      ".amdhsa_reserve_xnack_mask does not match target id", IDRange);
4794     } else if (ID == ".amdhsa_float_round_mode_32") {
4795       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4796                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4797     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4798       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4799                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4800     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4801       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4802                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4803     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4804       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4805                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4806                        ValRange);
4807     } else if (ID == ".amdhsa_dx10_clamp") {
4808       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4809                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4810     } else if (ID == ".amdhsa_ieee_mode") {
4811       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4812                        Val, ValRange);
4813     } else if (ID == ".amdhsa_fp16_overflow") {
4814       if (IVersion.Major < 9)
4815         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4816       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4817                        ValRange);
4818     } else if (ID == ".amdhsa_tg_split") {
4819       if (!isGFX90A())
4820         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4821       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4822                        ValRange);
4823     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4824       if (IVersion.Major < 10)
4825         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4826       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4827                        ValRange);
4828     } else if (ID == ".amdhsa_memory_ordered") {
4829       if (IVersion.Major < 10)
4830         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4831       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4832                        ValRange);
4833     } else if (ID == ".amdhsa_forward_progress") {
4834       if (IVersion.Major < 10)
4835         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4836       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4837                        ValRange);
4838     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4839       PARSE_BITS_ENTRY(
4840           KD.compute_pgm_rsrc2,
4841           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4842           ValRange);
4843     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4844       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4845                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4846                        Val, ValRange);
4847     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4848       PARSE_BITS_ENTRY(
4849           KD.compute_pgm_rsrc2,
4850           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4851           ValRange);
4852     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4853       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4854                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4855                        Val, ValRange);
4856     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4857       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4858                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4859                        Val, ValRange);
4860     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4861       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4862                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4863                        Val, ValRange);
4864     } else if (ID == ".amdhsa_exception_int_div_zero") {
4865       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4866                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4867                        Val, ValRange);
4868     } else {
4869       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4870     }
4871 
4872 #undef PARSE_BITS_ENTRY
4873   }
4874 
4875   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4876     return TokError(".amdhsa_next_free_vgpr directive is required");
4877 
4878   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4879     return TokError(".amdhsa_next_free_sgpr directive is required");
4880 
4881   unsigned VGPRBlocks;
4882   unsigned SGPRBlocks;
4883   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4884                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4885                          EnableWavefrontSize32, NextFreeVGPR,
4886                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4887                          SGPRBlocks))
4888     return true;
4889 
4890   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4891           VGPRBlocks))
4892     return OutOfRangeError(VGPRRange);
4893   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4894                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4895 
4896   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4897           SGPRBlocks))
4898     return OutOfRangeError(SGPRRange);
4899   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4900                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4901                   SGPRBlocks);
4902 
4903   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
4904     return TokError(".amdhsa_user_sgpr_count smaller than implied by "
4905                     "enabled user SGPRs");
4906 
4907   unsigned UserSGPRCount =
4908       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
4909 
4910   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4911     return TokError("too many user SGPRs enabled");
4912   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4913                   UserSGPRCount);
4914 
4915   if (isGFX90A()) {
4916     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4917       return TokError(".amdhsa_accum_offset directive is required");
4918     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4919       return TokError("accum_offset should be in range [4..256] in "
4920                       "increments of 4");
4921     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4922       return TokError("accum_offset exceeds total VGPR allocation");
4923     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4924                     (AccumOffset / 4 - 1));
4925   }
4926 
4927   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4928       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4929       ReserveFlatScr);
4930   return false;
4931 }
4932 
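// Parses the legacy ".hsa_code_object_version <major>,<minor>" directive,
// e.g. (illustrative values only): .hsa_code_object_version 2,1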
4933 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4934   uint32_t Major;
4935   uint32_t Minor;
4936 
4937   if (ParseDirectiveMajorMinor(Major, Minor))
4938     return true;
4939 
4940   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4941   return false;
4942 }
4943 
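// Parses the legacy ".hsa_code_object_isa" directive. Illustrative form with
// explicit arguments (the version numbers are examples only):
//   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"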
4944 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4945   uint32_t Major;
4946   uint32_t Minor;
4947   uint32_t Stepping;
4948   StringRef VendorName;
4949   StringRef ArchName;
4950 
4951   // If this directive has no arguments, then use the ISA version for the
4952   // targeted GPU.
4953   if (isToken(AsmToken::EndOfStatement)) {
4954     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4955     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4956                                                         ISA.Stepping,
4957                                                         "AMD", "AMDGPU");
4958     return false;
4959   }
4960 
4961   if (ParseDirectiveMajorMinor(Major, Minor))
4962     return true;
4963 
4964   if (!trySkipToken(AsmToken::Comma))
4965     return TokError("stepping version number required, comma expected");
4966 
4967   if (ParseAsAbsoluteExpression(Stepping))
4968     return TokError("invalid stepping version");
4969 
4970   if (!trySkipToken(AsmToken::Comma))
4971     return TokError("vendor name required, comma expected");
4972 
4973   if (!parseString(VendorName, "invalid vendor name"))
4974     return true;
4975 
4976   if (!trySkipToken(AsmToken::Comma))
4977     return TokError("arch name required, comma expected");
4978 
4979   if (!parseString(ArchName, "invalid arch name"))
4980     return true;
4981 
4982   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4983                                                       VendorName, ArchName);
4984   return false;
4985 }
4986 
4987 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4988                                                amd_kernel_code_t &Header) {
4989   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4990   // assembly for backwards compatibility.
4991   if (ID == "max_scratch_backing_memory_byte_size") {
4992     Parser.eatToEndOfStatement();
4993     return false;
4994   }
4995 
4996   SmallString<40> ErrStr;
4997   raw_svector_ostream Err(ErrStr);
4998   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4999     return TokError(Err.str());
5000   }
5001   Lex();
5002 
5003   if (ID == "enable_wavefront_size32") {
5004     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5005       if (!isGFX10Plus())
5006         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5007       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5008         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5009     } else {
5010       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5011         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5012     }
5013   }
5014 
5015   if (ID == "wavefront_size") {
5016     if (Header.wavefront_size == 5) {
5017       if (!isGFX10Plus())
5018         return TokError("wavefront_size=5 is only allowed on GFX10+");
5019       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5020         return TokError("wavefront_size=5 requires +WavefrontSize32");
5021     } else if (Header.wavefront_size == 6) {
5022       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5023         return TokError("wavefront_size=6 requires +WavefrontSize64");
5024     }
5025   }
5026 
5027   if (ID == "enable_wgp_mode") {
5028     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5029         !isGFX10Plus())
5030       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5031   }
5032 
5033   if (ID == "enable_mem_ordered") {
5034     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5035         !isGFX10Plus())
5036       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5037   }
5038 
5039   if (ID == "enable_fwd_progress") {
5040     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5041         !isGFX10Plus())
5042       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5043   }
5044 
5045   return false;
5046 }
5047 
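// Parses the legacy .amd_kernel_code_t ... .end_amd_kernel_code_t block.
// Illustrative sketch (the fields shown are examples only, not a complete or
// required set):
//
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_sgpr_kernarg_segment_ptr = 1
//   .end_amd_kernel_code_t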
5048 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5049   amd_kernel_code_t Header;
5050   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5051 
5052   while (true) {
5053     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5054     // will set the current token to EndOfStatement.
5055     while(trySkipToken(AsmToken::EndOfStatement));
5056 
5057     StringRef ID;
5058     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5059       return true;
5060 
5061     if (ID == ".end_amd_kernel_code_t")
5062       break;
5063 
5064     if (ParseAMDKernelCodeTValue(ID, Header))
5065       return true;
5066   }
5067 
5068   getTargetStreamer().EmitAMDKernelCodeT(Header);
5069 
5070   return false;
5071 }
5072 
5073 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5074   StringRef KernelName;
5075   if (!parseId(KernelName, "expected symbol name"))
5076     return true;
5077 
5078   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5079                                            ELF::STT_AMDGPU_HSA_KERNEL);
5080 
5081   KernelScope.initialize(getContext());
5082   return false;
5083 }
5084 
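// Parses ".amd_amdgpu_isa" followed by a quoted target string that must match
// the assembler's target id, e.g. (illustrative string only):
//   .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx900"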
5085 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5086   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5087     return Error(getLoc(),
5088                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5089                  "architectures");
5090   }
5091 
5092   auto TargetIDDirective = getLexer().getTok().getStringContents();
5093   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5094     return Error(getParser().getTok().getLoc(), "target id must match options");
5095 
5096   getTargetStreamer().EmitISAVersion();
5097   Lex();
5098 
5099   return false;
5100 }
5101 
5102 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5103   const char *AssemblerDirectiveBegin;
5104   const char *AssemblerDirectiveEnd;
5105   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5106       isHsaAbiVersion3AndAbove(&getSTI())
5107           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5108                             HSAMD::V3::AssemblerDirectiveEnd)
5109           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5110                             HSAMD::AssemblerDirectiveEnd);
5111 
5112   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5113     return Error(getLoc(),
5114                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5115                  "not available on non-amdhsa OSes")).str());
5116   }
5117 
5118   std::string HSAMetadataString;
5119   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5120                           HSAMetadataString))
5121     return true;
5122 
5123   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5124     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5125       return Error(getLoc(), "invalid HSA metadata");
5126   } else {
5127     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5128       return Error(getLoc(), "invalid HSA metadata");
5129   }
5130 
5131   return false;
5132 }
5133 
5134 /// Common code to parse out a block of text (typically YAML) between start and
5135 /// end directives.
5136 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5137                                           const char *AssemblerDirectiveEnd,
5138                                           std::string &CollectString) {
5139 
5140   raw_string_ostream CollectStream(CollectString);
5141 
5142   getLexer().setSkipSpace(false);
5143 
5144   bool FoundEnd = false;
5145   while (!isToken(AsmToken::Eof)) {
5146     while (isToken(AsmToken::Space)) {
5147       CollectStream << getTokenStr();
5148       Lex();
5149     }
5150 
5151     if (trySkipId(AssemblerDirectiveEnd)) {
5152       FoundEnd = true;
5153       break;
5154     }
5155 
5156     CollectStream << Parser.parseStringToEndOfStatement()
5157                   << getContext().getAsmInfo()->getSeparatorString();
5158 
5159     Parser.eatToEndOfStatement();
5160   }
5161 
5162   getLexer().setSkipSpace(true);
5163 
5164   if (isToken(AsmToken::Eof) && !FoundEnd) {
5165     return TokError(Twine("expected directive ") +
5166                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5167   }
5168 
5169   CollectStream.flush();
5170   return false;
5171 }
5172 
5173 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5174 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5175   std::string String;
5176   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5177                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5178     return true;
5179 
5180   auto PALMetadata = getTargetStreamer().getPALMetadata();
5181   if (!PALMetadata->setFromString(String))
5182     return Error(getLoc(), "invalid PAL metadata");
5183   return false;
5184 }
5185 
5186 /// Parse the assembler directive for old linear-format PAL metadata.
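/// Illustrative operand form: a comma-separated list of register/value pairs,
/// e.g. "0x2c0a, 0x2d010000, 0x2c0b, 0x0" (the numbers here are placeholders,
/// not meaningful PAL registers).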
5187 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5188   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5189     return Error(getLoc(),
5190                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5191                  "not available on non-amdpal OSes")).str());
5192   }
5193 
5194   auto PALMetadata = getTargetStreamer().getPALMetadata();
5195   PALMetadata->setLegacy();
5196   for (;;) {
5197     uint32_t Key, Value;
5198     if (ParseAsAbsoluteExpression(Key)) {
5199       return TokError(Twine("invalid value in ") +
5200                       Twine(PALMD::AssemblerDirective));
5201     }
5202     if (!trySkipToken(AsmToken::Comma)) {
5203       return TokError(Twine("expected an even number of values in ") +
5204                       Twine(PALMD::AssemblerDirective));
5205     }
5206     if (ParseAsAbsoluteExpression(Value)) {
5207       return TokError(Twine("invalid value in ") +
5208                       Twine(PALMD::AssemblerDirective));
5209     }
5210     PALMetadata->setRegister(Key, Value);
5211     if (!trySkipToken(AsmToken::Comma))
5212       break;
5213   }
5214   return false;
5215 }
5216 
5217 /// ParseDirectiveAMDGPULDS
5218 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
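///  e.g. (illustrative; the symbol name is hypothetical):
///    .amdgpu_lds my_lds_sym, 1024, 16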
5219 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5220   if (getParser().checkForValidSection())
5221     return true;
5222 
5223   StringRef Name;
5224   SMLoc NameLoc = getLoc();
5225   if (getParser().parseIdentifier(Name))
5226     return TokError("expected identifier in directive");
5227 
5228   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5229   if (parseToken(AsmToken::Comma, "expected ','"))
5230     return true;
5231 
5232   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5233 
5234   int64_t Size;
5235   SMLoc SizeLoc = getLoc();
5236   if (getParser().parseAbsoluteExpression(Size))
5237     return true;
5238   if (Size < 0)
5239     return Error(SizeLoc, "size must be non-negative");
5240   if (Size > LocalMemorySize)
5241     return Error(SizeLoc, "size is too large");
5242 
5243   int64_t Alignment = 4;
5244   if (trySkipToken(AsmToken::Comma)) {
5245     SMLoc AlignLoc = getLoc();
5246     if (getParser().parseAbsoluteExpression(Alignment))
5247       return true;
5248     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5249       return Error(AlignLoc, "alignment must be a power of two");
5250 
5251     // Alignment larger than the size of LDS is possible in theory, as long
5252     // as the linker manages to place the symbol at address 0, but we do want
5253     // to make sure the alignment fits nicely into a 32-bit integer.
5254     if (Alignment >= 1u << 31)
5255       return Error(AlignLoc, "alignment is too large");
5256   }
5257 
5258   if (parseToken(AsmToken::EndOfStatement,
5259                  "unexpected token in '.amdgpu_lds' directive"))
5260     return true;
5261 
5262   Symbol->redefineIfPossible();
5263   if (!Symbol->isUndefined())
5264     return Error(NameLoc, "invalid symbol redefinition");
5265 
5266   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5267   return false;
5268 }
5269 
5270 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5271   StringRef IDVal = DirectiveID.getString();
5272 
5273   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5274     if (IDVal == ".amdhsa_kernel")
5275      return ParseDirectiveAMDHSAKernel();
5276 
5277     // TODO: Restructure/combine with PAL metadata directive.
5278     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5279       return ParseDirectiveHSAMetadata();
5280   } else {
5281     if (IDVal == ".hsa_code_object_version")
5282       return ParseDirectiveHSACodeObjectVersion();
5283 
5284     if (IDVal == ".hsa_code_object_isa")
5285       return ParseDirectiveHSACodeObjectISA();
5286 
5287     if (IDVal == ".amd_kernel_code_t")
5288       return ParseDirectiveAMDKernelCodeT();
5289 
5290     if (IDVal == ".amdgpu_hsa_kernel")
5291       return ParseDirectiveAMDGPUHsaKernel();
5292 
5293     if (IDVal == ".amd_amdgpu_isa")
5294       return ParseDirectiveISAVersion();
5295 
5296     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5297       return ParseDirectiveHSAMetadata();
5298   }
5299 
5300   if (IDVal == ".amdgcn_target")
5301     return ParseDirectiveAMDGCNTarget();
5302 
5303   if (IDVal == ".amdgpu_lds")
5304     return ParseDirectiveAMDGPULDS();
5305 
5306   if (IDVal == PALMD::AssemblerDirectiveBegin)
5307     return ParseDirectivePALMetadataBegin();
5308 
5309   if (IDVal == PALMD::AssemblerDirective)
5310     return ParseDirectivePALMetadata();
5311 
5312   return true;
5313 }
5314 
5315 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5316                                            unsigned RegNo) {
5317 
5318   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5319        R.isValid(); ++R) {
5320     if (*R == RegNo)
5321       return isGFX9Plus();
5322   }
5323 
5324   // GFX10 has 2 more SGPRs 104 and 105.
5325   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5326        R.isValid(); ++R) {
5327     if (*R == RegNo)
5328       return hasSGPR104_SGPR105();
5329   }
5330 
5331   switch (RegNo) {
5332   case AMDGPU::SRC_SHARED_BASE:
5333   case AMDGPU::SRC_SHARED_LIMIT:
5334   case AMDGPU::SRC_PRIVATE_BASE:
5335   case AMDGPU::SRC_PRIVATE_LIMIT:
5336   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5337     return isGFX9Plus();
5338   case AMDGPU::TBA:
5339   case AMDGPU::TBA_LO:
5340   case AMDGPU::TBA_HI:
5341   case AMDGPU::TMA:
5342   case AMDGPU::TMA_LO:
5343   case AMDGPU::TMA_HI:
5344     return !isGFX9Plus();
5345   case AMDGPU::XNACK_MASK:
5346   case AMDGPU::XNACK_MASK_LO:
5347   case AMDGPU::XNACK_MASK_HI:
5348     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5349   case AMDGPU::SGPR_NULL:
5350     return isGFX10Plus();
5351   default:
5352     break;
5353   }
5354 
5355   if (isCI())
5356     return true;
5357 
5358   if (isSI() || isGFX10Plus()) {
5359     // No flat_scr on SI.
5360     // On GFX10 flat scratch is not a valid register operand and can only be
5361     // accessed with s_setreg/s_getreg.
5362     switch (RegNo) {
5363     case AMDGPU::FLAT_SCR:
5364     case AMDGPU::FLAT_SCR_LO:
5365     case AMDGPU::FLAT_SCR_HI:
5366       return false;
5367     default:
5368       return true;
5369     }
5370   }
5371 
5372   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5373   // SI/CI have.
5374   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5375        R.isValid(); ++R) {
5376     if (*R == RegNo)
5377       return hasSGPR102_SGPR103();
5378   }
5379 
5380   return true;
5381 }
5382 
5383 OperandMatchResultTy
5384 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5385                               OperandMode Mode) {
5386   // Try to parse with a custom parser
5387   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5388 
5389   // If we successfully parsed the operand or if there was an error parsing,
5390   // we are done.
5391   //
5392   // If we are parsing after we reach EndOfStatement then this means we
5393   // are appending default values to the Operands list.  This is only done
5394   // by a custom parser, so we shouldn't continue on to the generic parsing.
5395   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5396       isToken(AsmToken::EndOfStatement))
5397     return ResTy;
5398 
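  // In NSA mode, MIMG address operands may be written as a bracketed register
  // list, e.g. (illustrative) "[v4, v9, v2]". Single-element lists are
  // unwrapped below, so "[v4]" is treated the same as "v4".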
5399   SMLoc RBraceLoc;
5400   SMLoc LBraceLoc = getLoc();
5401   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5402     unsigned Prefix = Operands.size();
5403 
5404     for (;;) {
5405       auto Loc = getLoc();
5406       ResTy = parseReg(Operands);
5407       if (ResTy == MatchOperand_NoMatch)
5408         Error(Loc, "expected a register");
5409       if (ResTy != MatchOperand_Success)
5410         return MatchOperand_ParseFail;
5411 
5412       RBraceLoc = getLoc();
5413       if (trySkipToken(AsmToken::RBrac))
5414         break;
5415 
5416       if (!skipToken(AsmToken::Comma,
5417                      "expected a comma or a closing square bracket")) {
5418         return MatchOperand_ParseFail;
5419       }
5420     }
5421 
5422     if (Operands.size() - Prefix > 1) {
5423       Operands.insert(Operands.begin() + Prefix,
5424                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5425       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5426     }
5427 
5428     return MatchOperand_Success;
5429   }
5430 
5431   return parseRegOrImm(Operands);
5432 }
5433 
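// Illustrative suffix handling (the mnemonics are examples only):
//   "v_add_f32_e64"  -> forced 64-bit encoding, name "v_add_f32"
//   "v_mov_b32_sdwa" -> forced SDWA encoding,   name "v_mov_b32"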
5434 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5435   // Clear any forced encodings from the previous instruction.
5436   setForcedEncodingSize(0);
5437   setForcedDPP(false);
5438   setForcedSDWA(false);
5439 
5440   if (Name.endswith("_e64")) {
5441     setForcedEncodingSize(64);
5442     return Name.substr(0, Name.size() - 4);
5443   } else if (Name.endswith("_e32")) {
5444     setForcedEncodingSize(32);
5445     return Name.substr(0, Name.size() - 4);
5446   } else if (Name.endswith("_dpp")) {
5447     setForcedDPP(true);
5448     return Name.substr(0, Name.size() - 4);
5449   } else if (Name.endswith("_sdwa")) {
5450     setForcedSDWA(true);
5451     return Name.substr(0, Name.size() - 5);
5452   }
5453   return Name;
5454 }
5455 
5456 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5457                                        StringRef Name,
5458                                        SMLoc NameLoc, OperandVector &Operands) {
5459   // Add the instruction mnemonic
5460   Name = parseMnemonicSuffix(Name);
5461   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5462 
5463   bool IsMIMG = Name.startswith("image_");
5464 
5465   while (!trySkipToken(AsmToken::EndOfStatement)) {
5466     OperandMode Mode = OperandMode_Default;
5467     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5468       Mode = OperandMode_NSA;
5469     CPolSeen = 0;
5470     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5471 
5472     if (Res != MatchOperand_Success) {
5473       checkUnsupportedInstruction(Name, NameLoc);
5474       if (!Parser.hasPendingError()) {
5475         // FIXME: use real operand location rather than the current location.
5476         StringRef Msg =
5477           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5478                                             "not a valid operand.";
5479         Error(getLoc(), Msg);
5480       }
5481       while (!trySkipToken(AsmToken::EndOfStatement)) {
5482         lex();
5483       }
5484       return true;
5485     }
5486 
5487     // Eat the comma or space if there is one.
5488     trySkipToken(AsmToken::Comma);
5489   }
5490 
5491   return false;
5492 }
5493 
5494 //===----------------------------------------------------------------------===//
5495 // Utility functions
5496 //===----------------------------------------------------------------------===//
5497 
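// Parses operands of the form "<prefix>:<integer expression>", e.g.
// (illustrative) "offset:4095" or "offset0:8".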
5498 OperandMatchResultTy
5499 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5500 
5501   if (!trySkipId(Prefix, AsmToken::Colon))
5502     return MatchOperand_NoMatch;
5503 
5504   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5505 }
5506 
5507 OperandMatchResultTy
5508 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5509                                     AMDGPUOperand::ImmTy ImmTy,
5510                                     bool (*ConvertResult)(int64_t&)) {
5511   SMLoc S = getLoc();
5512   int64_t Value = 0;
5513 
5514   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5515   if (Res != MatchOperand_Success)
5516     return Res;
5517 
5518   if (ConvertResult && !ConvertResult(Value)) {
5519     Error(S, "invalid " + StringRef(Prefix) + " value.");
5520   }
5521 
5522   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5523   return MatchOperand_Success;
5524 }
5525 
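// Parses bit-array operands of the form "<prefix>:[b,...]" with at most four
// elements, each 0 or 1, e.g. (illustrative) "op_sel:[0,1]" or
// "neg_lo:[1,0,1,0]".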
5526 OperandMatchResultTy
5527 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5528                                              OperandVector &Operands,
5529                                              AMDGPUOperand::ImmTy ImmTy,
5530                                              bool (*ConvertResult)(int64_t&)) {
5531   SMLoc S = getLoc();
5532   if (!trySkipId(Prefix, AsmToken::Colon))
5533     return MatchOperand_NoMatch;
5534 
5535   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5536     return MatchOperand_ParseFail;
5537 
5538   unsigned Val = 0;
5539   const unsigned MaxSize = 4;
5540 
5541   // FIXME: How to verify the number of elements matches the number of src
5542   // operands?
5543   for (int I = 0; ; ++I) {
5544     int64_t Op;
5545     SMLoc Loc = getLoc();
5546     if (!parseExpr(Op))
5547       return MatchOperand_ParseFail;
5548 
5549     if (Op != 0 && Op != 1) {
5550       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5551       return MatchOperand_ParseFail;
5552     }
5553 
5554     Val |= (Op << I);
5555 
5556     if (trySkipToken(AsmToken::RBrac))
5557       break;
5558 
5559     if (I + 1 == MaxSize) {
5560       Error(getLoc(), "expected a closing square bracket");
5561       return MatchOperand_ParseFail;
5562     }
5563 
5564     if (!skipToken(AsmToken::Comma, "expected a comma"))
5565       return MatchOperand_ParseFail;
5566   }
5567 
5568   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5569   return MatchOperand_Success;
5570 }
5571 
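// A named bit is written either as the bare name (bit = 1) or as "no" + name
// (bit = 0), e.g. (illustrative) "gds" / "nogds", "a16" / "noa16".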
5572 OperandMatchResultTy
5573 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5574                                AMDGPUOperand::ImmTy ImmTy) {
5575   int64_t Bit;
5576   SMLoc S = getLoc();
5577 
5578   if (trySkipId(Name)) {
5579     Bit = 1;
5580   } else if (trySkipId("no", Name)) {
5581     Bit = 0;
5582   } else {
5583     return MatchOperand_NoMatch;
5584   }
5585 
5586   if (Name == "r128" && !hasMIMG_R128()) {
5587     Error(S, "r128 modifier is not supported on this GPU");
5588     return MatchOperand_ParseFail;
5589   }
5590   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5591     Error(S, "a16 modifier is not supported on this GPU");
5592     return MatchOperand_ParseFail;
5593   }
5594 
5595   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5596     ImmTy = AMDGPUOperand::ImmTyR128A16;
5597 
5598   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5599   return MatchOperand_Success;
5600 }
5601 
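// Parses one cache policy modifier per call; an instruction may carry several,
// e.g. (illustrative only): buffer_load_dword v0, off, s[0:3], 0 glc slc dlc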
5602 OperandMatchResultTy
5603 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5604   unsigned CPolOn = 0;
5605   unsigned CPolOff = 0;
5606   SMLoc S = getLoc();
5607 
5608   if (trySkipId("glc"))
5609     CPolOn = AMDGPU::CPol::GLC;
5610   else if (trySkipId("noglc"))
5611     CPolOff = AMDGPU::CPol::GLC;
5612   else if (trySkipId("slc"))
5613     CPolOn = AMDGPU::CPol::SLC;
5614   else if (trySkipId("noslc"))
5615     CPolOff = AMDGPU::CPol::SLC;
5616   else if (trySkipId("dlc"))
5617     CPolOn = AMDGPU::CPol::DLC;
5618   else if (trySkipId("nodlc"))
5619     CPolOff = AMDGPU::CPol::DLC;
5620   else if (trySkipId("scc"))
5621     CPolOn = AMDGPU::CPol::SCC;
5622   else if (trySkipId("noscc"))
5623     CPolOff = AMDGPU::CPol::SCC;
5624   else
5625     return MatchOperand_NoMatch;
5626 
5627   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5628     Error(S, "dlc modifier is not supported on this GPU");
5629     return MatchOperand_ParseFail;
5630   }
5631 
5632   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5633     Error(S, "scc modifier is not supported on this GPU");
5634     return MatchOperand_ParseFail;
5635   }
5636 
5637   if (CPolSeen & (CPolOn | CPolOff)) {
5638     Error(S, "duplicate cache policy modifier");
5639     return MatchOperand_ParseFail;
5640   }
5641 
5642   CPolSeen |= (CPolOn | CPolOff);
5643 
5644   for (unsigned I = 1; I != Operands.size(); ++I) {
5645     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5646     if (Op.isCPol()) {
5647       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5648       return MatchOperand_Success;
5649     }
5650   }
5651 
5652   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5653                                               AMDGPUOperand::ImmTyCPol));
5654 
5655   return MatchOperand_Success;
5656 }
5657 
5658 static void addOptionalImmOperand(
5659   MCInst& Inst, const OperandVector& Operands,
5660   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5661   AMDGPUOperand::ImmTy ImmT,
5662   int64_t Default = 0) {
5663   auto i = OptionalIdx.find(ImmT);
5664   if (i != OptionalIdx.end()) {
5665     unsigned Idx = i->second;
5666     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5667   } else {
5668     Inst.addOperand(MCOperand::createImm(Default));
5669   }
5670 }
5671 
5672 OperandMatchResultTy
5673 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5674                                        StringRef &Value,
5675                                        SMLoc &StringLoc) {
5676   if (!trySkipId(Prefix, AsmToken::Colon))
5677     return MatchOperand_NoMatch;
5678 
5679   StringLoc = getLoc();
5680   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5681                                                   : MatchOperand_ParseFail;
5682 }
5683 
5684 //===----------------------------------------------------------------------===//
5685 // MTBUF format
5686 //===----------------------------------------------------------------------===//
5687 
5688 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5689                                   int64_t MaxVal,
5690                                   int64_t &Fmt) {
5691   int64_t Val;
5692   SMLoc Loc = getLoc();
5693 
5694   auto Res = parseIntWithPrefix(Pref, Val);
5695   if (Res == MatchOperand_ParseFail)
5696     return false;
5697   if (Res == MatchOperand_NoMatch)
5698     return true;
5699 
5700   if (Val < 0 || Val > MaxVal) {
5701     Error(Loc, Twine("out of range ", StringRef(Pref)));
5702     return false;
5703   }
5704 
5705   Fmt = Val;
5706   return true;
5707 }
5708 
5709 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5710 // values to live in a joint format operand in the MCInst encoding.
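// Illustrative operand fragments accepted here: "dfmt:14, nfmt:7",
// "nfmt:7, dfmt:14", or either one alone (the values are examples only).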
5711 OperandMatchResultTy
5712 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5713   using namespace llvm::AMDGPU::MTBUFFormat;
5714 
5715   int64_t Dfmt = DFMT_UNDEF;
5716   int64_t Nfmt = NFMT_UNDEF;
5717 
5718   // dfmt and nfmt can appear in either order, and each is optional.
5719   for (int I = 0; I < 2; ++I) {
5720     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5721       return MatchOperand_ParseFail;
5722 
5723     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5724       return MatchOperand_ParseFail;
5725     }
5726     // Skip optional comma between dfmt/nfmt
5727     // but guard against 2 commas following each other.
5728     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5729         !peekToken().is(AsmToken::Comma)) {
5730       trySkipToken(AsmToken::Comma);
5731     }
5732   }
5733 
5734   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5735     return MatchOperand_NoMatch;
5736 
5737   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5738   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5739 
5740   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5741   return MatchOperand_Success;
5742 }
5743 
5744 OperandMatchResultTy
5745 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5746   using namespace llvm::AMDGPU::MTBUFFormat;
5747 
5748   int64_t Fmt = UFMT_UNDEF;
5749 
5750   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5751     return MatchOperand_ParseFail;
5752 
5753   if (Fmt == UFMT_UNDEF)
5754     return MatchOperand_NoMatch;
5755 
5756   Format = Fmt;
5757   return MatchOperand_Success;
5758 }
5759 
5760 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5761                                     int64_t &Nfmt,
5762                                     StringRef FormatStr,
5763                                     SMLoc Loc) {
5764   using namespace llvm::AMDGPU::MTBUFFormat;
5765   int64_t Format;
5766 
5767   Format = getDfmt(FormatStr);
5768   if (Format != DFMT_UNDEF) {
5769     Dfmt = Format;
5770     return true;
5771   }
5772 
5773   Format = getNfmt(FormatStr, getSTI());
5774   if (Format != NFMT_UNDEF) {
5775     Nfmt = Format;
5776     return true;
5777   }
5778 
5779   Error(Loc, "unsupported format");
5780   return false;
5781 }
5782 
5783 OperandMatchResultTy
5784 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5785                                           SMLoc FormatLoc,
5786                                           int64_t &Format) {
5787   using namespace llvm::AMDGPU::MTBUFFormat;
5788 
5789   int64_t Dfmt = DFMT_UNDEF;
5790   int64_t Nfmt = NFMT_UNDEF;
5791   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5792     return MatchOperand_ParseFail;
5793 
5794   if (trySkipToken(AsmToken::Comma)) {
5795     StringRef Str;
5796     SMLoc Loc = getLoc();
5797     if (!parseId(Str, "expected a format string") ||
5798         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5799       return MatchOperand_ParseFail;
5800     }
5801     if (Dfmt == DFMT_UNDEF) {
5802       Error(Loc, "duplicate numeric format");
5803       return MatchOperand_ParseFail;
5804     } else if (Nfmt == NFMT_UNDEF) {
5805       Error(Loc, "duplicate data format");
5806       return MatchOperand_ParseFail;
5807     }
5808   }
5809 
5810   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5811   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5812 
5813   if (isGFX10Plus()) {
5814     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5815     if (Ufmt == UFMT_UNDEF) {
5816       Error(FormatLoc, "unsupported format");
5817       return MatchOperand_ParseFail;
5818     }
5819     Format = Ufmt;
5820   } else {
5821     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5822   }
5823 
5824   return MatchOperand_Success;
5825 }
5826 
5827 OperandMatchResultTy
5828 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5829                                             SMLoc Loc,
5830                                             int64_t &Format) {
5831   using namespace llvm::AMDGPU::MTBUFFormat;
5832 
5833   auto Id = getUnifiedFormat(FormatStr);
5834   if (Id == UFMT_UNDEF)
5835     return MatchOperand_NoMatch;
5836 
5837   if (!isGFX10Plus()) {
5838     Error(Loc, "unified format is not supported on this GPU");
5839     return MatchOperand_ParseFail;
5840   }
5841 
5842   Format = Id;
5843   return MatchOperand_Success;
5844 }
5845 
5846 OperandMatchResultTy
5847 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5848   using namespace llvm::AMDGPU::MTBUFFormat;
5849   SMLoc Loc = getLoc();
5850 
5851   if (!parseExpr(Format))
5852     return MatchOperand_ParseFail;
5853   if (!isValidFormatEncoding(Format, getSTI())) {
5854     Error(Loc, "out of range format");
5855     return MatchOperand_ParseFail;
5856   }
5857 
5858   return MatchOperand_Success;
5859 }
5860 
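// Illustrative forms handled below (the symbolic names are examples only):
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]   // split (pre-gfx10)
//   format:[BUF_FMT_32_FLOAT]                           // unified (gfx10+)
//   format:22                                           // numeric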
5861 OperandMatchResultTy
5862 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5863   using namespace llvm::AMDGPU::MTBUFFormat;
5864 
5865   if (!trySkipId("format", AsmToken::Colon))
5866     return MatchOperand_NoMatch;
5867 
5868   if (trySkipToken(AsmToken::LBrac)) {
5869     StringRef FormatStr;
5870     SMLoc Loc = getLoc();
5871     if (!parseId(FormatStr, "expected a format string"))
5872       return MatchOperand_ParseFail;
5873 
5874     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5875     if (Res == MatchOperand_NoMatch)
5876       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5877     if (Res != MatchOperand_Success)
5878       return Res;
5879 
5880     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5881       return MatchOperand_ParseFail;
5882 
5883     return MatchOperand_Success;
5884   }
5885 
5886   return parseNumericFormat(Format);
5887 }
5888 
5889 OperandMatchResultTy
5890 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5891   using namespace llvm::AMDGPU::MTBUFFormat;
5892 
5893   int64_t Format = getDefaultFormatEncoding(getSTI());
5894   OperandMatchResultTy Res;
5895   SMLoc Loc = getLoc();
5896 
5897   // Parse legacy format syntax.
5898   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5899   if (Res == MatchOperand_ParseFail)
5900     return Res;
5901 
5902   bool FormatFound = (Res == MatchOperand_Success);
5903 
5904   Operands.push_back(
5905     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5906 
5907   if (FormatFound)
5908     trySkipToken(AsmToken::Comma);
5909 
5910   if (isToken(AsmToken::EndOfStatement)) {
5911     // We are expecting an soffset operand,
5912     // but let matcher handle the error.
5913     return MatchOperand_Success;
5914   }
5915 
5916   // Parse soffset.
5917   Res = parseRegOrImm(Operands);
5918   if (Res != MatchOperand_Success)
5919     return Res;
5920 
5921   trySkipToken(AsmToken::Comma);
5922 
5923   if (!FormatFound) {
5924     Res = parseSymbolicOrNumericFormat(Format);
5925     if (Res == MatchOperand_ParseFail)
5926       return Res;
5927     if (Res == MatchOperand_Success) {
5928       auto Size = Operands.size();
5929       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5930       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5931       Op.setImm(Format);
5932     }
5933     return MatchOperand_Success;
5934   }
5935 
5936   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5937     Error(getLoc(), "duplicate format");
5938     return MatchOperand_ParseFail;
5939   }
5940   return MatchOperand_Success;
5941 }
5942 
5943 //===----------------------------------------------------------------------===//
5944 // ds
5945 //===----------------------------------------------------------------------===//
5946 
5947 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5948                                     const OperandVector &Operands) {
5949   OptionalImmIndexMap OptionalIdx;
5950 
5951   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5952     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5953 
5954     // Add the register arguments
5955     if (Op.isReg()) {
5956       Op.addRegOperands(Inst, 1);
5957       continue;
5958     }
5959 
5960     // Handle optional arguments
5961     OptionalIdx[Op.getImmTy()] = i;
5962   }
5963 
5964   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5965   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5966   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5967 
5968   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5969 }
5970 
5971 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5972                                 bool IsGdsHardcoded) {
5973   OptionalImmIndexMap OptionalIdx;
5974 
5975   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5976     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5977 
5978     // Add the register arguments
5979     if (Op.isReg()) {
5980       Op.addRegOperands(Inst, 1);
5981       continue;
5982     }
5983 
5984     if (Op.isToken() && Op.getToken() == "gds") {
5985       IsGdsHardcoded = true;
5986       continue;
5987     }
5988 
5989     // Handle optional arguments
5990     OptionalIdx[Op.getImmTy()] = i;
5991   }
5992 
5993   AMDGPUOperand::ImmTy OffsetType =
5994     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5995      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5996      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5997                                                       AMDGPUOperand::ImmTyOffset;
5998 
5999   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6000 
6001   if (!IsGdsHardcoded) {
6002     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6003   }
6004   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6005 }
6006 
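// Converts a parsed "exp" instruction, e.g. (illustrative only):
//   exp mrt0 v0, v1, v2, v3 done vm
//   exp mrtz v0, off, off, off
// Unused sources are written as "off" and become NoRegister operands; the
// enable mask is derived from which sources are present.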
6007 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6008   OptionalImmIndexMap OptionalIdx;
6009 
6010   unsigned OperandIdx[4];
6011   unsigned EnMask = 0;
6012   int SrcIdx = 0;
6013 
6014   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6015     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6016 
6017     // Add the register arguments
6018     if (Op.isReg()) {
6019       assert(SrcIdx < 4);
6020       OperandIdx[SrcIdx] = Inst.size();
6021       Op.addRegOperands(Inst, 1);
6022       ++SrcIdx;
6023       continue;
6024     }
6025 
6026     if (Op.isOff()) {
6027       assert(SrcIdx < 4);
6028       OperandIdx[SrcIdx] = Inst.size();
6029       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6030       ++SrcIdx;
6031       continue;
6032     }
6033 
6034     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6035       Op.addImmOperands(Inst, 1);
6036       continue;
6037     }
6038 
6039     if (Op.isToken() && Op.getToken() == "done")
6040       continue;
6041 
6042     // Handle optional arguments
6043     OptionalIdx[Op.getImmTy()] = i;
6044   }
6045 
6046   assert(SrcIdx == 4);
6047 
6048   bool Compr = false;
6049   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6050     Compr = true;
6051     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6052     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6053     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6054   }
6055 
6056   for (auto i = 0; i < SrcIdx; ++i) {
6057     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6058       EnMask |= Compr ? (0x3 << (i * 2)) : (0x1 << i);
6059     }
6060   }
6061 
6062   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6063   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6064 
6065   Inst.addOperand(MCOperand::createImm(EnMask));
6066 }
6067 
6068 //===----------------------------------------------------------------------===//
6069 // s_waitcnt
6070 //===----------------------------------------------------------------------===//
6071 
6072 static bool
6073 encodeCnt(
6074   const AMDGPU::IsaVersion ISA,
6075   int64_t &IntVal,
6076   int64_t CntVal,
6077   bool Saturate,
6078   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6079   unsigned (*decode)(const IsaVersion &Version, unsigned))
6080 {
6081   bool Failed = false;
6082 
6083   IntVal = encode(ISA, IntVal, CntVal);
6084   if (CntVal != decode(ISA, IntVal)) {
6085     if (Saturate) {
6086       IntVal = encode(ISA, IntVal, -1);
6087     } else {
6088       Failed = true;
6089     }
6090   }
6091   return Failed;
6092 }
6093 
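// An s_waitcnt operand is either a plain immediate expression or a list of
// named counters, e.g. (illustrative):
//   s_waitcnt vmcnt(0) & lgkmcnt(0)
//   s_waitcnt expcnt(1), vmcnt_sat(100)
// The "_sat" forms clamp an out-of-range value instead of reporting an error.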
6094 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6095 
6096   SMLoc CntLoc = getLoc();
6097   StringRef CntName = getTokenStr();
6098 
6099   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6100       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6101     return false;
6102 
6103   int64_t CntVal;
6104   SMLoc ValLoc = getLoc();
6105   if (!parseExpr(CntVal))
6106     return false;
6107 
6108   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6109 
6110   bool Failed = true;
6111   bool Sat = CntName.endswith("_sat");
6112 
6113   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6114     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6115   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6116     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6117   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6118     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6119   } else {
6120     Error(CntLoc, "invalid counter name " + CntName);
6121     return false;
6122   }
6123 
6124   if (Failed) {
6125     Error(ValLoc, "value is too large for " + CntName);
6126     return false;
6127   }
6128 
6129   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6130     return false;
6131 
6132   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6133     if (isToken(AsmToken::EndOfStatement)) {
6134       Error(getLoc(), "expected a counter name");
6135       return false;
6136     }
6137   }
6138 
6139   return true;
6140 }
6141 
6142 OperandMatchResultTy
6143 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6144   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6145   int64_t Waitcnt = getWaitcntBitMask(ISA);
6146   SMLoc S = getLoc();
6147 
6148   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6149     while (!isToken(AsmToken::EndOfStatement)) {
6150       if (!parseCnt(Waitcnt))
6151         return MatchOperand_ParseFail;
6152     }
6153   } else {
6154     if (!parseExpr(Waitcnt))
6155       return MatchOperand_ParseFail;
6156   }
6157 
6158   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6159   return MatchOperand_Success;
6160 }
6161 
6162 bool
6163 AMDGPUOperand::isSWaitCnt() const {
6164   return isImm();
6165 }
6166 
6167 //===----------------------------------------------------------------------===//
6168 // hwreg
6169 //===----------------------------------------------------------------------===//
6170 
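// Parse the body of a "hwreg(...)" operand. Illustrative forms (assumed
// examples; the symbolic register names are target-dependent):
//   s_getreg_b32 s0, hwreg(HW_REG_HW_ID)
//   s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC, 6, 4)  // id, bit offset, width
//   s_getreg_b32 s0, hwreg(5)                       // numeric register code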
6171 bool
6172 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6173                                 OperandInfoTy &Offset,
6174                                 OperandInfoTy &Width) {
6175   using namespace llvm::AMDGPU::Hwreg;
6176 
6177   // The register may be specified by name or using a numeric code
6178   HwReg.Loc = getLoc();
6179   if (isToken(AsmToken::Identifier) &&
6180       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6181     HwReg.IsSymbolic = true;
6182     lex(); // skip register name
6183   } else if (!parseExpr(HwReg.Id, "a register name")) {
6184     return false;
6185   }
6186 
6187   if (trySkipToken(AsmToken::RParen))
6188     return true;
6189 
6190   // parse optional params
6191   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6192     return false;
6193 
6194   Offset.Loc = getLoc();
6195   if (!parseExpr(Offset.Id))
6196     return false;
6197 
6198   if (!skipToken(AsmToken::Comma, "expected a comma"))
6199     return false;
6200 
6201   Width.Loc = getLoc();
6202   return parseExpr(Width.Id) &&
6203          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6204 }
6205 
6206 bool
6207 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6208                                const OperandInfoTy &Offset,
6209                                const OperandInfoTy &Width) {
6210 
6211   using namespace llvm::AMDGPU::Hwreg;
6212 
6213   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6214     Error(HwReg.Loc,
6215           "specified hardware register is not supported on this GPU");
6216     return false;
6217   }
6218   if (!isValidHwreg(HwReg.Id)) {
6219     Error(HwReg.Loc,
6220           "invalid code of hardware register: only 6-bit values are legal");
6221     return false;
6222   }
6223   if (!isValidHwregOffset(Offset.Id)) {
6224     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6225     return false;
6226   }
6227   if (!isValidHwregWidth(Width.Id)) {
6228     Error(Width.Loc,
6229           "invalid bitfield width: only values from 1 to 32 are legal");
6230     return false;
6231   }
6232   return true;
6233 }
6234 
6235 OperandMatchResultTy
6236 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6237   using namespace llvm::AMDGPU::Hwreg;
6238 
6239   int64_t ImmVal = 0;
6240   SMLoc Loc = getLoc();
6241 
6242   if (trySkipId("hwreg", AsmToken::LParen)) {
6243     OperandInfoTy HwReg(ID_UNKNOWN_);
6244     OperandInfoTy Offset(OFFSET_DEFAULT_);
6245     OperandInfoTy Width(WIDTH_DEFAULT_);
6246     if (parseHwregBody(HwReg, Offset, Width) &&
6247         validateHwreg(HwReg, Offset, Width)) {
6248       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6249     } else {
6250       return MatchOperand_ParseFail;
6251     }
6252   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6253     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6254       Error(Loc, "invalid immediate: only 16-bit values are legal");
6255       return MatchOperand_ParseFail;
6256     }
6257   } else {
6258     return MatchOperand_ParseFail;
6259   }
6260 
6261   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6262   return MatchOperand_Success;
6263 }
6264 
6265 bool AMDGPUOperand::isHwreg() const {
6266   return isImmTy(ImmTyHwreg);
6267 }
6268 
6269 //===----------------------------------------------------------------------===//
6270 // sendmsg
6271 //===----------------------------------------------------------------------===//
6272 
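// Parse the body of a "sendmsg(...)" operand. Illustrative forms (assumed
// examples; message and operation names are target-dependent):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)   // message, operation, stream
//   s_sendmsg sendmsg(2, 1)                    // numeric message and op ids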
6273 bool
6274 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6275                                   OperandInfoTy &Op,
6276                                   OperandInfoTy &Stream) {
6277   using namespace llvm::AMDGPU::SendMsg;
6278 
6279   Msg.Loc = getLoc();
6280   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6281     Msg.IsSymbolic = true;
6282     lex(); // skip message name
6283   } else if (!parseExpr(Msg.Id, "a message name")) {
6284     return false;
6285   }
6286 
6287   if (trySkipToken(AsmToken::Comma)) {
6288     Op.IsDefined = true;
6289     Op.Loc = getLoc();
6290     if (isToken(AsmToken::Identifier) &&
6291         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6292       lex(); // skip operation name
6293     } else if (!parseExpr(Op.Id, "an operation name")) {
6294       return false;
6295     }
6296 
6297     if (trySkipToken(AsmToken::Comma)) {
6298       Stream.IsDefined = true;
6299       Stream.Loc = getLoc();
6300       if (!parseExpr(Stream.Id))
6301         return false;
6302     }
6303   }
6304 
6305   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6306 }
6307 
6308 bool
6309 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6310                                  const OperandInfoTy &Op,
6311                                  const OperandInfoTy &Stream) {
6312   using namespace llvm::AMDGPU::SendMsg;
6313 
6314   // Validation strictness depends on whether the message is specified
6315   // in a symbolic or in a numeric form. In the latter case
6316   // only the possibility of encoding is checked.
6317   bool Strict = Msg.IsSymbolic;
6318 
6319   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6320     Error(Msg.Loc, "invalid message id");
6321     return false;
6322   }
6323   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6324     if (Op.IsDefined) {
6325       Error(Op.Loc, "message does not support operations");
6326     } else {
6327       Error(Msg.Loc, "missing message operation");
6328     }
6329     return false;
6330   }
6331   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6332     Error(Op.Loc, "invalid operation id");
6333     return false;
6334   }
6335   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6336     Error(Stream.Loc, "message operation does not support streams");
6337     return false;
6338   }
6339   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6340     Error(Stream.Loc, "invalid message stream id");
6341     return false;
6342   }
6343   return true;
6344 }
6345 
6346 OperandMatchResultTy
6347 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6348   using namespace llvm::AMDGPU::SendMsg;
6349 
6350   int64_t ImmVal = 0;
6351   SMLoc Loc = getLoc();
6352 
6353   if (trySkipId("sendmsg", AsmToken::LParen)) {
6354     OperandInfoTy Msg(ID_UNKNOWN_);
6355     OperandInfoTy Op(OP_NONE_);
6356     OperandInfoTy Stream(STREAM_ID_NONE_);
6357     if (parseSendMsgBody(Msg, Op, Stream) &&
6358         validateSendMsg(Msg, Op, Stream)) {
6359       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6360     } else {
6361       return MatchOperand_ParseFail;
6362     }
6363   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6364     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6365       Error(Loc, "invalid immediate: only 16-bit values are legal");
6366       return MatchOperand_ParseFail;
6367     }
6368   } else {
6369     return MatchOperand_ParseFail;
6370   }
6371 
6372   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6373   return MatchOperand_Success;
6374 }
6375 
6376 bool AMDGPUOperand::isSendMsg() const {
6377   return isImmTy(ImmTySendMsg);
6378 }
6379 
6380 //===----------------------------------------------------------------------===//
6381 // v_interp
6382 //===----------------------------------------------------------------------===//
6383 
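// Parse an interpolation slot name: "p10", "p20" or "p0", as in the
// (illustrative) example:
//   v_interp_mov_f32 v0, p10, attr0.x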
6384 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6385   StringRef Str;
6386   SMLoc S = getLoc();
6387 
6388   if (!parseId(Str))
6389     return MatchOperand_NoMatch;
6390 
6391   int Slot = StringSwitch<int>(Str)
6392     .Case("p10", 0)
6393     .Case("p20", 1)
6394     .Case("p0", 2)
6395     .Default(-1);
6396 
6397   if (Slot == -1) {
6398     Error(S, "invalid interpolation slot");
6399     return MatchOperand_ParseFail;
6400   }
6401 
6402   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6403                                               AMDGPUOperand::ImmTyInterpSlot));
6404   return MatchOperand_Success;
6405 }
6406 
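// Parse an interpolation attribute of the form "attrN.c", where N is the
// attribute number (0..63) and c is one of x, y, z or w, e.g. "attr31.z".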
6407 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6408   StringRef Str;
6409   SMLoc S = getLoc();
6410 
6411   if (!parseId(Str))
6412     return MatchOperand_NoMatch;
6413 
6414   if (!Str.startswith("attr")) {
6415     Error(S, "invalid interpolation attribute");
6416     return MatchOperand_ParseFail;
6417   }
6418 
6419   StringRef Chan = Str.take_back(2);
6420   int AttrChan = StringSwitch<int>(Chan)
6421     .Case(".x", 0)
6422     .Case(".y", 1)
6423     .Case(".z", 2)
6424     .Case(".w", 3)
6425     .Default(-1);
6426   if (AttrChan == -1) {
6427     Error(S, "invalid or missing interpolation attribute channel");
6428     return MatchOperand_ParseFail;
6429   }
6430 
6431   Str = Str.drop_back(2).drop_front(4);
6432 
6433   uint8_t Attr;
6434   if (Str.getAsInteger(10, Attr)) {
6435     Error(S, "invalid or missing interpolation attribute number");
6436     return MatchOperand_ParseFail;
6437   }
6438 
6439   if (Attr > 63) {
6440     Error(S, "out of bounds interpolation attribute number");
6441     return MatchOperand_ParseFail;
6442   }
6443 
6444   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6445 
6446   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6447                                               AMDGPUOperand::ImmTyInterpAttr));
6448   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6449                                               AMDGPUOperand::ImmTyAttrChan));
6450   return MatchOperand_Success;
6451 }
6452 
6453 //===----------------------------------------------------------------------===//
6454 // exp
6455 //===----------------------------------------------------------------------===//
6456 
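// Parse an export target name, e.g. (illustrative; the supported targets
// depend on the GPU) "mrt0", "mrtz", "null", "pos0" or "param31", as in:
//   exp mrt0 v0, v0, v0, v0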
6457 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6458   using namespace llvm::AMDGPU::Exp;
6459 
6460   StringRef Str;
6461   SMLoc S = getLoc();
6462 
6463   if (!parseId(Str))
6464     return MatchOperand_NoMatch;
6465 
6466   unsigned Id = getTgtId(Str);
6467   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6468     Error(S, (Id == ET_INVALID) ?
6469                 "invalid exp target" :
6470                 "exp target is not supported on this GPU");
6471     return MatchOperand_ParseFail;
6472   }
6473 
6474   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6475                                               AMDGPUOperand::ImmTyExpTgt));
6476   return MatchOperand_Success;
6477 }
6478 
6479 //===----------------------------------------------------------------------===//
6480 // parser helpers
6481 //===----------------------------------------------------------------------===//
6482 
6483 bool
6484 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6485   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6486 }
6487 
6488 bool
6489 AMDGPUAsmParser::isId(const StringRef Id) const {
6490   return isId(getToken(), Id);
6491 }
6492 
6493 bool
6494 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6495   return getTokenKind() == Kind;
6496 }
6497 
6498 bool
6499 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6500   if (isId(Id)) {
6501     lex();
6502     return true;
6503   }
6504   return false;
6505 }
6506 
6507 bool
6508 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6509   if (isToken(AsmToken::Identifier)) {
6510     StringRef Tok = getTokenStr();
6511     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6512       lex();
6513       return true;
6514     }
6515   }
6516   return false;
6517 }
6518 
6519 bool
6520 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6521   if (isId(Id) && peekToken().is(Kind)) {
6522     lex();
6523     lex();
6524     return true;
6525   }
6526   return false;
6527 }
6528 
6529 bool
6530 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6531   if (isToken(Kind)) {
6532     lex();
6533     return true;
6534   }
6535   return false;
6536 }
6537 
6538 bool
6539 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6540                            const StringRef ErrMsg) {
6541   if (!trySkipToken(Kind)) {
6542     Error(getLoc(), ErrMsg);
6543     return false;
6544   }
6545   return true;
6546 }
6547 
6548 bool
6549 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6550   SMLoc S = getLoc();
6551 
6552   const MCExpr *Expr;
6553   if (Parser.parseExpression(Expr))
6554     return false;
6555 
6556   if (Expr->evaluateAsAbsolute(Imm))
6557     return true;
6558 
6559   if (Expected.empty()) {
6560     Error(S, "expected absolute expression");
6561   } else {
6562     Error(S, Twine("expected ", Expected) +
6563              Twine(" or an absolute expression"));
6564   }
6565   return false;
6566 }
6567 
6568 bool
6569 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6570   SMLoc S = getLoc();
6571 
6572   const MCExpr *Expr;
6573   if (Parser.parseExpression(Expr))
6574     return false;
6575 
6576   int64_t IntVal;
6577   if (Expr->evaluateAsAbsolute(IntVal)) {
6578     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6579   } else {
6580     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6581   }
6582   return true;
6583 }
6584 
6585 bool
6586 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6587   if (isToken(AsmToken::String)) {
6588     Val = getToken().getStringContents();
6589     lex();
6590     return true;
6591   } else {
6592     Error(getLoc(), ErrMsg);
6593     return false;
6594   }
6595 }
6596 
6597 bool
6598 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6599   if (isToken(AsmToken::Identifier)) {
6600     Val = getTokenStr();
6601     lex();
6602     return true;
6603   } else {
6604     if (!ErrMsg.empty())
6605       Error(getLoc(), ErrMsg);
6606     return false;
6607   }
6608 }
6609 
6610 AsmToken
6611 AMDGPUAsmParser::getToken() const {
6612   return Parser.getTok();
6613 }
6614 
6615 AsmToken
6616 AMDGPUAsmParser::peekToken() {
6617   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6618 }
6619 
6620 void
6621 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6622   auto TokCount = getLexer().peekTokens(Tokens);
6623 
6624   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6625     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6626 }
6627 
6628 AsmToken::TokenKind
6629 AMDGPUAsmParser::getTokenKind() const {
6630   return getLexer().getKind();
6631 }
6632 
6633 SMLoc
6634 AMDGPUAsmParser::getLoc() const {
6635   return getToken().getLoc();
6636 }
6637 
6638 StringRef
6639 AMDGPUAsmParser::getTokenStr() const {
6640   return getToken().getString();
6641 }
6642 
6643 void
6644 AMDGPUAsmParser::lex() {
6645   Parser.Lex();
6646 }
6647 
6648 SMLoc
6649 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6650                                const OperandVector &Operands) const {
6651   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6652     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6653     if (Test(Op))
6654       return Op.getStartLoc();
6655   }
6656   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6657 }
6658 
6659 SMLoc
6660 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6661                            const OperandVector &Operands) const {
6662   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6663   return getOperandLoc(Test, Operands);
6664 }
6665 
6666 SMLoc
6667 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6668                            const OperandVector &Operands) const {
6669   auto Test = [=](const AMDGPUOperand& Op) {
6670     return Op.isRegKind() && Op.getReg() == Reg;
6671   };
6672   return getOperandLoc(Test, Operands);
6673 }
6674 
6675 SMLoc
6676 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6677   auto Test = [](const AMDGPUOperand& Op) {
6678     return Op.IsImmKindLiteral() || Op.isExpr();
6679   };
6680   return getOperandLoc(Test, Operands);
6681 }
6682 
6683 SMLoc
6684 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6685   auto Test = [](const AMDGPUOperand& Op) {
6686     return Op.isImmKindConst();
6687   };
6688   return getOperandLoc(Test, Operands);
6689 }
6690 
6691 //===----------------------------------------------------------------------===//
6692 // swizzle
6693 //===----------------------------------------------------------------------===//
6694 
6695 LLVM_READNONE
6696 static unsigned
6697 encodeBitmaskPerm(const unsigned AndMask,
6698                   const unsigned OrMask,
6699                   const unsigned XorMask) {
6700   using namespace llvm::AMDGPU::Swizzle;
6701 
6702   return BITMASK_PERM_ENC |
6703          (AndMask << BITMASK_AND_SHIFT) |
6704          (OrMask  << BITMASK_OR_SHIFT)  |
6705          (XorMask << BITMASK_XOR_SHIFT);
6706 }
6707 
6708 bool
6709 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6710                                      const unsigned MinVal,
6711                                      const unsigned MaxVal,
6712                                      const StringRef ErrMsg,
6713                                      SMLoc &Loc) {
6714   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6715     return false;
6716   }
6717   Loc = getLoc();
6718   if (!parseExpr(Op)) {
6719     return false;
6720   }
6721   if (Op < MinVal || Op > MaxVal) {
6722     Error(Loc, ErrMsg);
6723     return false;
6724   }
6725 
6726   return true;
6727 }
6728 
6729 bool
6730 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6731                                       const unsigned MinVal,
6732                                       const unsigned MaxVal,
6733                                       const StringRef ErrMsg) {
6734   SMLoc Loc;
6735   for (unsigned i = 0; i < OpNum; ++i) {
6736     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6737       return false;
6738   }
6739 
6740   return true;
6741 }
6742 
6743 bool
6744 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6745   using namespace llvm::AMDGPU::Swizzle;
6746 
6747   int64_t Lane[LANE_NUM];
6748   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6749                            "expected a 2-bit lane id")) {
6750     Imm = QUAD_PERM_ENC;
6751     for (unsigned I = 0; I < LANE_NUM; ++I) {
6752       Imm |= Lane[I] << (LANE_SHIFT * I);
6753     }
6754     return true;
6755   }
6756   return false;
6757 }
6758 
6759 bool
6760 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6761   using namespace llvm::AMDGPU::Swizzle;
6762 
6763   SMLoc Loc;
6764   int64_t GroupSize;
6765   int64_t LaneIdx;
6766 
6767   if (!parseSwizzleOperand(GroupSize,
6768                            2, 32,
6769                            "group size must be in the interval [2,32]",
6770                            Loc)) {
6771     return false;
6772   }
6773   if (!isPowerOf2_64(GroupSize)) {
6774     Error(Loc, "group size must be a power of two");
6775     return false;
6776   }
6777   if (parseSwizzleOperand(LaneIdx,
6778                           0, GroupSize - 1,
6779                           "lane id must be in the interval [0,group size - 1]",
6780                           Loc)) {
6781     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6782     return true;
6783   }
6784   return false;
6785 }
6786 
6787 bool
6788 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6789   using namespace llvm::AMDGPU::Swizzle;
6790 
6791   SMLoc Loc;
6792   int64_t GroupSize;
6793 
6794   if (!parseSwizzleOperand(GroupSize,
6795                            2, 32,
6796                            "group size must be in the interval [2,32]",
6797                            Loc)) {
6798     return false;
6799   }
6800   if (!isPowerOf2_64(GroupSize)) {
6801     Error(Loc, "group size must be a power of two");
6802     return false;
6803   }
6804 
6805   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6806   return true;
6807 }
6808 
6809 bool
6810 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6811   using namespace llvm::AMDGPU::Swizzle;
6812 
6813   SMLoc Loc;
6814   int64_t GroupSize;
6815 
6816   if (!parseSwizzleOperand(GroupSize,
6817                            1, 16,
6818                            "group size must be in the interval [1,16]",
6819                            Loc)) {
6820     return false;
6821   }
6822   if (!isPowerOf2_64(GroupSize)) {
6823     Error(Loc, "group size must be a power of two");
6824     return false;
6825   }
6826 
6827   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6828   return true;
6829 }
6830 
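// Parse the mask of swizzle(BITMASK_PERM, "mask"). The mask is a 5-character
// string processed MSB first; each character controls one bit of the lane id:
//   '0' - force the bit to 0        '1' - force the bit to 1
//   'p' - preserve the bit          'i' - invert the bit
// E.g. (illustrative) ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")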
6831 bool
6832 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6833   using namespace llvm::AMDGPU::Swizzle;
6834 
6835   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6836     return false;
6837   }
6838 
6839   StringRef Ctl;
6840   SMLoc StrLoc = getLoc();
6841   if (!parseString(Ctl)) {
6842     return false;
6843   }
6844   if (Ctl.size() != BITMASK_WIDTH) {
6845     Error(StrLoc, "expected a 5-character mask");
6846     return false;
6847   }
6848 
6849   unsigned AndMask = 0;
6850   unsigned OrMask = 0;
6851   unsigned XorMask = 0;
6852 
6853   for (size_t i = 0; i < Ctl.size(); ++i) {
6854     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6855     switch(Ctl[i]) {
6856     default:
6857       Error(StrLoc, "invalid mask");
6858       return false;
6859     case '0':
6860       break;
6861     case '1':
6862       OrMask |= Mask;
6863       break;
6864     case 'p':
6865       AndMask |= Mask;
6866       break;
6867     case 'i':
6868       AndMask |= Mask;
6869       XorMask |= Mask;
6870       break;
6871     }
6872   }
6873 
6874   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6875   return true;
6876 }
6877 
6878 bool
6879 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6880 
6881   SMLoc OffsetLoc = getLoc();
6882 
6883   if (!parseExpr(Imm, "a swizzle macro")) {
6884     return false;
6885   }
6886   if (!isUInt<16>(Imm)) {
6887     Error(OffsetLoc, "expected a 16-bit offset");
6888     return false;
6889   }
6890   return true;
6891 }
6892 
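// Parse a "swizzle(...)" macro. Illustrative examples (assumed typical forms):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 2)   // group size, lane
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 4)           // group size
//   ds_swizzle_b32 v0, v1 offset:swizzle(REVERSE, 8)        // group size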
6893 bool
6894 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6895   using namespace llvm::AMDGPU::Swizzle;
6896 
6897   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6898 
6899     SMLoc ModeLoc = getLoc();
6900     bool Ok = false;
6901 
6902     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6903       Ok = parseSwizzleQuadPerm(Imm);
6904     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6905       Ok = parseSwizzleBitmaskPerm(Imm);
6906     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6907       Ok = parseSwizzleBroadcast(Imm);
6908     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6909       Ok = parseSwizzleSwap(Imm);
6910     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6911       Ok = parseSwizzleReverse(Imm);
6912     } else {
6913       Error(ModeLoc, "expected a swizzle mode");
6914     }
6915 
6916     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6917   }
6918 
6919   return false;
6920 }
6921 
6922 OperandMatchResultTy
6923 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6924   SMLoc S = getLoc();
6925   int64_t Imm = 0;
6926 
6927   if (trySkipId("offset")) {
6928 
6929     bool Ok = false;
6930     if (skipToken(AsmToken::Colon, "expected a colon")) {
6931       if (trySkipId("swizzle")) {
6932         Ok = parseSwizzleMacro(Imm);
6933       } else {
6934         Ok = parseSwizzleOffset(Imm);
6935       }
6936     }
6937 
6938     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6939 
6940     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
6941   } else {
6942     // Swizzle "offset" operand is optional.
6943     // If it is omitted, try parsing other optional operands.
6944     return parseOptionalOpr(Operands);
6945   }
6946 }
6947 
6948 bool
6949 AMDGPUOperand::isSwizzle() const {
6950   return isImmTy(ImmTySwizzle);
6951 }
6952 
6953 //===----------------------------------------------------------------------===//
6954 // VGPR Index Mode
6955 //===----------------------------------------------------------------------===//
6956 
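// Parse the body of a "gpr_idx(...)" operand, e.g. (illustrative):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, SRC1, SRC2, DST)
// Each mode name may appear at most once; "gpr_idx()" selects no modes (OFF).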
6957 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6958 
6959   using namespace llvm::AMDGPU::VGPRIndexMode;
6960 
6961   if (trySkipToken(AsmToken::RParen)) {
6962     return OFF;
6963   }
6964 
6965   int64_t Imm = 0;
6966 
6967   while (true) {
6968     unsigned Mode = 0;
6969     SMLoc S = getLoc();
6970 
6971     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6972       if (trySkipId(IdSymbolic[ModeId])) {
6973         Mode = 1 << ModeId;
6974         break;
6975       }
6976     }
6977 
6978     if (Mode == 0) {
6979       Error(S, (Imm == 0) ?
6980                "expected a VGPR index mode or a closing parenthesis" :
6981                "expected a VGPR index mode");
6982       return UNDEF;
6983     }
6984 
6985     if (Imm & Mode) {
6986       Error(S, "duplicate VGPR index mode");
6987       return UNDEF;
6988     }
6989     Imm |= Mode;
6990 
6991     if (trySkipToken(AsmToken::RParen))
6992       break;
6993     if (!skipToken(AsmToken::Comma,
6994                    "expected a comma or a closing parenthesis"))
6995       return UNDEF;
6996   }
6997 
6998   return Imm;
6999 }
7000 
7001 OperandMatchResultTy
7002 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7003 
7004   using namespace llvm::AMDGPU::VGPRIndexMode;
7005 
7006   int64_t Imm = 0;
7007   SMLoc S = getLoc();
7008 
7009   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7010     Imm = parseGPRIdxMacro();
7011     if (Imm == UNDEF)
7012       return MatchOperand_ParseFail;
7013   } else {
7014     if (getParser().parseAbsoluteExpression(Imm))
7015       return MatchOperand_ParseFail;
7016     if (Imm < 0 || !isUInt<4>(Imm)) {
7017       Error(S, "invalid immediate: only 4-bit values are legal");
7018       return MatchOperand_ParseFail;
7019     }
7020   }
7021 
7022   Operands.push_back(
7023       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7024   return MatchOperand_Success;
7025 }
7026 
7027 bool AMDGPUOperand::isGPRIdxMode() const {
7028   return isImmTy(ImmTyGprIdxMode);
7029 }
7030 
7031 //===----------------------------------------------------------------------===//
7032 // sopp branch targets
7033 //===----------------------------------------------------------------------===//
7034 
7035 OperandMatchResultTy
7036 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7037 
7038   // Make sure we are not parsing something
7039   // that looks like a label or an expression but is not.
7040   // This will improve error messages.
7041   if (isRegister() || isModifier())
7042     return MatchOperand_NoMatch;
7043 
7044   if (!parseExpr(Operands))
7045     return MatchOperand_ParseFail;
7046 
7047   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7048   assert(Opr.isImm() || Opr.isExpr());
7049   SMLoc Loc = Opr.getStartLoc();
7050 
7051   // Currently we do not support arbitrary expressions as branch targets.
7052   // Only labels and absolute expressions are accepted.
7053   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7054     Error(Loc, "expected an absolute expression or a label");
7055   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7056     Error(Loc, "expected a 16-bit signed jump offset");
7057   }
7058 
7059   return MatchOperand_Success;
7060 }
7061 
7062 //===----------------------------------------------------------------------===//
7063 // Boolean holding registers
7064 //===----------------------------------------------------------------------===//
7065 
7066 OperandMatchResultTy
7067 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7068   return parseReg(Operands);
7069 }
7070 
7071 //===----------------------------------------------------------------------===//
7072 // mubuf
7073 //===----------------------------------------------------------------------===//
7074 
7075 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7076   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7077 }
7078 
7079 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7080                                    const OperandVector &Operands,
7081                                    bool IsAtomic,
7082                                    bool IsLds) {
7083   bool IsLdsOpcode = IsLds;
7084   bool HasLdsModifier = false;
7085   OptionalImmIndexMap OptionalIdx;
7086   unsigned FirstOperandIdx = 1;
7087   bool IsAtomicReturn = false;
7088 
7089   if (IsAtomic) {
7090     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7091       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7092       if (!Op.isCPol())
7093         continue;
7094       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7095       break;
7096     }
7097 
7098     if (!IsAtomicReturn) {
7099       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7100       if (NewOpc != -1)
7101         Inst.setOpcode(NewOpc);
7102     }
7103 
7104     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7105                       SIInstrFlags::IsAtomicRet;
7106   }
7107 
7108   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7109     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7110 
7111     // Add the register arguments
7112     if (Op.isReg()) {
7113       Op.addRegOperands(Inst, 1);
7114       // Insert a tied src for atomic return dst.
7115       // This cannot be postponed because subsequent calls to
7116       // addImmOperands rely on the correct number of MC operands.
7117       if (IsAtomicReturn && i == FirstOperandIdx)
7118         Op.addRegOperands(Inst, 1);
7119       continue;
7120     }
7121 
7122     // Handle the case where soffset is an immediate
7123     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7124       Op.addImmOperands(Inst, 1);
7125       continue;
7126     }
7127 
7128     HasLdsModifier |= Op.isLDS();
7129 
7130     // Handle tokens like 'offen' which are sometimes hard-coded into the
7131     // asm string.  There are no MCInst operands for these.
7132     if (Op.isToken()) {
7133       continue;
7134     }
7135     assert(Op.isImm());
7136 
7137     // Handle optional arguments
7138     OptionalIdx[Op.getImmTy()] = i;
7139   }
7140 
7141   // This is a workaround for an llvm quirk which may result in an
7142   // incorrect instruction selection. Lds and non-lds versions of
7143   // MUBUF instructions are identical except that lds versions
7144   // have a mandatory 'lds' modifier. However, this modifier follows
7145   // optional modifiers, and the llvm asm matcher regards this 'lds'
7146   // modifier as an optional one. As a result, an lds version
7147   // of an opcode may be selected even if it has no 'lds' modifier.
7148   if (IsLdsOpcode && !HasLdsModifier) {
7149     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7150     if (NoLdsOpcode != -1) { // Got lds version - correct it.
7151       Inst.setOpcode(NoLdsOpcode);
7152       IsLdsOpcode = false;
7153     }
7154   }
7155 
7156   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7157   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7158 
7159   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7160     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7161   }
7162   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7163 }
7164 
7165 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7166   OptionalImmIndexMap OptionalIdx;
7167 
7168   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7169     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7170 
7171     // Add the register arguments
7172     if (Op.isReg()) {
7173       Op.addRegOperands(Inst, 1);
7174       continue;
7175     }
7176 
7177     // Handle the case where soffset is an immediate
7178     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7179       Op.addImmOperands(Inst, 1);
7180       continue;
7181     }
7182 
7183     // Handle tokens like 'offen' which are sometimes hard-coded into the
7184     // asm string.  There are no MCInst operands for these.
7185     if (Op.isToken()) {
7186       continue;
7187     }
7188     assert(Op.isImm());
7189 
7190     // Handle optional arguments
7191     OptionalIdx[Op.getImmTy()] = i;
7192   }
7193 
7194   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7195                         AMDGPUOperand::ImmTyOffset);
7196   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7197   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7198   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7199   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7200 }
7201 
7202 //===----------------------------------------------------------------------===//
7203 // mimg
7204 //===----------------------------------------------------------------------===//
7205 
7206 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7207                               bool IsAtomic) {
7208   unsigned I = 1;
7209   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7210   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7211     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7212   }
7213 
7214   if (IsAtomic) {
7215     // Add src, same as dst
7216     assert(Desc.getNumDefs() == 1);
7217     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7218   }
7219 
7220   OptionalImmIndexMap OptionalIdx;
7221 
7222   for (unsigned E = Operands.size(); I != E; ++I) {
7223     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7224 
7225     // Add the register arguments
7226     if (Op.isReg()) {
7227       Op.addRegOperands(Inst, 1);
7228     } else if (Op.isImmModifier()) {
7229       OptionalIdx[Op.getImmTy()] = I;
7230     } else if (!Op.isToken()) {
7231       llvm_unreachable("unexpected operand type");
7232     }
7233   }
7234 
7235   bool IsGFX10Plus = isGFX10Plus();
7236 
7237   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7238   if (IsGFX10Plus)
7239     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7240   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7241   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7242   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7243   if (IsGFX10Plus)
7244     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7245   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7246     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7247   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7248   if (!IsGFX10Plus)
7249     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7250   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7251 }
7252 
7253 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7254   cvtMIMG(Inst, Operands, true);
7255 }
7256 
7257 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7258   OptionalImmIndexMap OptionalIdx;
7259   bool IsAtomicReturn = false;
7260 
7261   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7262     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7263     if (!Op.isCPol())
7264       continue;
7265     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7266     break;
7267   }
7268 
7269   if (!IsAtomicReturn) {
7270     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7271     if (NewOpc != -1)
7272       Inst.setOpcode(NewOpc);
7273   }
7274 
7275   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7276                     SIInstrFlags::IsAtomicRet;
7277 
7278   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7279     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7280 
7281     // Add the register arguments
7282     if (Op.isReg()) {
7283       Op.addRegOperands(Inst, 1);
7284       if (IsAtomicReturn && i == 1)
7285         Op.addRegOperands(Inst, 1);
7286       continue;
7287     }
7288 
7289     // Handle the case where soffset is an immediate
7290     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7291       Op.addImmOperands(Inst, 1);
7292       continue;
7293     }
7294 
7295     // Handle tokens like 'offen' which are sometimes hard-coded into the
7296     // asm string.  There are no MCInst operands for these.
7297     if (Op.isToken()) {
7298       continue;
7299     }
7300     assert(Op.isImm());
7301 
7302     // Handle optional arguments
7303     OptionalIdx[Op.getImmTy()] = i;
7304   }
7305 
7306   if ((int)Inst.getNumOperands() <=
7307       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7308     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7309   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7310 }
7311 
7312 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7313                                       const OperandVector &Operands) {
7314   for (unsigned I = 1; I < Operands.size(); ++I) {
7315     auto &Operand = (AMDGPUOperand &)*Operands[I];
7316     if (Operand.isReg())
7317       Operand.addRegOperands(Inst, 1);
7318   }
7319 
7320   Inst.addOperand(MCOperand::createImm(1)); // a16
7321 }
7322 
7323 //===----------------------------------------------------------------------===//
7324 // smrd
7325 //===----------------------------------------------------------------------===//
7326 
7327 bool AMDGPUOperand::isSMRDOffset8() const {
7328   return isImm() && isUInt<8>(getImm());
7329 }
7330 
7331 bool AMDGPUOperand::isSMEMOffset() const {
7332   return isImm(); // Offset range is checked later by validator.
7333 }
7334 
7335 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7336   // 32-bit literals are only supported on CI, and we only want to use them
7337   // when the offset does not fit in 8 bits.
7338   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7339 }
7340 
7341 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7342   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7343 }
7344 
7345 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7346   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7347 }
7348 
7349 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7350   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7351 }
7352 
7353 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7354   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7355 }
7356 
7357 //===----------------------------------------------------------------------===//
7358 // vop3
7359 //===----------------------------------------------------------------------===//
7360 
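// Convert the value of an "omod" output modifier to its hardware encoding:
// mul:1 and div:1 map to 0 (no modifier), mul:2 to 1, mul:4 to 2, div:2 to 3.
// The assembly form is e.g. (illustrative) "v_add_f32 v0, v1, v2 mul:2".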
7361 static bool ConvertOmodMul(int64_t &Mul) {
7362   if (Mul != 1 && Mul != 2 && Mul != 4)
7363     return false;
7364 
7365   Mul >>= 1;
7366   return true;
7367 }
7368 
7369 static bool ConvertOmodDiv(int64_t &Div) {
7370   if (Div == 1) {
7371     Div = 0;
7372     return true;
7373   }
7374 
7375   if (Div == 2) {
7376     Div = 3;
7377     return true;
7378   }
7379 
7380   return false;
7381 }
7382 
7383 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7384 // This is intentional and ensures compatibility with sp3.
7385 // See bug 35397 for details.
7386 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7387   if (BoundCtrl == 0 || BoundCtrl == 1) {
7388     BoundCtrl = 1;
7389     return true;
7390   }
7391   return false;
7392 }
7393 
7394 // Note: the order in this table matches the order of operands in AsmString.
7395 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7396   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7397   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7398   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7399   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7400   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7401   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7402   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7403   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7404   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7405   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7406   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7407   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7408   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7409   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7410   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7411   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7412   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7413   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7414   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7415   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7416   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7417   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7418   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7419   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7420   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7421   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7422   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7423   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7424   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7425   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7426   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7427   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7428   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7429   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7430   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7431   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7432   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7433   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7434   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7435   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7436   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7437 };
7438 
7439 void AMDGPUAsmParser::onBeginOfFile() {
7440   if (!getParser().getStreamer().getTargetStreamer() ||
7441       getSTI().getTargetTriple().getArch() == Triple::r600)
7442     return;
7443 
7444   if (!getTargetStreamer().getTargetID())
7445     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7446 
7447   if (isHsaAbiVersion3AndAbove(&getSTI()))
7448     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7449 }
7450 
7451 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7452 
7453   OperandMatchResultTy res = parseOptionalOpr(Operands);
7454 
7455   // This is a hack to enable hardcoded mandatory operands which follow
7456   // optional operands.
7457   //
7458   // The current design assumes that all operands after the first optional operand
7459   // are also optional. However, the implementation of some instructions violates
7460   // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7461   //
7462   // To alleviate this problem, we have to (implicitly) parse extra operands
7463   // to make sure the autogenerated parser of custom operands never hits hardcoded
7464   // mandatory operands.
7465 
7466   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7467     if (res != MatchOperand_Success ||
7468         isToken(AsmToken::EndOfStatement))
7469       break;
7470 
7471     trySkipToken(AsmToken::Comma);
7472     res = parseOptionalOpr(Operands);
7473   }
7474 
7475   return res;
7476 }
7477 
7478 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7479   OperandMatchResultTy res;
7480   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7481     // try to parse any optional operand here
7482     if (Op.IsBit) {
7483       res = parseNamedBit(Op.Name, Operands, Op.Type);
7484     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7485       res = parseOModOperand(Operands);
7486     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7487                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7488                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7489       res = parseSDWASel(Operands, Op.Name, Op.Type);
7490     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7491       res = parseSDWADstUnused(Operands);
7492     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7493                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7494                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7495                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7496       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7497                                         Op.ConvertResult);
7498     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7499       res = parseDim(Operands);
7500     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7501       res = parseCPol(Operands);
7502     } else {
7503       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7504     }
7505     if (res != MatchOperand_NoMatch) {
7506       return res;
7507     }
7508   }
7509   return MatchOperand_NoMatch;
7510 }
7511 
7512 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7513   StringRef Name = getTokenStr();
7514   if (Name == "mul") {
7515     return parseIntWithPrefix("mul", Operands,
7516                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7517   }
7518 
7519   if (Name == "div") {
7520     return parseIntWithPrefix("div", Operands,
7521                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7522   }
7523 
7524   return MatchOperand_NoMatch;
7525 }
7526 
7527 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7528   cvtVOP3P(Inst, Operands);
7529 
7530   int Opc = Inst.getOpcode();
7531 
7532   int SrcNum;
7533   const int Ops[] = { AMDGPU::OpName::src0,
7534                       AMDGPU::OpName::src1,
7535                       AMDGPU::OpName::src2 };
7536   for (SrcNum = 0;
7537        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7538        ++SrcNum);
7539   assert(SrcNum > 0);
7540 
7541   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7542   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7543 
7544   if ((OpSel & (1 << SrcNum)) != 0) {
7545     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7546     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7547     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7548   }
7549 }
7550 
7551 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7552       // 1. This operand is an input modifiers operand
7553   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7554       // 2. This is not the last operand
7555       && Desc.NumOperands > (OpNum + 1)
7556       // 3. The next operand has a register class
7557       && Desc.OpInfo[OpNum + 1].RegClass != -1
7558       // 4. The next register is not tied to any other operand
7559       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7560 }
7561 
7562 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7563 {
7564   OptionalImmIndexMap OptionalIdx;
7565   unsigned Opc = Inst.getOpcode();
7566 
7567   unsigned I = 1;
7568   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7569   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7570     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7571   }
7572 
7573   for (unsigned E = Operands.size(); I != E; ++I) {
7574     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7575     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7576       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7577     } else if (Op.isInterpSlot() ||
7578                Op.isInterpAttr() ||
7579                Op.isAttrChan()) {
7580       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7581     } else if (Op.isImmModifier()) {
7582       OptionalIdx[Op.getImmTy()] = I;
7583     } else {
7584       llvm_unreachable("unhandled operand type");
7585     }
7586   }
7587 
7588   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7589     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7590   }
7591 
7592   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7593     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7594   }
7595 
7596   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7597     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7598   }
7599 }
7600 
7601 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7602                               OptionalImmIndexMap &OptionalIdx) {
7603   unsigned Opc = Inst.getOpcode();
7604 
7605   unsigned I = 1;
7606   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7607   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7608     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7609   }
7610 
7611   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7612     // This instruction has src modifiers
7613     for (unsigned E = Operands.size(); I != E; ++I) {
7614       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7615       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7616         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7617       } else if (Op.isImmModifier()) {
7618         OptionalIdx[Op.getImmTy()] = I;
7619       } else if (Op.isRegOrImm()) {
7620         Op.addRegOrImmOperands(Inst, 1);
7621       } else {
7622         llvm_unreachable("unhandled operand type");
7623       }
7624     }
7625   } else {
7626     // No src modifiers
7627     for (unsigned E = Operands.size(); I != E; ++I) {
7628       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7629       if (Op.isMod()) {
7630         OptionalIdx[Op.getImmTy()] = I;
7631       } else {
7632         Op.addRegOrImmOperands(Inst, 1);
7633       }
7634     }
7635   }
7636 
7637   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7638     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7639   }
7640 
7641   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7642     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7643   }
7644 
7645   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7646   // these opcodes have a src2 register operand that is tied to the dst operand.
7647   // We do not allow modifiers for this operand in the assembler, so
7648   // src2_modifiers should be 0.
7649   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7650       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7651       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7652       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7653       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7654       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7655       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7656       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7657       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7658       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7659       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7660     auto it = Inst.begin();
7661     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7662     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7663     ++it;
7664     // Copy the operand to ensure it's not invalidated when Inst grows.
7665     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7666   }
7667 }
7668 
7669 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7670   OptionalImmIndexMap OptionalIdx;
7671   cvtVOP3(Inst, Operands, OptionalIdx);
7672 }
7673 
7674 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7675                                OptionalImmIndexMap &OptIdx) {
7676   const int Opc = Inst.getOpcode();
7677   const MCInstrDesc &Desc = MII.get(Opc);
7678 
7679   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7680 
7681   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7682     assert(!IsPacked);
7683     Inst.addOperand(Inst.getOperand(0));
7684   }
7685 
7686   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
7687   // instruction, and then figure out where to actually put the modifiers.
7688 
7689   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7690   if (OpSelIdx != -1) {
7691     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7692   }
7693 
7694   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7695   if (OpSelHiIdx != -1) {
7696     int DefaultVal = IsPacked ? -1 : 0;
7697     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7698                           DefaultVal);
7699   }
7700 
7701   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7702   if (NegLoIdx != -1) {
7703     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7704     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7705   }
7706 
7707   const int Ops[] = { AMDGPU::OpName::src0,
7708                       AMDGPU::OpName::src1,
7709                       AMDGPU::OpName::src2 };
7710   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7711                          AMDGPU::OpName::src1_modifiers,
7712                          AMDGPU::OpName::src2_modifiers };
7713 
7714   unsigned OpSel = 0;
7715   unsigned OpSelHi = 0;
7716   unsigned NegLo = 0;
7717   unsigned NegHi = 0;
7718 
7719   if (OpSelIdx != -1)
7720     OpSel = Inst.getOperand(OpSelIdx).getImm();
7721 
7722   if (OpSelHiIdx != -1)
7723     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7724 
7725   if (NegLoIdx != -1) {
7726     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7727     NegLo = Inst.getOperand(NegLoIdx).getImm();
7728     NegHi = Inst.getOperand(NegHiIdx).getImm();
7729   }
7730 
7731   for (int J = 0; J < 3; ++J) {
7732     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7733     if (OpIdx == -1)
7734       break;
7735 
7736     uint32_t ModVal = 0;
7737 
7738     if ((OpSel & (1 << J)) != 0)
7739       ModVal |= SISrcMods::OP_SEL_0;
7740 
7741     if ((OpSelHi & (1 << J)) != 0)
7742       ModVal |= SISrcMods::OP_SEL_1;
7743 
7744     if ((NegLo & (1 << J)) != 0)
7745       ModVal |= SISrcMods::NEG;
7746 
7747     if ((NegHi & (1 << J)) != 0)
7748       ModVal |= SISrcMods::NEG_HI;
7749 
7750     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7751 
7752     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7753   }
7754 }
7755 
7756 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7757   OptionalImmIndexMap OptIdx;
7758   cvtVOP3(Inst, Operands, OptIdx);
7759   cvtVOP3P(Inst, Operands, OptIdx);
7760 }
7761 
7762 //===----------------------------------------------------------------------===//
7763 // dpp
7764 //===----------------------------------------------------------------------===//
7765 
7766 bool AMDGPUOperand::isDPP8() const {
7767   return isImmTy(ImmTyDPP8);
7768 }
7769 
7770 bool AMDGPUOperand::isDPPCtrl() const {
7771   using namespace AMDGPU::DPP;
7772 
7773   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7774   if (result) {
7775     int64_t Imm = getImm();
7776     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7777            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7778            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7779            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7780            (Imm == DppCtrl::WAVE_SHL1) ||
7781            (Imm == DppCtrl::WAVE_ROL1) ||
7782            (Imm == DppCtrl::WAVE_SHR1) ||
7783            (Imm == DppCtrl::WAVE_ROR1) ||
7784            (Imm == DppCtrl::ROW_MIRROR) ||
7785            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7786            (Imm == DppCtrl::BCAST15) ||
7787            (Imm == DppCtrl::BCAST31) ||
7788            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7789            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7790   }
7791   return false;
7792 }
7793 
7794 //===----------------------------------------------------------------------===//
7795 // mAI
7796 //===----------------------------------------------------------------------===//
7797 
7798 bool AMDGPUOperand::isBLGP() const {
7799   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7800 }
7801 
7802 bool AMDGPUOperand::isCBSZ() const {
7803   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7804 }
7805 
7806 bool AMDGPUOperand::isABID() const {
7807   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7808 }
7809 
7810 bool AMDGPUOperand::isS16Imm() const {
7811   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7812 }
7813 
7814 bool AMDGPUOperand::isU16Imm() const {
7815   return isImm() && isUInt<16>(getImm());
7816 }
7817 
7818 //===----------------------------------------------------------------------===//
7819 // dim
7820 //===----------------------------------------------------------------------===//
7821 
7822 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7823   // We want to allow "dim:1D" etc.,
7824   // but the initial 1 is tokenized as an integer.
7825   std::string Token;
7826   if (isToken(AsmToken::Integer)) {
7827     SMLoc Loc = getToken().getEndLoc();
7828     Token = std::string(getTokenStr());
7829     lex();
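    // The identifier suffix must immediately follow the integer (e.g. the "D"
    // in "1D"); if the next token does not start where the integer ended,
    // this is not a dim value.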
7830     if (getLoc() != Loc)
7831       return false;
7832   }
7833 
7834   StringRef Suffix;
7835   if (!parseId(Suffix))
7836     return false;
7837   Token += Suffix;
7838 
7839   StringRef DimId = Token;
7840   if (DimId.startswith("SQ_RSRC_IMG_"))
7841     DimId = DimId.drop_front(12);
7842 
7843   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7844   if (!DimInfo)
7845     return false;
7846 
7847   Encoding = DimInfo->Encoding;
7848   return true;
7849 }
7850 
7851 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7852   if (!isGFX10Plus())
7853     return MatchOperand_NoMatch;
7854 
7855   SMLoc S = getLoc();
7856 
7857   if (!trySkipId("dim", AsmToken::Colon))
7858     return MatchOperand_NoMatch;
7859 
7860   unsigned Encoding;
7861   SMLoc Loc = getLoc();
7862   if (!parseDimId(Encoding)) {
7863     Error(Loc, "invalid dim value");
7864     return MatchOperand_ParseFail;
7865   }
7866 
7867   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7868                                               AMDGPUOperand::ImmTyDim));
7869   return MatchOperand_Success;
7870 }
7871 
7872 //===----------------------------------------------------------------------===//
7873 // dpp
7874 //===----------------------------------------------------------------------===//
7875 
7876 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7877   SMLoc S = getLoc();
7878 
7879   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7880     return MatchOperand_NoMatch;
7881 
7882   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7883 
7884   int64_t Sels[8];
7885 
7886   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7887     return MatchOperand_ParseFail;
7888 
7889   for (size_t i = 0; i < 8; ++i) {
7890     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7891       return MatchOperand_ParseFail;
7892 
7893     SMLoc Loc = getLoc();
7894     if (getParser().parseAbsoluteExpression(Sels[i]))
7895       return MatchOperand_ParseFail;
    if (Sels[i] < 0 || Sels[i] > 7) {
7897       Error(Loc, "expected a 3-bit value");
7898       return MatchOperand_ParseFail;
7899     }
7900   }
7901 
7902   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7903     return MatchOperand_ParseFail;
7904 
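  // Pack the eight 3-bit lane selectors into a single immediate.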
7905   unsigned DPP8 = 0;
7906   for (size_t i = 0; i < 8; ++i)
7907     DPP8 |= (Sels[i] << (i * 3));
7908 
7909   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7910   return MatchOperand_Success;
7911 }
7912 
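// Check whether the given dpp ctrl name is supported on the current subtarget.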
7913 bool
7914 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7915                                     const OperandVector &Operands) {
7916   if (Ctrl == "row_newbcast")
7917     return isGFX90A();
7918 
7919   if (Ctrl == "row_share" ||
7920       Ctrl == "row_xmask")
7921     return isGFX10Plus();
7922 
7923   if (Ctrl == "wave_shl" ||
7924       Ctrl == "wave_shr" ||
7925       Ctrl == "wave_rol" ||
7926       Ctrl == "wave_ror" ||
7927       Ctrl == "row_bcast")
7928     return isVI() || isGFX9();
7929 
7930   return Ctrl == "row_mirror" ||
7931          Ctrl == "row_half_mirror" ||
7932          Ctrl == "quad_perm" ||
7933          Ctrl == "row_shl" ||
7934          Ctrl == "row_shr" ||
7935          Ctrl == "row_ror";
7936 }
7937 
7938 int64_t
7939 AMDGPUAsmParser::parseDPPCtrlPerm() {
7940   // quad_perm:[%d,%d,%d,%d]
7941 
7942   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7943     return -1;
7944 
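  // Each of the four lane selects is a 2-bit value; pack them into a single
  // 8-bit immediate.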
7945   int64_t Val = 0;
7946   for (int i = 0; i < 4; ++i) {
7947     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7948       return -1;
7949 
7950     int64_t Temp;
7951     SMLoc Loc = getLoc();
7952     if (getParser().parseAbsoluteExpression(Temp))
7953       return -1;
7954     if (Temp < 0 || Temp > 3) {
7955       Error(Loc, "expected a 2-bit value");
7956       return -1;
7957     }
7958 
    Val += (Temp << (i * 2));
7960   }
7961 
7962   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7963     return -1;
7964 
7965   return Val;
7966 }
7967 
7968 int64_t
7969 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7970   using namespace AMDGPU::DPP;
7971 
7972   // sel:%d
7973 
7974   int64_t Val;
7975   SMLoc Loc = getLoc();
7976 
7977   if (getParser().parseAbsoluteExpression(Val))
7978     return -1;
7979 
7980   struct DppCtrlCheck {
7981     int64_t Ctrl;
7982     int Lo;
7983     int Hi;
7984   };
7985 
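  // Map the dpp ctrl name to its base encoding and the range of values
  // permitted after the colon.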
7986   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7987     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7988     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7989     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7990     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7991     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7992     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7993     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7994     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7995     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7996     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7997     .Default({-1, 0, 0});
7998 
7999   bool Valid;
8000   if (Check.Ctrl == -1) {
8001     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8003   } else {
8004     Valid = Check.Lo <= Val && Val <= Check.Hi;
8005     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8006   }
8007 
8008   if (!Valid) {
8009     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8010     return -1;
8011   }
8012 
8013   return Val;
8014 }
8015 
8016 OperandMatchResultTy
8017 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8018   using namespace AMDGPU::DPP;
8019 
8020   if (!isToken(AsmToken::Identifier) ||
8021       !isSupportedDPPCtrl(getTokenStr(), Operands))
8022     return MatchOperand_NoMatch;
8023 
8024   SMLoc S = getLoc();
8025   int64_t Val = -1;
8026   StringRef Ctrl;
8027 
8028   parseId(Ctrl);
8029 
8030   if (Ctrl == "row_mirror") {
8031     Val = DppCtrl::ROW_MIRROR;
8032   } else if (Ctrl == "row_half_mirror") {
8033     Val = DppCtrl::ROW_HALF_MIRROR;
8034   } else {
8035     if (skipToken(AsmToken::Colon, "expected a colon")) {
8036       if (Ctrl == "quad_perm") {
8037         Val = parseDPPCtrlPerm();
8038       } else {
8039         Val = parseDPPCtrlSel(Ctrl);
8040       }
8041     }
8042   }
8043 
8044   if (Val == -1)
8045     return MatchOperand_ParseFail;
8046 
8047   Operands.push_back(
8048     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8049   return MatchOperand_Success;
8050 }
8051 
8052 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8053   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8054 }
8055 
8056 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8057   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8058 }
8059 
8060 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8061   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8062 }
8063 
8064 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8065   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8066 }
8067 
8068 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8069   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8070 }
8071 
8072 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8073   OptionalImmIndexMap OptionalIdx;
8074 
8075   unsigned Opc = Inst.getOpcode();
8076   bool HasModifiers =
8077       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8078   unsigned I = 1;
8079   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8080   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8081     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8082   }
8083 
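  // For dpp8 the optional fi modifier is collected here and appended after
  // the operand loop.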
8084   int Fi = 0;
8085   for (unsigned E = Operands.size(); I != E; ++I) {
8086     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8087                                             MCOI::TIED_TO);
8088     if (TiedTo != -1) {
8089       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or 'src2' operand for MAC instructions.
8091       Inst.addOperand(Inst.getOperand(TiedTo));
8092     }
8093     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8094     // Add the register arguments
8095     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
8098       continue;
8099     }
8100 
8101     if (IsDPP8) {
8102       if (Op.isDPP8()) {
8103         Op.addImmOperands(Inst, 1);
8104       } else if (HasModifiers &&
8105                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8106         Op.addRegWithFPInputModsOperands(Inst, 2);
8107       } else if (Op.isFI()) {
8108         Fi = Op.getImm();
8109       } else if (Op.isReg()) {
8110         Op.addRegOperands(Inst, 1);
8111       } else {
8112         llvm_unreachable("Invalid operand type");
8113       }
8114     } else {
8115       if (HasModifiers &&
8116           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8117         Op.addRegWithFPInputModsOperands(Inst, 2);
8118       } else if (Op.isReg()) {
8119         Op.addRegOperands(Inst, 1);
8120       } else if (Op.isDPPCtrl()) {
8121         Op.addImmOperands(Inst, 1);
8122       } else if (Op.isImm()) {
8123         // Handle optional arguments
8124         OptionalIdx[Op.getImmTy()] = I;
8125       } else {
8126         llvm_unreachable("Invalid operand type");
8127       }
8128     }
8129   }
8130 
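  // Append the trailing operands: fi for dpp8, otherwise the optional dpp
  // controls (defaulted if they were not specified).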
8131   if (IsDPP8) {
8132     using namespace llvm::AMDGPU::DPP;
8133     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8134   } else {
8135     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8136     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8137     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8138     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8139       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8140     }
8141   }
8142 }
8143 
8144 //===----------------------------------------------------------------------===//
8145 // sdwa
8146 //===----------------------------------------------------------------------===//
8147 
8148 OperandMatchResultTy
8149 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8150                               AMDGPUOperand::ImmTy Type) {
8151   using namespace llvm::AMDGPU::SDWA;
8152 
8153   SMLoc S = getLoc();
8154   StringRef Value;
8155   OperandMatchResultTy res;
8156 
8157   SMLoc StringLoc;
8158   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8159   if (res != MatchOperand_Success) {
8160     return res;
8161   }
8162 
8163   int64_t Int;
8164   Int = StringSwitch<int64_t>(Value)
8165         .Case("BYTE_0", SdwaSel::BYTE_0)
8166         .Case("BYTE_1", SdwaSel::BYTE_1)
8167         .Case("BYTE_2", SdwaSel::BYTE_2)
8168         .Case("BYTE_3", SdwaSel::BYTE_3)
8169         .Case("WORD_0", SdwaSel::WORD_0)
8170         .Case("WORD_1", SdwaSel::WORD_1)
8171         .Case("DWORD", SdwaSel::DWORD)
8172         .Default(0xffffffff);
8173 
8174   if (Int == 0xffffffff) {
8175     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8176     return MatchOperand_ParseFail;
8177   }
8178 
8179   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8180   return MatchOperand_Success;
8181 }
8182 
8183 OperandMatchResultTy
8184 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8185   using namespace llvm::AMDGPU::SDWA;
8186 
8187   SMLoc S = getLoc();
8188   StringRef Value;
8189   OperandMatchResultTy res;
8190 
8191   SMLoc StringLoc;
8192   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8193   if (res != MatchOperand_Success) {
8194     return res;
8195   }
8196 
8197   int64_t Int;
8198   Int = StringSwitch<int64_t>(Value)
8199         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8200         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8201         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8202         .Default(0xffffffff);
8203 
8204   if (Int == 0xffffffff) {
8205     Error(StringLoc, "invalid dst_unused value");
8206     return MatchOperand_ParseFail;
8207   }
8208 
8209   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8210   return MatchOperand_Success;
8211 }
8212 
8213 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8214   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8215 }
8216 
8217 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8218   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8219 }
8220 
8221 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8222   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8223 }
8224 
8225 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8226   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8227 }
8228 
8229 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8230   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8231 }
8232 
8233 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8234                               uint64_t BasicInstType,
8235                               bool SkipDstVcc,
8236                               bool SkipSrcVcc) {
8237   using namespace llvm::AMDGPU::SDWA;
8238 
8239   OptionalImmIndexMap OptionalIdx;
8240   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8241   bool SkippedVcc = false;
8242 
8243   unsigned I = 1;
8244   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8245   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8246     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8247   }
8248 
8249   for (unsigned E = Operands.size(); I != E; ++I) {
8250     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8251     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8252         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
8254       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8255       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8256       // Skip VCC only if we didn't skip it on previous iteration.
8257       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8258       if (BasicInstType == SIInstrFlags::VOP2 &&
8259           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8260            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8261         SkippedVcc = true;
8262         continue;
8263       } else if (BasicInstType == SIInstrFlags::VOPC &&
8264                  Inst.getNumOperands() == 0) {
8265         SkippedVcc = true;
8266         continue;
8267       }
8268     }
8269     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8270       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8271     } else if (Op.isImm()) {
8272       // Handle optional arguments
8273       OptionalIdx[Op.getImmTy()] = I;
8274     } else {
8275       llvm_unreachable("Invalid operand type");
8276     }
8277     SkippedVcc = false;
8278   }
8279 
8280   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8281       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8282       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
8284     switch (BasicInstType) {
8285     case SIInstrFlags::VOP1:
8286       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8287       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8288         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8289       }
8290       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8291       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8292       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8293       break;
8294 
8295     case SIInstrFlags::VOP2:
8296       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8297       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8298         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8299       }
8300       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8301       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8302       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8303       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8304       break;
8305 
8306     case SIInstrFlags::VOPC:
8307       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8308         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8309       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8310       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8311       break;
8312 
8313     default:
8314       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8315     }
8316   }
8317 
8318   // special case v_mac_{f16, f32}:
8319   // it has src2 register operand that is tied to dst operand
8320   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8321       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8322     auto it = Inst.begin();
8323     std::advance(
8324       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8325     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8326   }
8327 }
8328 
8329 //===----------------------------------------------------------------------===//
8330 // mAI
8331 //===----------------------------------------------------------------------===//
8332 
8333 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8334   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8335 }
8336 
8337 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8338   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8339 }
8340 
8341 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8342   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8343 }
8344 
8345 /// Force static initialization.
8346 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8347   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8348   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8349 }
8350 
8351 #define GET_REGISTER_MATCHER
8352 #define GET_MATCHER_IMPLEMENTATION
8353 #define GET_MNEMONIC_SPELL_CHECKER
8354 #define GET_MNEMONIC_CHECKER
8355 #include "AMDGPUGenAsmMatcher.inc"
8356 
// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
8359 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8360                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand but
  // expected the corresponding token.
8365   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8366   switch (Kind) {
8367   case MCK_addr64:
8368     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8369   case MCK_gds:
8370     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8371   case MCK_lds:
8372     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8373   case MCK_idxen:
8374     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8375   case MCK_offen:
8376     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8377   case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true; if the name of
    // the expression is not a valid token, the match fails, so we need to
    // handle it here.
8384     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8385   case MCK_SSrcF32:
8386     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8387   case MCK_SoppBrTarget:
8388     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8389   case MCK_VReg32OrOff:
8390     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8391   case MCK_InterpSlot:
8392     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8393   case MCK_Attr:
8394     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8395   case MCK_AttrChan:
8396     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8397   case MCK_ImmSMEMOffset:
8398     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8399   case MCK_SReg_64:
8400   case MCK_SReg_64_XEXEC:
8401     // Null is defined as a 32-bit register but
8402     // it should also be enabled with 64-bit operands.
8403     // The following code enables it for SReg_64 operands
8404     // used as source and destination. Remaining source
8405     // operands are handled in isInlinableImm.
8406     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8407   default:
8408     return Match_InvalidOperand;
8409   }
8410 }
8411 
8412 //===----------------------------------------------------------------------===//
8413 // endpgm
8414 //===----------------------------------------------------------------------===//
8415 
8416 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8417   SMLoc S = getLoc();
8418   int64_t Imm = 0;
8419 
8420   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
8422     Imm = 0;
8423   }
8424 
8425   if (!isUInt<16>(Imm)) {
8426     Error(S, "expected a 16-bit value");
8427     return MatchOperand_ParseFail;
8428   }
8429 
8430   Operands.push_back(
8431       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8432   return MatchOperand_Success;
8433 }
8434 
8435 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8436