//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

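    // Encode the parsed source modifiers into the SISrcMods bit layout that is
    // emitted as an explicit src-modifiers immediate operand on the MCInst.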
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTySCCB,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

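  // Records how an immediate was actually encoded once added to an MCInst:
  // as a literal or as an inline constant (see the setImmKind* setters and
  // getLitLoc/getConstLoc below).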
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case we assume the string is an expression, and if we need to
    // interpret it as a token, we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isSCCB() const { return isImmTy(ImmTySCCB); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  // "GLC_1" is the MatchClass of the GLC_1 operand, which supplies the default
  // and forced value of the GLC operand.
  bool isGLC_1() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTySCCB: OS << "SCCB"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. the count of SGPRs used.
// Kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the next
// .amdgpu_hsa_kernel directive or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

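  // Track the highest register index used so far and publish the resulting
  // count (highest index + 1) through the .kernel.sgpr_count and
  // .kernel.vgpr_count assembler symbols.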
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

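  // Resetting the counters to -1 and immediately "using" index -1 (re)defines
  // the count symbols with an initial value of 0.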
  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_AGPR: // fall through
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Maximum number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

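  // Encoding variant forced by a mnemonic suffix such as _e32/_e64, _dpp or
  // _sdwa, if any; see parseMnemonicSuffix() and getMatchedVariants().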
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for a given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

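  // Maps the immediate type of an optional operand to its index in the parsed
  // operand list, so that conversion helpers can emit optional operands in the
  // order the encoding expects.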
  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
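  // One field of a composite operand such as hwreg(...) or sendmsg(...):
  // where it was parsed, its numeric id, whether it was given symbolically,
  // and whether it was specified at all.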
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

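  // Target-specific semantic checks applied to a matched instruction before it
  // is emitted (operand legality, constant bus usage, MIMG constraints, etc.).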
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

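  // Low-level token-stream helpers shared by the custom operand parsers.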
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultSCCB() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultGLC_1() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
1663   AMDGPUOperand::Ptr defaultBankMask() const;
1664   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1665   AMDGPUOperand::Ptr defaultFI() const;
1666   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1667   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1668 
1669   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1670                                     AMDGPUOperand::ImmTy Type);
1671   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1672   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1673   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1674   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1675   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1676   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1677   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1678                uint64_t BasicInstType,
1679                bool SkipDstVcc = false,
1680                bool SkipSrcVcc = false);
1681 
1682   AMDGPUOperand::Ptr defaultBLGP() const;
1683   AMDGPUOperand::Ptr defaultCBSZ() const;
1684   AMDGPUOperand::Ptr defaultABID() const;
1685 
1686   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1687   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1688 };
1689 
1690 struct OptionalOperand {
1691   const char *Name;
1692   AMDGPUOperand::ImmTy Type;
1693   bool IsBit;
1694   bool (*ConvertResult)(int64_t&);
1695 };
1696 
1697 } // end anonymous namespace
1698 
1699 // May be called with an integer type of an equivalent bitwidth.
1700 static const fltSemantics *getFltSemantics(unsigned Size) {
1701   switch (Size) {
1702   case 4:
1703     return &APFloat::IEEEsingle();
1704   case 8:
1705     return &APFloat::IEEEdouble();
1706   case 2:
1707     return &APFloat::IEEEhalf();
1708   default:
1709     llvm_unreachable("unsupported fp type");
1710   }
1711 }
1712 
1713 static const fltSemantics *getFltSemantics(MVT VT) {
1714   return getFltSemantics(VT.getSizeInBits() / 8);
1715 }
1716 
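// Return the FP semantics matching the width of the given operand type.
// Integer operand types map to the FP semantics of the same width, e.g.
// OPERAND_REG_IMM_INT32 maps to IEEEsingle.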
1717 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1718   switch (OperandType) {
1719   case AMDGPU::OPERAND_REG_IMM_INT32:
1720   case AMDGPU::OPERAND_REG_IMM_FP32:
1721   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1722   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1723   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1724   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1725   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1726   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1727   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1728   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1729     return &APFloat::IEEEsingle();
1730   case AMDGPU::OPERAND_REG_IMM_INT64:
1731   case AMDGPU::OPERAND_REG_IMM_FP64:
1732   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1733   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1734   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1735     return &APFloat::IEEEdouble();
1736   case AMDGPU::OPERAND_REG_IMM_INT16:
1737   case AMDGPU::OPERAND_REG_IMM_FP16:
1738   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1739   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1740   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1741   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1742   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1743   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1744   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1745   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1746   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1747   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1748     return &APFloat::IEEEhalf();
1749   default:
1750     llvm_unreachable("unsupported fp type");
1751   }
1752 }
1753 
1754 //===----------------------------------------------------------------------===//
1755 // Operand
1756 //===----------------------------------------------------------------------===//
1757 
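// Check whether FPLiteral can be converted to the FP type given by VT without
// overflow or underflow; precision loss alone is acceptable. For example, 0.1
// is accepted as an f16 literal despite being inexact, while 1.0e10 is
// rejected because it overflows f16.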
1758 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1759   bool Lost;
1760 
1761   // Convert literal to the floating-point semantics of VT
1762   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1763                                                APFloat::rmNearestTiesToEven,
1764                                                &Lost);
1765   // We allow precision loss but not overflow or underflow
1766   if (Status != APFloat::opOK &&
1767       Lost &&
1768       ((Status & APFloat::opOverflow)  != 0 ||
1769        (Status & APFloat::opUnderflow) != 0)) {
1770     return false;
1771   }
1772 
1773   return true;
1774 }
1775 
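// Check that Val can be truncated to Size bits without losing information,
// i.e. that it is representable in Size bits either as an unsigned or as a
// signed value. For example, with Size == 16 both 0xFFFF and -1 are safe,
// while 0x1FFFF is not.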
1776 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1777   return isUIntN(Size, Val) || isIntN(Size, Val);
1778 }
1779 
1780 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1781   if (VT.getScalarType() == MVT::i16) {
1782     // FP immediate values are broken.
1783     return isInlinableIntLiteral(Val);
1784   }
1785 
1786   // f16/v2f16 operands work correctly for all values.
1787   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1788 }
1789 
1790 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1791 
1792   // This is a hack to enable named inline values like
1793   // shared_base with both 32-bit and 64-bit operands.
1794   // Note that these values are defined as
1795   // 32-bit operands only.
1796   if (isInlineValue()) {
1797     return true;
1798   }
1799 
1800   if (!isImmTy(ImmTyNone)) {
1801     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1802     return false;
1803   }
1804   // TODO: We should avoid using host float here. It would be better to
1805   // check the float bit values which is what a few other places do.
1806   // We've had bot failures before due to weird NaN support on mips hosts.
1807 
1808   APInt Literal(64, Imm.Val);
1809 
1810   if (Imm.IsFPImm) { // We got fp literal token
1811     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1812       return AMDGPU::isInlinableLiteral64(Imm.Val,
1813                                           AsmParser->hasInv2PiInlineImm());
1814     }
1815 
1816     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1817     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1818       return false;
1819 
1820     if (type.getScalarSizeInBits() == 16) {
1821       return isInlineableLiteralOp16(
1822         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1823         type, AsmParser->hasInv2PiInlineImm());
1824     }
1825 
1826     // Check if single precision literal is inlinable
1827     return AMDGPU::isInlinableLiteral32(
1828       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1829       AsmParser->hasInv2PiInlineImm());
1830   }
1831 
1832   // We got int literal token.
1833   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1834     return AMDGPU::isInlinableLiteral64(Imm.Val,
1835                                         AsmParser->hasInv2PiInlineImm());
1836   }
1837 
1838   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1839     return false;
1840   }
1841 
1842   if (type.getScalarSizeInBits() == 16) {
1843     return isInlineableLiteralOp16(
1844       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1845       type, AsmParser->hasInv2PiInlineImm());
1846   }
1847 
1848   return AMDGPU::isInlinableLiteral32(
1849     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1850     AsmParser->hasInv2PiInlineImm());
1851 }
1852 
1853 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1854   // Check that this immediate can be added as literal
1855   if (!isImmTy(ImmTyNone)) {
1856     return false;
1857   }
1858 
1859   if (!Imm.IsFPImm) {
1860     // We got int literal token.
1861 
1862     if (type == MVT::f64 && hasFPModifiers()) {
1863       // Cannot apply fp modifiers to int literals preserving the same semantics
1864       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1865       // disable these cases.
1866       return false;
1867     }
1868 
1869     unsigned Size = type.getSizeInBits();
1870     if (Size == 64)
1871       Size = 32;
1872 
1873     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1874     // types.
1875     return isSafeTruncation(Imm.Val, Size);
1876   }
1877 
1878   // We got fp literal token
1879   if (type == MVT::f64) { // Expected 64-bit fp operand
1880     // The low 32 bits of the literal will be set to zero, but we accept such literals
1881     return true;
1882   }
1883 
1884   if (type == MVT::i64) { // Expected 64-bit int operand
1885     // We don't allow fp literals in 64-bit integer instructions. It is
1886     // unclear how we should encode them.
1887     return false;
1888   }
1889 
1890   // We allow fp literals with f16x2 operands assuming that the specified
1891   // literal goes into the lower half and the upper half is zero. We also
1892   // require that the literal can be losslessly converted to f16.
1893   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1894                      (type == MVT::v2i16)? MVT::i16 :
1895                      (type == MVT::v2f32)? MVT::f32 : type;
1896 
1897   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1898   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1899 }
1900 
1901 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1902   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1903 }
1904 
1905 bool AMDGPUOperand::isVRegWithInputMods() const {
1906   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1907          // GFX90A allows DPP on 64-bit operands.
1908          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1909           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1910 }
1911 
1912 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1913   if (AsmParser->isVI())
1914     return isVReg32();
1915   else if (AsmParser->isGFX9Plus())
1916     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1917   else
1918     return false;
1919 }
1920 
1921 bool AMDGPUOperand::isSDWAFP16Operand() const {
1922   return isSDWAOperand(MVT::f16);
1923 }
1924 
1925 bool AMDGPUOperand::isSDWAFP32Operand() const {
1926   return isSDWAOperand(MVT::f32);
1927 }
1928 
1929 bool AMDGPUOperand::isSDWAInt16Operand() const {
1930   return isSDWAOperand(MVT::i16);
1931 }
1932 
1933 bool AMDGPUOperand::isSDWAInt32Operand() const {
1934   return isSDWAOperand(MVT::i32);
1935 }
1936 
1937 bool AMDGPUOperand::isBoolReg() const {
1938   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1939          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1940 }
1941 
1942 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1943 {
1944   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1945   assert(Size == 2 || Size == 4 || Size == 8);
1946 
1947   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1948 
1949   if (Imm.Mods.Abs) {
1950     Val &= ~FpSignMask;
1951   }
1952   if (Imm.Mods.Neg) {
1953     Val ^= FpSignMask;
1954   }
1955 
1956   return Val;
1957 }
1958 
1959 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1960   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1961                              Inst.getNumOperands())) {
1962     addLiteralImmOperand(Inst, Imm.Val,
1963                          ApplyModifiers &&
1964                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1965   } else {
1966     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1967     Inst.addOperand(MCOperand::createImm(Imm.Val));
1968     setImmKindNone();
1969   }
1970 }
1971 
1972 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1973   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1974   auto OpNum = Inst.getNumOperands();
1975   // Check that this operand accepts literals
1976   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1977 
1978   if (ApplyModifiers) {
1979     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1980     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1981     Val = applyInputFPModifiers(Val, Size);
1982   }
1983 
1984   APInt Literal(64, Val);
1985   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1986 
1987   if (Imm.IsFPImm) { // We got fp literal token
1988     switch (OpTy) {
1989     case AMDGPU::OPERAND_REG_IMM_INT64:
1990     case AMDGPU::OPERAND_REG_IMM_FP64:
1991     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1992     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1993     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1994       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1995                                        AsmParser->hasInv2PiInlineImm())) {
1996         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1997         setImmKindConst();
1998         return;
1999       }
2000 
2001       // Non-inlineable
2002       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2003         // For fp operands we check whether the low 32 bits are zero
2004         if (Literal.getLoBits(32) != 0) {
2005           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2006           "Can't encode literal as exact 64-bit floating-point operand. "
2007           "Low 32-bits will be set to zero");
2008         }
2009 
2010         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2011         setImmKindLiteral();
2012         return;
2013       }
2014 
2015       // We don't allow fp literals in 64-bit integer instructions. It is
2016       // unclear how we should encode them. This case should be checked earlier
2017       // in predicate methods (isLiteralImm())
2018       llvm_unreachable("fp literal in 64-bit integer instruction.");
2019 
2020     case AMDGPU::OPERAND_REG_IMM_INT32:
2021     case AMDGPU::OPERAND_REG_IMM_FP32:
2022     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2023     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2024     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2025     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2026     case AMDGPU::OPERAND_REG_IMM_INT16:
2027     case AMDGPU::OPERAND_REG_IMM_FP16:
2028     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2029     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2030     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2031     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2032     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2033     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2034     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2035     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2036     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2037     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2038     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2039     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2040     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2041     case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2042       bool lost;
2043       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2044       // Convert literal to the operand's floating-point semantics
2045       FPLiteral.convert(*getOpFltSemantics(OpTy),
2046                         APFloat::rmNearestTiesToEven, &lost);
2047       // We allow precision loss but not overflow or underflow. This should be
2048       // checked earlier in isLiteralImm()
2049 
2050       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2051       Inst.addOperand(MCOperand::createImm(ImmVal));
2052       setImmKindLiteral();
2053       return;
2054     }
2055     default:
2056       llvm_unreachable("invalid operand size");
2057     }
2058 
2059     return;
2060   }
2061 
2062   // We got int literal token.
2063   // Only sign extend inline immediates.
2064   switch (OpTy) {
2065   case AMDGPU::OPERAND_REG_IMM_INT32:
2066   case AMDGPU::OPERAND_REG_IMM_FP32:
2067   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2068   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2069   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2070   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2071   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2072   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2073   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2074   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2075   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2076   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2077     if (isSafeTruncation(Val, 32) &&
2078         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2079                                      AsmParser->hasInv2PiInlineImm())) {
2080       Inst.addOperand(MCOperand::createImm(Val));
2081       setImmKindConst();
2082       return;
2083     }
2084 
2085     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2086     setImmKindLiteral();
2087     return;
2088 
2089   case AMDGPU::OPERAND_REG_IMM_INT64:
2090   case AMDGPU::OPERAND_REG_IMM_FP64:
2091   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2092   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2093   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2094     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2095       Inst.addOperand(MCOperand::createImm(Val));
2096       setImmKindConst();
2097       return;
2098     }
2099 
2100     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2101     setImmKindLiteral();
2102     return;
2103 
2104   case AMDGPU::OPERAND_REG_IMM_INT16:
2105   case AMDGPU::OPERAND_REG_IMM_FP16:
2106   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2107   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2108   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2109   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2110     if (isSafeTruncation(Val, 16) &&
2111         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2112                                      AsmParser->hasInv2PiInlineImm())) {
2113       Inst.addOperand(MCOperand::createImm(Val));
2114       setImmKindConst();
2115       return;
2116     }
2117 
2118     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2119     setImmKindLiteral();
2120     return;
2121 
2122   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2123   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2124   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2125   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2126     assert(isSafeTruncation(Val, 16));
2127     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2128                                         AsmParser->hasInv2PiInlineImm()));
2129 
2130     Inst.addOperand(MCOperand::createImm(Val));
2131     return;
2132   }
2133   default:
2134     llvm_unreachable("invalid operand size");
2135   }
2136 }
2137 
2138 template <unsigned Bitwidth>
2139 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2140   APInt Literal(64, Imm.Val);
2141   setImmKindNone();
2142 
2143   if (!Imm.IsFPImm) {
2144     // We got int literal token.
2145     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2146     return;
2147   }
2148 
2149   bool Lost;
2150   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2151   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2152                     APFloat::rmNearestTiesToEven, &Lost);
2153   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2154 }
2155 
2156 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2157   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2158 }
2159 
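// Named inline values such as src_shared_base, src_scc and null are parsed as
// registers but are encoded like inline constants, so
// AMDGPUOperand::isInlinableImm() accepts them for both 32-bit and 64-bit
// operands.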
2160 static bool isInlineValue(unsigned Reg) {
2161   switch (Reg) {
2162   case AMDGPU::SRC_SHARED_BASE:
2163   case AMDGPU::SRC_SHARED_LIMIT:
2164   case AMDGPU::SRC_PRIVATE_BASE:
2165   case AMDGPU::SRC_PRIVATE_LIMIT:
2166   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2167     return true;
2168   case AMDGPU::SRC_VCCZ:
2169   case AMDGPU::SRC_EXECZ:
2170   case AMDGPU::SRC_SCC:
2171     return true;
2172   case AMDGPU::SGPR_NULL:
2173     return true;
2174   default:
2175     return false;
2176   }
2177 }
2178 
2179 bool AMDGPUOperand::isInlineValue() const {
2180   return isRegKind() && ::isInlineValue(getReg());
2181 }
2182 
2183 //===----------------------------------------------------------------------===//
2184 // AsmParser
2185 //===----------------------------------------------------------------------===//
2186 
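// Map a register kind and a width (in 32-bit registers) to a register class
// ID, e.g. (IS_VGPR, 2) yields VReg_64RegClassID as used by v[0:1]. Returns -1
// for unsupported combinations.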
2187 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2188   if (Is == IS_VGPR) {
2189     switch (RegWidth) {
2190       default: return -1;
2191       case 1: return AMDGPU::VGPR_32RegClassID;
2192       case 2: return AMDGPU::VReg_64RegClassID;
2193       case 3: return AMDGPU::VReg_96RegClassID;
2194       case 4: return AMDGPU::VReg_128RegClassID;
2195       case 5: return AMDGPU::VReg_160RegClassID;
2196       case 6: return AMDGPU::VReg_192RegClassID;
2197       case 8: return AMDGPU::VReg_256RegClassID;
2198       case 16: return AMDGPU::VReg_512RegClassID;
2199       case 32: return AMDGPU::VReg_1024RegClassID;
2200     }
2201   } else if (Is == IS_TTMP) {
2202     switch (RegWidth) {
2203       default: return -1;
2204       case 1: return AMDGPU::TTMP_32RegClassID;
2205       case 2: return AMDGPU::TTMP_64RegClassID;
2206       case 4: return AMDGPU::TTMP_128RegClassID;
2207       case 8: return AMDGPU::TTMP_256RegClassID;
2208       case 16: return AMDGPU::TTMP_512RegClassID;
2209     }
2210   } else if (Is == IS_SGPR) {
2211     switch (RegWidth) {
2212       default: return -1;
2213       case 1: return AMDGPU::SGPR_32RegClassID;
2214       case 2: return AMDGPU::SGPR_64RegClassID;
2215       case 3: return AMDGPU::SGPR_96RegClassID;
2216       case 4: return AMDGPU::SGPR_128RegClassID;
2217       case 5: return AMDGPU::SGPR_160RegClassID;
2218       case 6: return AMDGPU::SGPR_192RegClassID;
2219       case 8: return AMDGPU::SGPR_256RegClassID;
2220       case 16: return AMDGPU::SGPR_512RegClassID;
2221     }
2222   } else if (Is == IS_AGPR) {
2223     switch (RegWidth) {
2224       default: return -1;
2225       case 1: return AMDGPU::AGPR_32RegClassID;
2226       case 2: return AMDGPU::AReg_64RegClassID;
2227       case 3: return AMDGPU::AReg_96RegClassID;
2228       case 4: return AMDGPU::AReg_128RegClassID;
2229       case 5: return AMDGPU::AReg_160RegClassID;
2230       case 6: return AMDGPU::AReg_192RegClassID;
2231       case 8: return AMDGPU::AReg_256RegClassID;
2232       case 16: return AMDGPU::AReg_512RegClassID;
2233       case 32: return AMDGPU::AReg_1024RegClassID;
2234     }
2235   }
2236   return -1;
2237 }
2238 
2239 static unsigned getSpecialRegForName(StringRef RegName) {
2240   return StringSwitch<unsigned>(RegName)
2241     .Case("exec", AMDGPU::EXEC)
2242     .Case("vcc", AMDGPU::VCC)
2243     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2244     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2245     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2246     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2247     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2248     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2249     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2250     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2251     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2252     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2253     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2254     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2255     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2256     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2257     .Case("m0", AMDGPU::M0)
2258     .Case("vccz", AMDGPU::SRC_VCCZ)
2259     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2260     .Case("execz", AMDGPU::SRC_EXECZ)
2261     .Case("src_execz", AMDGPU::SRC_EXECZ)
2262     .Case("scc", AMDGPU::SRC_SCC)
2263     .Case("src_scc", AMDGPU::SRC_SCC)
2264     .Case("tba", AMDGPU::TBA)
2265     .Case("tma", AMDGPU::TMA)
2266     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2267     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2268     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2269     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2270     .Case("vcc_lo", AMDGPU::VCC_LO)
2271     .Case("vcc_hi", AMDGPU::VCC_HI)
2272     .Case("exec_lo", AMDGPU::EXEC_LO)
2273     .Case("exec_hi", AMDGPU::EXEC_HI)
2274     .Case("tma_lo", AMDGPU::TMA_LO)
2275     .Case("tma_hi", AMDGPU::TMA_HI)
2276     .Case("tba_lo", AMDGPU::TBA_LO)
2277     .Case("tba_hi", AMDGPU::TBA_HI)
2278     .Case("pc", AMDGPU::PC_REG)
2279     .Case("null", AMDGPU::SGPR_NULL)
2280     .Default(AMDGPU::NoRegister);
2281 }
2282 
2283 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2284                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2285   auto R = parseRegister();
2286   if (!R) return true;
2287   assert(R->isReg());
2288   RegNo = R->getReg();
2289   StartLoc = R->getStartLoc();
2290   EndLoc = R->getEndLoc();
2291   return false;
2292 }
2293 
2294 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2295                                     SMLoc &EndLoc) {
2296   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2297 }
2298 
2299 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2300                                                        SMLoc &StartLoc,
2301                                                        SMLoc &EndLoc) {
2302   bool Result =
2303       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2304   bool PendingErrors = getParser().hasPendingError();
2305   getParser().clearPendingErrors();
2306   if (PendingErrors)
2307     return MatchOperand_ParseFail;
2308   if (Result)
2309     return MatchOperand_NoMatch;
2310   return MatchOperand_Success;
2311 }
2312 
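// Append the next register (Reg1) to a register list being parsed, e.g.
// [s0,s1,s2,s3] or [exec_lo,exec_hi]. Special register halves are merged into
// their full aliases; regular registers must have consecutive indices.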
2313 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2314                                             RegisterKind RegKind, unsigned Reg1,
2315                                             SMLoc Loc) {
2316   switch (RegKind) {
2317   case IS_SPECIAL:
2318     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2319       Reg = AMDGPU::EXEC;
2320       RegWidth = 2;
2321       return true;
2322     }
2323     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2324       Reg = AMDGPU::FLAT_SCR;
2325       RegWidth = 2;
2326       return true;
2327     }
2328     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2329       Reg = AMDGPU::XNACK_MASK;
2330       RegWidth = 2;
2331       return true;
2332     }
2333     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2334       Reg = AMDGPU::VCC;
2335       RegWidth = 2;
2336       return true;
2337     }
2338     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2339       Reg = AMDGPU::TBA;
2340       RegWidth = 2;
2341       return true;
2342     }
2343     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2344       Reg = AMDGPU::TMA;
2345       RegWidth = 2;
2346       return true;
2347     }
2348     Error(Loc, "register does not fit in the list");
2349     return false;
2350   case IS_VGPR:
2351   case IS_SGPR:
2352   case IS_AGPR:
2353   case IS_TTMP:
2354     if (Reg1 != Reg + RegWidth) {
2355       Error(Loc, "registers in a list must have consecutive indices");
2356       return false;
2357     }
2358     RegWidth++;
2359     return true;
2360   default:
2361     llvm_unreachable("unexpected register kind");
2362   }
2363 }
2364 
2365 struct RegInfo {
2366   StringLiteral Name;
2367   RegisterKind Kind;
2368 };
2369 
2370 static constexpr RegInfo RegularRegisters[] = {
2371   {{"v"},    IS_VGPR},
2372   {{"s"},    IS_SGPR},
2373   {{"ttmp"}, IS_TTMP},
2374   {{"acc"},  IS_AGPR},
2375   {{"a"},    IS_AGPR},
2376 };
2377 
2378 static bool isRegularReg(RegisterKind Kind) {
2379   return Kind == IS_VGPR ||
2380          Kind == IS_SGPR ||
2381          Kind == IS_TTMP ||
2382          Kind == IS_AGPR;
2383 }
2384 
2385 static const RegInfo* getRegularRegInfo(StringRef Str) {
2386   for (const RegInfo &Reg : RegularRegisters)
2387     if (Str.startswith(Reg.Name))
2388       return &Reg;
2389   return nullptr;
2390 }
2391 
2392 static bool getRegNum(StringRef Str, unsigned& Num) {
2393   return !Str.getAsInteger(10, Num);
2394 }
2395 
2396 bool
2397 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2398                             const AsmToken &NextToken) const {
2399 
2400   // A list of consecutive registers: [s0,s1,s2,s3]
2401   if (Token.is(AsmToken::LBrac))
2402     return true;
2403 
2404   if (!Token.is(AsmToken::Identifier))
2405     return false;
2406 
2407   // A single register like s0 or a range of registers like s[0:1]
2408 
2409   StringRef Str = Token.getString();
2410   const RegInfo *Reg = getRegularRegInfo(Str);
2411   if (Reg) {
2412     StringRef RegName = Reg->Name;
2413     StringRef RegSuffix = Str.substr(RegName.size());
2414     if (!RegSuffix.empty()) {
2415       unsigned Num;
2416       // A single register with an index: rXX
2417       if (getRegNum(RegSuffix, Num))
2418         return true;
2419     } else {
2420       // A range of registers: r[XX:YY].
2421       if (NextToken.is(AsmToken::LBrac))
2422         return true;
2423     }
2424   }
2425 
2426   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2427 }
2428 
2429 bool
2430 AMDGPUAsmParser::isRegister()
2431 {
2432   return isRegister(getToken(), peekToken());
2433 }
2434 
2435 unsigned
2436 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2437                                unsigned RegNum,
2438                                unsigned RegWidth,
2439                                SMLoc Loc) {
2440 
2441   assert(isRegularReg(RegKind));
2442 
2443   unsigned AlignSize = 1;
2444   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2445     // SGPR and TTMP registers must be aligned.
2446     // Max required alignment is 4 dwords.
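    // For example, s[2:3] is a valid 64-bit pair, while s[1:2] is rejected
    // because its first index is not even.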
2447     AlignSize = std::min(RegWidth, 4u);
2448   }
2449 
2450   if (RegNum % AlignSize != 0) {
2451     Error(Loc, "invalid register alignment");
2452     return AMDGPU::NoRegister;
2453   }
2454 
2455   unsigned RegIdx = RegNum / AlignSize;
2456   int RCID = getRegClass(RegKind, RegWidth);
2457   if (RCID == -1) {
2458     Error(Loc, "invalid or unsupported register size");
2459     return AMDGPU::NoRegister;
2460   }
2461 
2462   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2463   const MCRegisterClass RC = TRI->getRegClass(RCID);
2464   if (RegIdx >= RC.getNumRegs()) {
2465     Error(Loc, "register index is out of range");
2466     return AMDGPU::NoRegister;
2467   }
2468 
2469   return RC.getRegister(RegIdx);
2470 }
2471 
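// Parse a register index or an index range in square brackets, e.g. "[0]" or
// "[0:3]". On success, Num holds the first index and Width the number of
// 32-bit registers covered by the range.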
2472 bool
2473 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2474   int64_t RegLo, RegHi;
2475   if (!skipToken(AsmToken::LBrac, "missing register index"))
2476     return false;
2477 
2478   SMLoc FirstIdxLoc = getLoc();
2479   SMLoc SecondIdxLoc;
2480 
2481   if (!parseExpr(RegLo))
2482     return false;
2483 
2484   if (trySkipToken(AsmToken::Colon)) {
2485     SecondIdxLoc = getLoc();
2486     if (!parseExpr(RegHi))
2487       return false;
2488   } else {
2489     RegHi = RegLo;
2490   }
2491 
2492   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2493     return false;
2494 
2495   if (!isUInt<32>(RegLo)) {
2496     Error(FirstIdxLoc, "invalid register index");
2497     return false;
2498   }
2499 
2500   if (!isUInt<32>(RegHi)) {
2501     Error(SecondIdxLoc, "invalid register index");
2502     return false;
2503   }
2504 
2505   if (RegLo > RegHi) {
2506     Error(FirstIdxLoc, "first register index should not exceed second index");
2507     return false;
2508   }
2509 
2510   Num = static_cast<unsigned>(RegLo);
2511   Width = (RegHi - RegLo) + 1;
2512   return true;
2513 }
2514 
2515 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2516                                           unsigned &RegNum, unsigned &RegWidth,
2517                                           SmallVectorImpl<AsmToken> &Tokens) {
2518   assert(isToken(AsmToken::Identifier));
2519   unsigned Reg = getSpecialRegForName(getTokenStr());
2520   if (Reg) {
2521     RegNum = 0;
2522     RegWidth = 1;
2523     RegKind = IS_SPECIAL;
2524     Tokens.push_back(getToken());
2525     lex(); // skip register name
2526   }
2527   return Reg;
2528 }
2529 
2530 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2531                                           unsigned &RegNum, unsigned &RegWidth,
2532                                           SmallVectorImpl<AsmToken> &Tokens) {
2533   assert(isToken(AsmToken::Identifier));
2534   StringRef RegName = getTokenStr();
2535   auto Loc = getLoc();
2536 
2537   const RegInfo *RI = getRegularRegInfo(RegName);
2538   if (!RI) {
2539     Error(Loc, "invalid register name");
2540     return AMDGPU::NoRegister;
2541   }
2542 
2543   Tokens.push_back(getToken());
2544   lex(); // skip register name
2545 
2546   RegKind = RI->Kind;
2547   StringRef RegSuffix = RegName.substr(RI->Name.size());
2548   if (!RegSuffix.empty()) {
2549     // Single 32-bit register: vXX.
2550     if (!getRegNum(RegSuffix, RegNum)) {
2551       Error(Loc, "invalid register index");
2552       return AMDGPU::NoRegister;
2553     }
2554     RegWidth = 1;
2555   } else {
2556     // Range of registers: v[XX:YY]. ":YY" is optional.
2557     if (!ParseRegRange(RegNum, RegWidth))
2558       return AMDGPU::NoRegister;
2559   }
2560 
2561   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2562 }
2563 
2564 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2565                                        unsigned &RegWidth,
2566                                        SmallVectorImpl<AsmToken> &Tokens) {
2567   unsigned Reg = AMDGPU::NoRegister;
2568   auto ListLoc = getLoc();
2569 
2570   if (!skipToken(AsmToken::LBrac,
2571                  "expected a register or a list of registers")) {
2572     return AMDGPU::NoRegister;
2573   }
2574 
2575   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2576 
2577   auto Loc = getLoc();
2578   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2579     return AMDGPU::NoRegister;
2580   if (RegWidth != 1) {
2581     Error(Loc, "expected a single 32-bit register");
2582     return AMDGPU::NoRegister;
2583   }
2584 
2585   for (; trySkipToken(AsmToken::Comma); ) {
2586     RegisterKind NextRegKind;
2587     unsigned NextReg, NextRegNum, NextRegWidth;
2588     Loc = getLoc();
2589 
2590     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2591                              NextRegNum, NextRegWidth,
2592                              Tokens)) {
2593       return AMDGPU::NoRegister;
2594     }
2595     if (NextRegWidth != 1) {
2596       Error(Loc, "expected a single 32-bit register");
2597       return AMDGPU::NoRegister;
2598     }
2599     if (NextRegKind != RegKind) {
2600       Error(Loc, "registers in a list must be of the same kind");
2601       return AMDGPU::NoRegister;
2602     }
2603     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2604       return AMDGPU::NoRegister;
2605   }
2606 
2607   if (!skipToken(AsmToken::RBrac,
2608                  "expected a comma or a closing square bracket")) {
2609     return AMDGPU::NoRegister;
2610   }
2611 
2612   if (isRegularReg(RegKind))
2613     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2614 
2615   return Reg;
2616 }
2617 
2618 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2619                                           unsigned &RegNum, unsigned &RegWidth,
2620                                           SmallVectorImpl<AsmToken> &Tokens) {
2621   auto Loc = getLoc();
2622   Reg = AMDGPU::NoRegister;
2623 
2624   if (isToken(AsmToken::Identifier)) {
2625     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2626     if (Reg == AMDGPU::NoRegister)
2627       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2628   } else {
2629     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2630   }
2631 
2632   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2633   if (Reg == AMDGPU::NoRegister) {
2634     assert(Parser.hasPendingError());
2635     return false;
2636   }
2637 
2638   if (!subtargetHasRegister(*TRI, Reg)) {
2639     if (Reg == AMDGPU::SGPR_NULL) {
2640       Error(Loc, "'null' operand is not supported on this GPU");
2641     } else {
2642       Error(Loc, "register not available on this GPU");
2643     }
2644     return false;
2645   }
2646 
2647   return true;
2648 }
2649 
2650 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2651                                           unsigned &RegNum, unsigned &RegWidth,
2652                                           bool RestoreOnFailure /*=false*/) {
2653   Reg = AMDGPU::NoRegister;
2654 
2655   SmallVector<AsmToken, 1> Tokens;
2656   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2657     if (RestoreOnFailure) {
2658       while (!Tokens.empty()) {
2659         getLexer().UnLex(Tokens.pop_back_val());
2660       }
2661     }
2662     return true;
2663   }
2664   return false;
2665 }
2666 
2667 Optional<StringRef>
2668 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2669   switch (RegKind) {
2670   case IS_VGPR:
2671     return StringRef(".amdgcn.next_free_vgpr");
2672   case IS_SGPR:
2673     return StringRef(".amdgcn.next_free_sgpr");
2674   default:
2675     return None;
2676   }
2677 }
2678 
2679 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2680   auto SymbolName = getGprCountSymbolName(RegKind);
2681   assert(SymbolName && "initializing invalid register kind");
2682   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2683   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2684 }
2685 
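// Raise the .amdgcn.next_free_{v,s}gpr symbol so that it covers the register
// range [DwordRegIndex, DwordRegIndex + RegWidth). For example, a reference to
// v[4:7] raises .amdgcn.next_free_vgpr to at least 8.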
2686 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2687                                             unsigned DwordRegIndex,
2688                                             unsigned RegWidth) {
2689   // Symbols are only defined for GCN targets
2690   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2691     return true;
2692 
2693   auto SymbolName = getGprCountSymbolName(RegKind);
2694   if (!SymbolName)
2695     return true;
2696   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2697 
2698   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2699   int64_t OldCount;
2700 
2701   if (!Sym->isVariable())
2702     return !Error(getLoc(),
2703                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2704   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2705     return !Error(
2706         getLoc(),
2707         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2708 
2709   if (OldCount <= NewMax)
2710     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2711 
2712   return true;
2713 }
2714 
2715 std::unique_ptr<AMDGPUOperand>
2716 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2717   const auto &Tok = getToken();
2718   SMLoc StartLoc = Tok.getLoc();
2719   SMLoc EndLoc = Tok.getEndLoc();
2720   RegisterKind RegKind;
2721   unsigned Reg, RegNum, RegWidth;
2722 
2723   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2724     return nullptr;
2725   }
2726   if (isHsaAbiVersion3(&getSTI())) {
2727     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2728       return nullptr;
2729   } else
2730     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2731   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2732 }
2733 
2734 OperandMatchResultTy
2735 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2736   // TODO: add syntactic sugar for 1/(2*PI)
2737 
2738   assert(!isRegister());
2739   assert(!isModifier());
2740 
2741   const auto& Tok = getToken();
2742   const auto& NextTok = peekToken();
2743   bool IsReal = Tok.is(AsmToken::Real);
2744   SMLoc S = getLoc();
2745   bool Negate = false;
2746 
2747   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2748     lex();
2749     IsReal = true;
2750     Negate = true;
2751   }
2752 
2753   if (IsReal) {
2754     // Only floating-point literals with an optional
2755     // sign are allowed.
2756     // optional sign.
2757 
2758     StringRef Num = getTokenStr();
2759     lex();
2760 
2761     APFloat RealVal(APFloat::IEEEdouble());
2762     auto roundMode = APFloat::rmNearestTiesToEven;
2763     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2764       return MatchOperand_ParseFail;
2765     }
2766     if (Negate)
2767       RealVal.changeSign();
2768 
2769     Operands.push_back(
2770       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2771                                AMDGPUOperand::ImmTyNone, true));
2772 
2773     return MatchOperand_Success;
2774 
2775   } else {
2776     int64_t IntVal;
2777     const MCExpr *Expr;
2778     SMLoc S = getLoc();
2779 
2780     if (HasSP3AbsModifier) {
2781       // This is a workaround for handling expressions
2782       // as arguments of SP3 'abs' modifier, for example:
2783       //     |1.0|
2784       //     |-1|
2785       //     |1+x|
2786       // This syntax is not compatible with syntax of standard
2787       // MC expressions (due to the trailing '|').
2788       SMLoc EndLoc;
2789       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2790         return MatchOperand_ParseFail;
2791     } else {
2792       if (Parser.parseExpression(Expr))
2793         return MatchOperand_ParseFail;
2794     }
2795 
2796     if (Expr->evaluateAsAbsolute(IntVal)) {
2797       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2798     } else {
2799       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2800     }
2801 
2802     return MatchOperand_Success;
2803   }
2804 
2805   return MatchOperand_NoMatch;
2806 }
2807 
2808 OperandMatchResultTy
2809 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2810   if (!isRegister())
2811     return MatchOperand_NoMatch;
2812 
2813   if (auto R = parseRegister()) {
2814     assert(R->isReg());
2815     Operands.push_back(std::move(R));
2816     return MatchOperand_Success;
2817   }
2818   return MatchOperand_ParseFail;
2819 }
2820 
2821 OperandMatchResultTy
2822 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2823   auto res = parseReg(Operands);
2824   if (res != MatchOperand_NoMatch) {
2825     return res;
2826   } else if (isModifier()) {
2827     return MatchOperand_NoMatch;
2828   } else {
2829     return parseImm(Operands, HasSP3AbsMod);
2830   }
2831 }
2832 
2833 bool
2834 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2835   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2836     const auto &str = Token.getString();
2837     return str == "abs" || str == "neg" || str == "sext";
2838   }
2839   return false;
2840 }
2841 
2842 bool
2843 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2844   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2845 }
2846 
2847 bool
2848 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2849   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2850 }
2851 
2852 bool
2853 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2854   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2855 }
2856 
2857 // Check if this is an operand modifier or an opcode modifier
2858 // which may look like an expression but is not. We should
2859 // avoid parsing these modifiers as expressions. Currently
2860 // recognized sequences are:
2861 //   |...|
2862 //   abs(...)
2863 //   neg(...)
2864 //   sext(...)
2865 //   -reg
2866 //   -|...|
2867 //   -abs(...)
2868 //   name:...
2869 // Note that simple opcode modifiers like 'gds' may be parsed as
2870 // expressions; this is a special case. See getExpressionAsToken.
2871 //
2872 bool
2873 AMDGPUAsmParser::isModifier() {
2874 
2875   AsmToken Tok = getToken();
2876   AsmToken NextToken[2];
2877   peekTokens(NextToken);
2878 
2879   return isOperandModifier(Tok, NextToken[0]) ||
2880          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2881          isOpcodeModifierWithVal(Tok, NextToken[0]);
2882 }
2883 
2884 // Check if the current token is an SP3 'neg' modifier.
2885 // Currently this modifier is allowed in the following context:
2886 //
2887 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2888 // 2. Before an 'abs' modifier: -abs(...)
2889 // 3. Before an SP3 'abs' modifier: -|...|
2890 //
2891 // In all other cases "-" is handled as a part
2892 // of an expression that follows the sign.
2893 //
2894 // Note: When "-" is followed by an integer literal N,
2895 // this is interpreted as integer negation rather
2896 // than a floating-point NEG modifier applied to N.
2897 // Besides being counter-intuitive, such use of a floating-point
2898 // NEG modifier would have resulted in different meanings
2899 // of integer literals used with VOP1/2/C and VOP3,
2900 // for example:
2901 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2902 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2903 // Negative fp literals with a preceding "-" are
2904 // handled likewise for uniformity.
2905 //
2906 bool
2907 AMDGPUAsmParser::parseSP3NegModifier() {
2908 
2909   AsmToken NextToken[2];
2910   peekTokens(NextToken);
2911 
2912   if (isToken(AsmToken::Minus) &&
2913       (isRegister(NextToken[0], NextToken[1]) ||
2914        NextToken[0].is(AsmToken::Pipe) ||
2915        isId(NextToken[0], "abs"))) {
2916     lex();
2917     return true;
2918   }
2919 
2920   return false;
2921 }
2922 
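// Parse an operand with optional floating-point input modifiers, e.g.
// "abs(v0)", "neg(v1)", "-v2" or "-|v3|" (SP3 syntax). If AllowImm is set,
// an immediate with modifiers, such as "abs(-1.0)", is also accepted.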
2923 OperandMatchResultTy
2924 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2925                                               bool AllowImm) {
2926   bool Neg, SP3Neg;
2927   bool Abs, SP3Abs;
2928   SMLoc Loc;
2929 
2930   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2931   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2932     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2933     return MatchOperand_ParseFail;
2934   }
2935 
2936   SP3Neg = parseSP3NegModifier();
2937 
2938   Loc = getLoc();
2939   Neg = trySkipId("neg");
2940   if (Neg && SP3Neg) {
2941     Error(Loc, "expected register or immediate");
2942     return MatchOperand_ParseFail;
2943   }
2944   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2945     return MatchOperand_ParseFail;
2946 
2947   Abs = trySkipId("abs");
2948   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2949     return MatchOperand_ParseFail;
2950 
2951   Loc = getLoc();
2952   SP3Abs = trySkipToken(AsmToken::Pipe);
2953   if (Abs && SP3Abs) {
2954     Error(Loc, "expected register or immediate");
2955     return MatchOperand_ParseFail;
2956   }
2957 
2958   OperandMatchResultTy Res;
2959   if (AllowImm) {
2960     Res = parseRegOrImm(Operands, SP3Abs);
2961   } else {
2962     Res = parseReg(Operands);
2963   }
2964   if (Res != MatchOperand_Success) {
2965     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2966   }
2967 
2968   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2969     return MatchOperand_ParseFail;
2970   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2971     return MatchOperand_ParseFail;
2972   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2973     return MatchOperand_ParseFail;
2974 
2975   AMDGPUOperand::Modifiers Mods;
2976   Mods.Abs = Abs || SP3Abs;
2977   Mods.Neg = Neg || SP3Neg;
2978 
2979   if (Mods.hasFPModifiers()) {
2980     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2981     if (Op.isExpr()) {
2982       Error(Op.getStartLoc(), "expected an absolute expression");
2983       return MatchOperand_ParseFail;
2984     }
2985     Op.setModifiers(Mods);
2986   }
2987   return MatchOperand_Success;
2988 }
2989 
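// Parse an operand with an optional integer "sext" modifier, e.g. "sext(v0)".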
2990 OperandMatchResultTy
2991 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2992                                                bool AllowImm) {
2993   bool Sext = trySkipId("sext");
2994   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2995     return MatchOperand_ParseFail;
2996 
2997   OperandMatchResultTy Res;
2998   if (AllowImm) {
2999     Res = parseRegOrImm(Operands);
3000   } else {
3001     Res = parseReg(Operands);
3002   }
3003   if (Res != MatchOperand_Success) {
3004     return Sext? MatchOperand_ParseFail : Res;
3005   }
3006 
3007   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3008     return MatchOperand_ParseFail;
3009 
3010   AMDGPUOperand::Modifiers Mods;
3011   Mods.Sext = Sext;
3012 
3013   if (Mods.hasIntModifiers()) {
3014     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3015     if (Op.isExpr()) {
3016       Error(Op.getStartLoc(), "expected an absolute expression");
3017       return MatchOperand_ParseFail;
3018     }
3019     Op.setModifiers(Mods);
3020   }
3021 
3022   return MatchOperand_Success;
3023 }
3024 
3025 OperandMatchResultTy
3026 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3027   return parseRegOrImmWithFPInputMods(Operands, false);
3028 }
3029 
3030 OperandMatchResultTy
3031 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3032   return parseRegOrImmWithIntInputMods(Operands, false);
3033 }
3034 
3035 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3036   auto Loc = getLoc();
3037   if (trySkipId("off")) {
3038     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3039                                                 AMDGPUOperand::ImmTyOff, false));
3040     return MatchOperand_Success;
3041   }
3042 
3043   if (!isRegister())
3044     return MatchOperand_NoMatch;
3045 
3046   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3047   if (Reg) {
3048     Operands.push_back(std::move(Reg));
3049     return MatchOperand_Success;
3050   }
3051 
3052   return MatchOperand_ParseFail;
3053 
3054 }
3055 
3056 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3057   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3058 
3059   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3060       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3061       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3062       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3063     return Match_InvalidOperand;
3064 
3065   if ((TSFlags & SIInstrFlags::VOP3) &&
3066       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3067       getForcedEncodingSize() != 64)
3068     return Match_PreferE32;
3069 
3070   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3071       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3072     // v_mac_f32/16 allow only dst_sel == DWORD;
3073     auto OpNum =
3074         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3075     const auto &Op = Inst.getOperand(OpNum);
3076     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3077       return Match_InvalidOperand;
3078     }
3079   }
3080 
3081   return Match_Success;
3082 }
3083 
3084 static ArrayRef<unsigned> getAllVariants() {
3085   static const unsigned Variants[] = {
3086     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3087     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3088   };
3089 
3090   return makeArrayRef(Variants);
3091 }
3092 
3093 // What asm variants we should check
3094 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3095   if (getForcedEncodingSize() == 32) {
3096     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3097     return makeArrayRef(Variants);
3098   }
3099 
3100   if (isForcedVOP3()) {
3101     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3102     return makeArrayRef(Variants);
3103   }
3104 
3105   if (isForcedSDWA()) {
3106     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3107                                         AMDGPUAsmVariants::SDWA9};
3108     return makeArrayRef(Variants);
3109   }
3110 
3111   if (isForcedDPP()) {
3112     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3113     return makeArrayRef(Variants);
3114   }
3115 
3116   return getAllVariants();
3117 }
3118 
3119 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3120   if (getForcedEncodingSize() == 32)
3121     return "e32";
3122 
3123   if (isForcedVOP3())
3124     return "e64";
3125 
3126   if (isForcedSDWA())
3127     return "sdwa";
3128 
3129   if (isForcedDPP())
3130     return "dpp";
3131 
3132   return "";
3133 }
3134 
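// Return the first implicitly read SGPR (VCC, FLAT_SCR, M0, ...) of a VOP
// instruction, or AMDGPU::NoRegister if there is none.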
3135 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3136   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3137   const unsigned Num = Desc.getNumImplicitUses();
3138   for (unsigned i = 0; i < Num; ++i) {
3139     unsigned Reg = Desc.ImplicitUses[i];
3140     switch (Reg) {
3141     case AMDGPU::FLAT_SCR:
3142     case AMDGPU::VCC:
3143     case AMDGPU::VCC_LO:
3144     case AMDGPU::VCC_HI:
3145     case AMDGPU::M0:
3146       return Reg;
3147     default:
3148       break;
3149     }
3150   }
3151   return AMDGPU::NoRegister;
3152 }
3153 
3154 // NB: This code is correct only when used to check constant
3155 // bus limitations because GFX7 supports no f16 inline constants.
3156 // Note that there are no cases when a GFX7 opcode violates
3157 // constant bus limitations due to the use of an f16 constant.
3158 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3159                                        unsigned OpIdx) const {
3160   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3161 
3162   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3163     return false;
3164   }
3165 
3166   const MCOperand &MO = Inst.getOperand(OpIdx);
3167 
3168   int64_t Val = MO.getImm();
3169   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3170 
3171   switch (OpSize) { // expected operand size
3172   case 8:
3173     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3174   case 4:
3175     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3176   case 2: {
3177     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3178     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3179         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3180         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3181       return AMDGPU::isInlinableIntLiteral(Val);
3182 
3183     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3184         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3185         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3186       return AMDGPU::isInlinableIntLiteralV216(Val);
3187 
3188     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3189         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3190         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3191       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3192 
3193     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3194   }
3195   default:
3196     llvm_unreachable("invalid operand size");
3197   }
3198 }
3199 
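// Targets before GFX10 allow a single scalar value on the constant bus.
// GFX10+ allows two, except for 64-bit shifts which still allow only one.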
3200 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3201   if (!isGFX10Plus())
3202     return 1;
3203 
3204   switch (Opcode) {
3205   // 64-bit shift instructions can use only one scalar value input
3206   case AMDGPU::V_LSHLREV_B64_e64:
3207   case AMDGPU::V_LSHLREV_B64_gfx10:
3208   case AMDGPU::V_LSHRREV_B64_e64:
3209   case AMDGPU::V_LSHRREV_B64_gfx10:
3210   case AMDGPU::V_ASHRREV_I64_e64:
3211   case AMDGPU::V_ASHRREV_I64_gfx10:
3212   case AMDGPU::V_LSHL_B64_e64:
3213   case AMDGPU::V_LSHR_B64_e64:
3214   case AMDGPU::V_ASHR_I64_e64:
3215     return 1;
3216   default:
3217     return 2;
3218   }
3219 }
3220 
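// An operand uses the constant bus if it is a non-inline immediate, an SGPR
// other than null, or an expression.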
3221 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3222   const MCOperand &MO = Inst.getOperand(OpIdx);
3223   if (MO.isImm()) {
3224     return !isInlineConstant(Inst, OpIdx);
3225   } else if (MO.isReg()) {
3226     auto Reg = MO.getReg();
3227     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3228     auto PReg = mc2PseudoReg(Reg);
3229     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3230   } else {
3231     return true;
3232   }
3233 }
3234 
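// Check that the instruction does not read more scalar values (SGPRs and
// literals) than the constant bus allows for this opcode.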
3235 bool
3236 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3237                                                 const OperandVector &Operands) {
3238   const unsigned Opcode = Inst.getOpcode();
3239   const MCInstrDesc &Desc = MII.get(Opcode);
3240   unsigned LastSGPR = AMDGPU::NoRegister;
3241   unsigned ConstantBusUseCount = 0;
3242   unsigned NumLiterals = 0;
3243   unsigned LiteralSize;
3244 
3245   if (Desc.TSFlags &
3246       (SIInstrFlags::VOPC |
3247        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3248        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3249        SIInstrFlags::SDWA)) {
3250     // Check special imm operands (used by madmk, etc)
3251     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3252       ++ConstantBusUseCount;
3253     }
3254 
3255     SmallDenseSet<unsigned> SGPRsUsed;
3256     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3257     if (SGPRUsed != AMDGPU::NoRegister) {
3258       SGPRsUsed.insert(SGPRUsed);
3259       ++ConstantBusUseCount;
3260     }
3261 
3262     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3263     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3264     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3265 
3266     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3267 
3268     for (int OpIdx : OpIndices) {
3269       if (OpIdx == -1) break;
3270 
3271       const MCOperand &MO = Inst.getOperand(OpIdx);
3272       if (usesConstantBus(Inst, OpIdx)) {
3273         if (MO.isReg()) {
3274           LastSGPR = mc2PseudoReg(MO.getReg());
3275           // Pairs of registers with partial intersections like these
3276           //   s0, s[0:1]
3277           //   flat_scratch_lo, flat_scratch
3278           //   flat_scratch_lo, flat_scratch_hi
3279           // are theoretically valid but they are disabled anyway.
3280           // Note that this code mimics SIInstrInfo::verifyInstruction
3281           if (!SGPRsUsed.count(LastSGPR)) {
3282             SGPRsUsed.insert(LastSGPR);
3283             ++ConstantBusUseCount;
3284           }
3285         } else { // Expression or a literal
3286 
3287           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3288             continue; // special operand like VINTERP attr_chan
3289 
3290           // An instruction may use only one literal.
3291           // This has been validated on the previous step.
3292           // See validateVOP3Literal.
3293           // This literal may be used as more than one operand.
3294           // If all these operands are of the same size,
3295           // this literal counts as one scalar value.
3296           // Otherwise it counts as 2 scalar values.
3297           // See "GFX10 Shader Programming", section 3.6.2.3.
3298 
3299           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3300           if (Size < 4) Size = 4;
3301 
3302           if (NumLiterals == 0) {
3303             NumLiterals = 1;
3304             LiteralSize = Size;
3305           } else if (LiteralSize != Size) {
3306             NumLiterals = 2;
3307           }
3308         }
3309       }
3310     }
3311   }
3312   ConstantBusUseCount += NumLiterals;
3313 
3314   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3315     return true;
3316 
3317   SMLoc LitLoc = getLitLoc(Operands);
3318   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3319   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3320   Error(Loc, "invalid operand (violates constant bus restrictions)");
3321   return false;
3322 }
3323 
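// For instructions with an earlyclobber vdst, the destination register must
// not overlap any of the source registers.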
3324 bool
3325 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3326                                                  const OperandVector &Operands) {
3327   const unsigned Opcode = Inst.getOpcode();
3328   const MCInstrDesc &Desc = MII.get(Opcode);
3329 
3330   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3331   if (DstIdx == -1 ||
3332       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3333     return true;
3334   }
3335 
3336   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3337 
3338   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3339   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3340   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3341 
3342   assert(DstIdx != -1);
3343   const MCOperand &Dst = Inst.getOperand(DstIdx);
3344   assert(Dst.isReg());
3345   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3346 
3347   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3348 
3349   for (int SrcIdx : SrcIndices) {
3350     if (SrcIdx == -1) break;
3351     const MCOperand &Src = Inst.getOperand(SrcIdx);
3352     if (Src.isReg()) {
3353       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3354       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3355         Error(getRegLoc(SrcReg, Operands),
3356           "destination must be different than all sources");
3357         return false;
3358       }
3359     }
3360   }
3361 
3362   return true;
3363 }
3364 
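// The clamp modifier on integer instructions is only accepted on targets
// which support integer clamping.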
3365 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3366 
3367   const unsigned Opc = Inst.getOpcode();
3368   const MCInstrDesc &Desc = MII.get(Opc);
3369 
3370   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3371     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3372     assert(ClampIdx != -1);
3373     return Inst.getOperand(ClampIdx).getImm() == 0;
3374   }
3375 
3376   return true;
3377 }
3378 
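// Check that the vdata register size matches the number of dwords implied
// by dmask and tfe (halved when packed d16 is in use).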
3379 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3380 
3381   const unsigned Opc = Inst.getOpcode();
3382   const MCInstrDesc &Desc = MII.get(Opc);
3383 
3384   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3385     return true;
3386 
3387   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3388   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3389   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3390 
3391   assert(VDataIdx != -1);
3392 
3393   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3394     return true;
3395 
3396   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3397   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3398   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3399   if (DMask == 0)
3400     DMask = 1;
3401 
3402   unsigned DataSize =
3403     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3404   if (hasPackedD16()) {
3405     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3406     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3407       DataSize = (DataSize + 1) / 2;
3408   }
3409 
3410   return (VDataSize / 4) == DataSize + TFESize;
3411 }
3412 
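// On GFX10+, check that the vaddr size (or the number of NSA address
// operands) matches the address size implied by dim and the base opcode.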
3413 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3414   const unsigned Opc = Inst.getOpcode();
3415   const MCInstrDesc &Desc = MII.get(Opc);
3416 
3417   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3418     return true;
3419 
3420   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3421 
3422   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3423       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3424   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3425   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3426   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3427 
3428   assert(VAddr0Idx != -1);
3429   assert(SrsrcIdx != -1);
3430   assert(SrsrcIdx > VAddr0Idx);
3431 
3432   if (DimIdx == -1)
3433     return true; // intersect_ray
3434 
3435   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3436   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3437   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3438   unsigned VAddrSize =
3439       IsNSA ? SrsrcIdx - VAddr0Idx
3440             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3441 
3442   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3443                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3444                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3445                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3446   if (!IsNSA) {
3447     if (AddrSize > 8)
3448       AddrSize = 16;
3449     else if (AddrSize > 4)
3450       AddrSize = 8;
3451   }
3452 
3453   return VAddrSize == AddrSize;
3454 }
3455 
3456 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3457 
3458   const unsigned Opc = Inst.getOpcode();
3459   const MCInstrDesc &Desc = MII.get(Opc);
3460 
3461   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3462     return true;
3463   if (!Desc.mayLoad() || !Desc.mayStore())
3464     return true; // Not atomic
3465 
3466   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3467   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3468 
3469   // This is an incomplete check because image_atomic_cmpswap
3470   // may only use 0x3 and 0xf while other atomic operations
3471   // may use 0x1 and 0x3. However, these limitations are
3472   // verified when we check that dmask matches dst size.
3473   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3474 }
3475 
3476 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3477 
3478   const unsigned Opc = Inst.getOpcode();
3479   const MCInstrDesc &Desc = MII.get(Opc);
3480 
3481   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3482     return true;
3483 
3484   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3485   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3486 
3487   // GATHER4 instructions use dmask in a different fashion compared to
3488   // other MIMG instructions. The only useful DMASK values are
3489   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3490   // (red,red,red,red) etc.) The ISA document doesn't mention
3491   // this.
3492   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3493 }
3494 
3495 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3496 {
3497   switch (Opcode) {
3498   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3499   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3500   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3501     return true;
3502   default:
3503     return false;
3504   }
3505 }
3506 
3507 // movrels* opcodes should only allow VGPRs as src0.
3508 // This is specified in .td description for vop1/vop3,
3509 // but sdwa is handled differently. See isSDWAOperand.
3510 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3511                                       const OperandVector &Operands) {
3512 
3513   const unsigned Opc = Inst.getOpcode();
3514   const MCInstrDesc &Desc = MII.get(Opc);
3515 
3516   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3517     return true;
3518 
3519   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3520   assert(Src0Idx != -1);
3521 
3522   SMLoc ErrLoc;
3523   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3524   if (Src0.isReg()) {
3525     auto Reg = mc2PseudoReg(Src0.getReg());
3526     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3527     if (!isSGPR(Reg, TRI))
3528       return true;
3529     ErrLoc = getRegLoc(Reg, Operands);
3530   } else {
3531     ErrLoc = getConstLoc(Operands);
3532   }
3533 
3534   Error(ErrLoc, "source operand must be a VGPR");
3535   return false;
3536 }
3537 
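// v_accvgpr_write does not accept an SGPR as src0; it must be a VGPR or an
// inline constant.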
3538 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3539                                           const OperandVector &Operands) {
3540 
3541   const unsigned Opc = Inst.getOpcode();
3542 
3543   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3544     return true;
3545 
3546   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3547   assert(Src0Idx != -1);
3548 
3549   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3550   if (!Src0.isReg())
3551     return true;
3552 
3553   auto Reg = mc2PseudoReg(Src0.getReg());
3554   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3555   if (isSGPR(Reg, TRI)) {
3556     Error(getRegLoc(Reg, Operands),
3557           "source operand must be either a VGPR or an inline constant");
3558     return false;
3559   }
3560 
3561   return true;
3562 }
3563 
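// Reject the |abs| source modifier on v_div_scale_f32/f64 (not allowed in
// VOP3B instructions).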
3564 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3565   switch (Inst.getOpcode()) {
3566   default:
3567     return true;
3568   case V_DIV_SCALE_F32_gfx6_gfx7:
3569   case V_DIV_SCALE_F32_vi:
3570   case V_DIV_SCALE_F32_gfx10:
3571   case V_DIV_SCALE_F64_gfx6_gfx7:
3572   case V_DIV_SCALE_F64_vi:
3573   case V_DIV_SCALE_F64_gfx10:
3574     break;
3575   }
3576 
3577   // TODO: Check that src0 = src1 or src2.
3578 
3579   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3580                     AMDGPU::OpName::src1_modifiers,
3581                     AMDGPU::OpName::src2_modifiers}) {
3582     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3583             .getImm() &
3584         SISrcMods::ABS) {
3585       return false;
3586     }
3587   }
3588 
3589   return true;
3590 }
3591 
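// The d16 modifier on MIMG instructions is not supported on SI and CI.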
3592 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3593 
3594   const unsigned Opc = Inst.getOpcode();
3595   const MCInstrDesc &Desc = MII.get(Opc);
3596 
3597   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3598     return true;
3599 
3600   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3601   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3602     if (isCI() || isSI())
3603       return false;
3604   }
3605 
3606   return true;
3607 }
3608 
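// The dim operand, when present, must encode a value in the range [0, 7].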
3609 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3610   const unsigned Opc = Inst.getOpcode();
3611   const MCInstrDesc &Desc = MII.get(Opc);
3612 
3613   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3614     return true;
3615 
3616   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3617   if (DimIdx < 0)
3618     return true;
3619 
3620   long Imm = Inst.getOperand(DimIdx).getImm();
3621   if (Imm < 0 || Imm >= 8)
3622     return false;
3623 
3624   return true;
3625 }
3626 
3627 static bool IsRevOpcode(const unsigned Opcode)
3628 {
3629   switch (Opcode) {
3630   case AMDGPU::V_SUBREV_F32_e32:
3631   case AMDGPU::V_SUBREV_F32_e64:
3632   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3633   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3634   case AMDGPU::V_SUBREV_F32_e32_vi:
3635   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3636   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3637   case AMDGPU::V_SUBREV_F32_e64_vi:
3638 
3639   case AMDGPU::V_SUBREV_CO_U32_e32:
3640   case AMDGPU::V_SUBREV_CO_U32_e64:
3641   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3642   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3643 
3644   case AMDGPU::V_SUBBREV_U32_e32:
3645   case AMDGPU::V_SUBBREV_U32_e64:
3646   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3647   case AMDGPU::V_SUBBREV_U32_e32_vi:
3648   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3649   case AMDGPU::V_SUBBREV_U32_e64_vi:
3650 
3651   case AMDGPU::V_SUBREV_U32_e32:
3652   case AMDGPU::V_SUBREV_U32_e64:
3653   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3654   case AMDGPU::V_SUBREV_U32_e32_vi:
3655   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3656   case AMDGPU::V_SUBREV_U32_e64_vi:
3657 
3658   case AMDGPU::V_SUBREV_F16_e32:
3659   case AMDGPU::V_SUBREV_F16_e64:
3660   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3661   case AMDGPU::V_SUBREV_F16_e32_vi:
3662   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3663   case AMDGPU::V_SUBREV_F16_e64_vi:
3664 
3665   case AMDGPU::V_SUBREV_U16_e32:
3666   case AMDGPU::V_SUBREV_U16_e64:
3667   case AMDGPU::V_SUBREV_U16_e32_vi:
3668   case AMDGPU::V_SUBREV_U16_e64_vi:
3669 
3670   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3671   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3672   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3673 
3674   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3675   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3676 
3677   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3678   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3679 
3680   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3681   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3682 
3683   case AMDGPU::V_LSHRREV_B32_e32:
3684   case AMDGPU::V_LSHRREV_B32_e64:
3685   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3686   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3687   case AMDGPU::V_LSHRREV_B32_e32_vi:
3688   case AMDGPU::V_LSHRREV_B32_e64_vi:
3689   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3690   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3691 
3692   case AMDGPU::V_ASHRREV_I32_e32:
3693   case AMDGPU::V_ASHRREV_I32_e64:
3694   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3695   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3696   case AMDGPU::V_ASHRREV_I32_e32_vi:
3697   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3698   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3699   case AMDGPU::V_ASHRREV_I32_e64_vi:
3700 
3701   case AMDGPU::V_LSHLREV_B32_e32:
3702   case AMDGPU::V_LSHLREV_B32_e64:
3703   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3704   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3705   case AMDGPU::V_LSHLREV_B32_e32_vi:
3706   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3707   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3708   case AMDGPU::V_LSHLREV_B32_e64_vi:
3709 
3710   case AMDGPU::V_LSHLREV_B16_e32:
3711   case AMDGPU::V_LSHLREV_B16_e64:
3712   case AMDGPU::V_LSHLREV_B16_e32_vi:
3713   case AMDGPU::V_LSHLREV_B16_e64_vi:
3714   case AMDGPU::V_LSHLREV_B16_gfx10:
3715 
3716   case AMDGPU::V_LSHRREV_B16_e32:
3717   case AMDGPU::V_LSHRREV_B16_e64:
3718   case AMDGPU::V_LSHRREV_B16_e32_vi:
3719   case AMDGPU::V_LSHRREV_B16_e64_vi:
3720   case AMDGPU::V_LSHRREV_B16_gfx10:
3721 
3722   case AMDGPU::V_ASHRREV_I16_e32:
3723   case AMDGPU::V_ASHRREV_I16_e64:
3724   case AMDGPU::V_ASHRREV_I16_e32_vi:
3725   case AMDGPU::V_ASHRREV_I16_e64_vi:
3726   case AMDGPU::V_ASHRREV_I16_gfx10:
3727 
3728   case AMDGPU::V_LSHLREV_B64_e64:
3729   case AMDGPU::V_LSHLREV_B64_gfx10:
3730   case AMDGPU::V_LSHLREV_B64_vi:
3731 
3732   case AMDGPU::V_LSHRREV_B64_e64:
3733   case AMDGPU::V_LSHRREV_B64_gfx10:
3734   case AMDGPU::V_LSHRREV_B64_vi:
3735 
3736   case AMDGPU::V_ASHRREV_I64_e64:
3737   case AMDGPU::V_ASHRREV_I64_gfx10:
3738   case AMDGPU::V_ASHRREV_I64_vi:
3739 
3740   case AMDGPU::V_PK_LSHLREV_B16:
3741   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3742   case AMDGPU::V_PK_LSHLREV_B16_vi:
3743 
3744   case AMDGPU::V_PK_LSHRREV_B16:
3745   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3746   case AMDGPU::V_PK_LSHRREV_B16_vi:
3747   case AMDGPU::V_PK_ASHRREV_I16:
3748   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3749   case AMDGPU::V_PK_ASHRREV_I16_vi:
3750     return true;
3751   default:
3752     return false;
3753   }
3754 }
3755 
3756 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3757 
3758   using namespace SIInstrFlags;
3759   const unsigned Opcode = Inst.getOpcode();
3760   const MCInstrDesc &Desc = MII.get(Opcode);
3761 
3762   // lds_direct register is defined so that it can be used
3763   // with 9-bit operands only. Ignore encodings which do not accept these.
3764   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3765     return true;
3766 
3767   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3768   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3769   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3770 
3771   const int SrcIndices[] = { Src1Idx, Src2Idx };
3772 
3773   // lds_direct cannot be specified as either src1 or src2.
3774   for (int SrcIdx : SrcIndices) {
3775     if (SrcIdx == -1) break;
3776     const MCOperand &Src = Inst.getOperand(SrcIdx);
3777     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3778       return false;
3779     }
3780   }
3781 
3782   if (Src0Idx == -1)
3783     return true;
3784 
3785   const MCOperand &Src = Inst.getOperand(Src0Idx);
3786   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3787     return true;
3788 
3789   // lds_direct is specified as src0. Check additional limitations.
3790   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3791 }
3792 
3793 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3794   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3795     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3796     if (Op.isFlatOffset())
3797       return Op.getStartLoc();
3798   }
3799   return getLoc();
3800 }
3801 
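// Check that a FLAT offset is supported on this target and fits into the
// available offset bits (signed for GLOBAL/SCRATCH, unsigned for plain FLAT).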
3802 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3803                                          const OperandVector &Operands) {
3804   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3805   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3806     return true;
3807 
3808   auto Opcode = Inst.getOpcode();
3809   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3810   assert(OpNum != -1);
3811 
3812   const auto &Op = Inst.getOperand(OpNum);
3813   if (!hasFlatOffsets() && Op.getImm() != 0) {
3814     Error(getFlatOffsetLoc(Operands),
3815           "flat offset modifier is not supported on this GPU");
3816     return false;
3817   }
3818 
3819   // GLOBAL and SCRATCH accept a signed offset; for a plain FLAT segment the
3820   // offset must be positive (the MSB is ignored and forced to zero).
3821   if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3822     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3823     if (!isIntN(OffsetSize, Op.getImm())) {
3824       Error(getFlatOffsetLoc(Operands),
3825             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3826       return false;
3827     }
3828   } else {
3829     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3830     if (!isUIntN(OffsetSize, Op.getImm())) {
3831       Error(getFlatOffsetLoc(Operands),
3832             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3833       return false;
3834     }
3835   }
3836 
3837   return true;
3838 }
3839 
3840 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3841   // Start with second operand because SMEM Offset cannot be dst or src0.
3842   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3843     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3844     if (Op.isSMEMOffset())
3845       return Op.getStartLoc();
3846   }
3847   return getLoc();
3848 }
3849 
3850 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3851                                          const OperandVector &Operands) {
3852   if (isCI() || isSI())
3853     return true;
3854 
3855   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3856   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3857     return true;
3858 
3859   auto Opcode = Inst.getOpcode();
3860   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3861   if (OpNum == -1)
3862     return true;
3863 
3864   const auto &Op = Inst.getOperand(OpNum);
3865   if (!Op.isImm())
3866     return true;
3867 
3868   uint64_t Offset = Op.getImm();
3869   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3870   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3871       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3872     return true;
3873 
3874   Error(getSMEMOffsetLoc(Operands),
3875         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3876                                "expected a 21-bit signed offset");
3877 
3878   return false;
3879 }
3880 
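// SOP2/SOPC sources may use at most one literal constant or expression.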
3881 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3882   unsigned Opcode = Inst.getOpcode();
3883   const MCInstrDesc &Desc = MII.get(Opcode);
3884   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3885     return true;
3886 
3887   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3888   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3889 
3890   const int OpIndices[] = { Src0Idx, Src1Idx };
3891 
3892   unsigned NumExprs = 0;
3893   unsigned NumLiterals = 0;
3894   uint32_t LiteralValue;
3895 
3896   for (int OpIdx : OpIndices) {
3897     if (OpIdx == -1) break;
3898 
3899     const MCOperand &MO = Inst.getOperand(OpIdx);
3900     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3901     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3902       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3903         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3904         if (NumLiterals == 0 || LiteralValue != Value) {
3905           LiteralValue = Value;
3906           ++NumLiterals;
3907         }
3908       } else if (MO.isExpr()) {
3909         ++NumExprs;
3910       }
3911     }
3912   }
3913 
3914   return NumLiterals + NumExprs <= 1;
3915 }
3916 
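// v_permlane16/v_permlanex16 only allow the two low op_sel bits to be set.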
3917 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3918   const unsigned Opc = Inst.getOpcode();
3919   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3920       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3921     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3922     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3923 
3924     if (OpSel & ~3)
3925       return false;
3926   }
3927   return true;
3928 }
3929 
3930 // Check if VCC register matches wavefront size
3931 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3932   auto FB = getFeatureBits();
3933   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3934     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3935 }
3936 
3937 // VOP3 literal is only allowed in GFX10+ and only one can be used
3938 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3939                                           const OperandVector &Operands) {
3940   unsigned Opcode = Inst.getOpcode();
3941   const MCInstrDesc &Desc = MII.get(Opcode);
3942   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3943     return true;
3944 
3945   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3946   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3947   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3948 
3949   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3950 
3951   unsigned NumExprs = 0;
3952   unsigned NumLiterals = 0;
3953   uint32_t LiteralValue;
3954 
3955   for (int OpIdx : OpIndices) {
3956     if (OpIdx == -1) break;
3957 
3958     const MCOperand &MO = Inst.getOperand(OpIdx);
3959     if (!MO.isImm() && !MO.isExpr())
3960       continue;
3961     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3962       continue;
3963 
3964     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3965         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3966       Error(getConstLoc(Operands),
3967             "inline constants are not allowed for this operand");
3968       return false;
3969     }
3970 
3971     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3972       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3973       if (NumLiterals == 0 || LiteralValue != Value) {
3974         LiteralValue = Value;
3975         ++NumLiterals;
3976       }
3977     } else if (MO.isExpr()) {
3978       ++NumExprs;
3979     }
3980   }
3981   NumLiterals += NumExprs;
3982 
3983   if (!NumLiterals)
3984     return true;
3985 
3986   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
3987     Error(getLitLoc(Operands), "literal operands are not supported");
3988     return false;
3989   }
3990 
3991   if (NumLiterals > 1) {
3992     Error(getLitLoc(Operands), "only one literal operand is allowed");
3993     return false;
3994   }
3995 
3996   return true;
3997 }
3998 
3999 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4000 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4001                          const MCRegisterInfo *MRI) {
4002   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4003   if (OpIdx < 0)
4004     return -1;
4005 
4006   const MCOperand &Op = Inst.getOperand(OpIdx);
4007   if (!Op.isReg())
4008     return -1;
4009 
4010   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4011   auto Reg = Sub ? Sub : Op.getReg();
4012   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4013   return AGRP32.contains(Reg) ? 1 : 0;
4014 }
4015 
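// On gfx90a, the data and destination operands of FLAT/MUBUF/MTBUF/MIMG/DS
// instructions must be uniformly VGPRs or uniformly AGPRs; other targets do
// not allow AGPRs here at all.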
4016 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4017   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4018   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4019                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4020                   SIInstrFlags::DS)) == 0)
4021     return true;
4022 
4023   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4024                                                       : AMDGPU::OpName::vdata;
4025 
4026   const MCRegisterInfo *MRI = getMRI();
4027   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4028   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4029 
4030   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4031     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4032     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4033       return false;
4034   }
4035 
4036   auto FB = getFeatureBits();
4037   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4038     if (DataAreg < 0 || DstAreg < 0)
4039       return true;
4040     return DstAreg == DataAreg;
4041   }
4042 
4043   return DstAreg < 1 && DataAreg < 1;
4044 }
4045 
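// On gfx90a, VGPR and AGPR register tuples must be 64-bit aligned, i.e.
// start at an even-numbered register.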
4046 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4047   auto FB = getFeatureBits();
4048   if (!FB[AMDGPU::FeatureGFX90AInsts])
4049     return true;
4050 
4051   const MCRegisterInfo *MRI = getMRI();
4052   const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4053   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4054   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4055     const MCOperand &Op = Inst.getOperand(I);
4056     if (!Op.isReg())
4057       continue;
4058 
4059     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4060     if (!Sub)
4061       continue;
4062 
4063     if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4064       return false;
4065     if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4066       return false;
4067   }
4068 
4069   return true;
4070 }
4071 
4072 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4073                                             const OperandVector &Operands,
4074                                             const SMLoc &IDLoc) {
4075   int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4076                                           AMDGPU::OpName::glc1);
4077   if (GLCPos != -1) {
4078     // -1 is set by GLC_1 default operand. In all cases "glc" must be present
4079     // in the asm string, and the default value means it is not present.
4080     if (Inst.getOperand(GLCPos).getImm() == -1) {
4081       Error(IDLoc, "instruction must use glc");
4082       return false;
4083     }
4084   }
4085 
4086   return true;
4087 }
4088 
4089 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4090                                           const SMLoc &IDLoc,
4091                                           const OperandVector &Operands) {
4092   if (!validateLdsDirect(Inst)) {
4093     Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands),
4094       "invalid use of lds_direct");
4095     return false;
4096   }
4097   if (!validateSOPLiteral(Inst)) {
4098     Error(getLitLoc(Operands),
4099       "only one literal operand is allowed");
4100     return false;
4101   }
4102   if (!validateVOP3Literal(Inst, Operands)) {
4103     return false;
4104   }
4105   if (!validateConstantBusLimitations(Inst, Operands)) {
4106     return false;
4107   }
4108   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4109     return false;
4110   }
4111   if (!validateIntClampSupported(Inst)) {
4112     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4113       "integer clamping is not supported on this GPU");
4114     return false;
4115   }
4116   if (!validateOpSel(Inst)) {
4117     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4118       "invalid op_sel operand");
4119     return false;
4120   }
4121   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4122   if (!validateMIMGD16(Inst)) {
4123     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4124       "d16 modifier is not supported on this GPU");
4125     return false;
4126   }
4127   if (!validateMIMGDim(Inst)) {
4128     Error(IDLoc, "dim modifier is required on this GPU");
4129     return false;
4130   }
4131   if (!validateMIMGDataSize(Inst)) {
4132     Error(IDLoc,
4133       "image data size does not match dmask and tfe");
4134     return false;
4135   }
4136   if (!validateMIMGAddrSize(Inst)) {
4137     Error(IDLoc,
4138       "image address size does not match dim and a16");
4139     return false;
4140   }
4141   if (!validateMIMGAtomicDMask(Inst)) {
4142     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4143       "invalid atomic image dmask");
4144     return false;
4145   }
4146   if (!validateMIMGGatherDMask(Inst)) {
4147     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4148       "invalid image_gather dmask: only one bit must be set");
4149     return false;
4150   }
4151   if (!validateMovrels(Inst, Operands)) {
4152     return false;
4153   }
4154   if (!validateFlatOffset(Inst, Operands)) {
4155     return false;
4156   }
4157   if (!validateSMEMOffset(Inst, Operands)) {
4158     return false;
4159   }
4160   if (!validateMAIAccWrite(Inst, Operands)) {
4161     return false;
4162   }
4163   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4164     return false;
4165   }
4166 
4167   if (!validateAGPRLdSt(Inst)) {
4168     Error(IDLoc,
4169           getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4170               ? "invalid register class: data and dst should be all VGPR or AGPR"
4171               : "invalid register class: agpr loads and stores not supported on this GPU");
4172     return false;
4173   }
4174   if (!validateVGPRAlign(Inst)) {
4175     Error(IDLoc,
4176       "invalid register class: vgpr tuples must be 64 bit aligned");
4177     return false;
4178   }
4179 
4180   if (!validateDivScale(Inst)) {
4181     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4182     return false;
4183   }
4187 
4188   return true;
4189 }
4190 
4191 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4192                                             const FeatureBitset &FBS,
4193                                             unsigned VariantID = 0);
4194 
4195 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4196                                 const FeatureBitset &AvailableFeatures,
4197                                 unsigned VariantID);
4198 
4199 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4200                                        const FeatureBitset &FBS) {
4201   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4202 }
4203 
4204 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4205                                        const FeatureBitset &FBS,
4206                                        ArrayRef<unsigned> Variants) {
4207   for (auto Variant : Variants) {
4208     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4209       return true;
4210   }
4211 
4212   return false;
4213 }
4214 
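// Diagnose a mnemonic that failed to match: report whether the requested
// variant, the current GPU, or the mnemonic itself (likely a typo) is the
// problem.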
4215 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4216                                                   const SMLoc &IDLoc) {
4217   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4218 
4219   // Check if requested instruction variant is supported.
4220   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4221     return false;
4222 
4223   // This instruction is not supported.
4224   // Clear any other pending errors because they are no longer relevant.
4225   getParser().clearPendingErrors();
4226 
4227   // Requested instruction variant is not supported.
4228   // Check if any other variants are supported.
4229   StringRef VariantName = getMatchedVariantName();
4230   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4231     return Error(IDLoc,
4232                  Twine(VariantName,
4233                        " variant of this instruction is not supported"));
4234   }
4235 
4236   // Finally check if this instruction is supported on any other GPU.
4237   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4238     return Error(IDLoc, "instruction not supported on this GPU");
4239   }
4240 
4241   // Instruction not supported on any GPU. Probably a typo.
4242   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4243   return Error(IDLoc, "invalid instruction" + Suggestion);
4244 }
4245 
4246 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4247                                               OperandVector &Operands,
4248                                               MCStreamer &Out,
4249                                               uint64_t &ErrorInfo,
4250                                               bool MatchingInlineAsm) {
4251   MCInst Inst;
4252   unsigned Result = Match_Success;
4253   for (auto Variant : getMatchedVariants()) {
4254     uint64_t EI;
4255     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4256                                   Variant);
4257     // We order match statuses from least to most specific and keep the most
4258     // specific status as the result:
4259     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4260     if ((R == Match_Success) ||
4261         (R == Match_PreferE32) ||
4262         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4263         (R == Match_InvalidOperand && Result != Match_MissingFeature
4264                                    && Result != Match_PreferE32) ||
4265         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4266                                    && Result != Match_MissingFeature
4267                                    && Result != Match_PreferE32)) {
4268       Result = R;
4269       ErrorInfo = EI;
4270     }
4271     if (R == Match_Success)
4272       break;
4273   }
4274 
4275   if (Result == Match_Success) {
4276     if (!validateInstruction(Inst, IDLoc, Operands)) {
4277       return true;
4278     }
4279     Inst.setLoc(IDLoc);
4280     Out.emitInstruction(Inst, getSTI());
4281     return false;
4282   }
4283 
4284   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4285   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4286     return true;
4287   }
4288 
4289   switch (Result) {
4290   default: break;
4291   case Match_MissingFeature:
4292     // It has been verified that the specified instruction
4293     // mnemonic is valid. A match was found but it requires
4294     // features which are not supported on this GPU.
4295     return Error(IDLoc, "operands are not valid for this GPU or mode");
4296 
4297   case Match_InvalidOperand: {
4298     SMLoc ErrorLoc = IDLoc;
4299     if (ErrorInfo != ~0ULL) {
4300       if (ErrorInfo >= Operands.size()) {
4301         return Error(IDLoc, "too few operands for instruction");
4302       }
4303       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4304       if (ErrorLoc == SMLoc())
4305         ErrorLoc = IDLoc;
4306     }
4307     return Error(ErrorLoc, "invalid operand for instruction");
4308   }
4309 
4310   case Match_PreferE32:
4311     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4312                         "should be encoded as e32");
4313   case Match_MnemonicFail:
4314     llvm_unreachable("Invalid instructions should have been handled already");
4315   }
4316   llvm_unreachable("Implement any new match types added!");
4317 }
4318 
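// Parse an absolute expression into a 32-bit value. Returns true on error.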
4319 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4320   int64_t Tmp = -1;
4321   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4322     return true;
4323   }
4324   if (getParser().parseAbsoluteExpression(Tmp)) {
4325     return true;
4326   }
4327   Ret = static_cast<uint32_t>(Tmp);
4328   return false;
4329 }
4330 
4331 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4332                                                uint32_t &Minor) {
4333   if (ParseAsAbsoluteExpression(Major))
4334     return TokError("invalid major version");
4335 
4336   if (!trySkipToken(AsmToken::Comma))
4337     return TokError("minor version number required, comma expected");
4338 
4339   if (ParseAsAbsoluteExpression(Minor))
4340     return TokError("invalid minor version");
4341 
4342   return false;
4343 }
4344 
4345 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4346   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4347     return TokError("directive only supported for amdgcn architecture");
4348 
4349   std::string Target;
4350 
4351   SMLoc TargetStart = getLoc();
4352   if (getParser().parseEscapedString(Target))
4353     return true;
4354   SMRange TargetRange = SMRange(TargetStart, getLoc());
4355 
4356   std::string ExpectedTarget;
4357   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
4358   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
4359 
4360   if (Target != ExpectedTargetOS.str())
4361     return Error(TargetRange.Start, "target must match options", TargetRange);
4362 
4363   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
4364   return false;
4365 }
4366 
4367 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4368   return Error(Range.Start, "value out of range", Range);
4369 }
4370 
4371 bool AMDGPUAsmParser::calculateGPRBlocks(
4372     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4373     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4374     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4375     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4376   // TODO(scott.linder): These calculations are duplicated from
4377   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4378   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4379 
4380   unsigned NumVGPRs = NextFreeVGPR;
4381   unsigned NumSGPRs = NextFreeSGPR;
4382 
4383   if (Version.Major >= 10)
4384     NumSGPRs = 0;
4385   else {
4386     unsigned MaxAddressableNumSGPRs =
4387         IsaInfo::getAddressableNumSGPRs(&getSTI());
4388 
4389     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4390         NumSGPRs > MaxAddressableNumSGPRs)
4391       return OutOfRangeError(SGPRRange);
4392 
4393     NumSGPRs +=
4394         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4395 
4396     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4397         NumSGPRs > MaxAddressableNumSGPRs)
4398       return OutOfRangeError(SGPRRange);
4399 
4400     if (Features.test(FeatureSGPRInitBug))
4401       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4402   }
4403 
4404   VGPRBlocks =
4405       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4406   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4407 
4408   return false;
4409 }
4410 
4411 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4412   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4413     return TokError("directive only supported for amdgcn architecture");
4414 
4415   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4416     return TokError("directive only supported for amdhsa OS");
4417 
4418   StringRef KernelName;
4419   if (getParser().parseIdentifier(KernelName))
4420     return true;
4421 
4422   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4423 
4424   StringSet<> Seen;
4425 
4426   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4427 
4428   SMRange VGPRRange;
4429   uint64_t NextFreeVGPR = 0;
4430   uint64_t AccumOffset = 0;
4431   SMRange SGPRRange;
4432   uint64_t NextFreeSGPR = 0;
4433   unsigned UserSGPRCount = 0;
4434   bool ReserveVCC = true;
4435   bool ReserveFlatScr = true;
4436   bool ReserveXNACK = hasXNACK();
4437   Optional<bool> EnableWavefrontSize32;
4438 
4439   while (true) {
4440     while (trySkipToken(AsmToken::EndOfStatement));
4441 
4442     StringRef ID;
4443     SMRange IDRange = getTok().getLocRange();
4444     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4445       return true;
4446 
4447     if (ID == ".end_amdhsa_kernel")
4448       break;
4449 
4450     if (Seen.find(ID) != Seen.end())
4451       return TokError(".amdhsa_ directives cannot be repeated");
4452     Seen.insert(ID);
4453 
4454     SMLoc ValStart = getLoc();
4455     int64_t IVal;
4456     if (getParser().parseAbsoluteExpression(IVal))
4457       return true;
4458     SMLoc ValEnd = getLoc();
4459     SMRange ValRange = SMRange(ValStart, ValEnd);
4460 
4461     if (IVal < 0)
4462       return OutOfRangeError(ValRange);
4463 
4464     uint64_t Val = IVal;
4465 
4466 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4467   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4468     return OutOfRangeError(RANGE);                                             \
4469   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4470 
4471     if (ID == ".amdhsa_group_segment_fixed_size") {
4472       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4473         return OutOfRangeError(ValRange);
4474       KD.group_segment_fixed_size = Val;
4475     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4476       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4477         return OutOfRangeError(ValRange);
4478       KD.private_segment_fixed_size = Val;
4479     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4480       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4481                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4482                        Val, ValRange);
4483       if (Val)
4484         UserSGPRCount += 4;
4485     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4486       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4487                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4488                        ValRange);
4489       if (Val)
4490         UserSGPRCount += 2;
4491     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4492       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4493                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4494                        ValRange);
4495       if (Val)
4496         UserSGPRCount += 2;
4497     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4498       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4499                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4500                        Val, ValRange);
4501       if (Val)
4502         UserSGPRCount += 2;
4503     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4504       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4505                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4506                        ValRange);
4507       if (Val)
4508         UserSGPRCount += 2;
4509     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4510       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4511                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4512                        ValRange);
4513       if (Val)
4514         UserSGPRCount += 2;
4515     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4516       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4517                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4518                        Val, ValRange);
4519       if (Val)
4520         UserSGPRCount += 1;
4521     } else if (ID == ".amdhsa_wavefront_size32") {
4522       if (IVersion.Major < 10)
4523         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4524       EnableWavefrontSize32 = Val;
4525       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4526                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4527                        Val, ValRange);
4528     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4529       PARSE_BITS_ENTRY(
4530           KD.compute_pgm_rsrc2,
4531           COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
4532           ValRange);
4533     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4534       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4535                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4536                        ValRange);
4537     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4538       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4539                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4540                        ValRange);
4541     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4542       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4543                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4544                        ValRange);
4545     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4546       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4547                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4548                        ValRange);
4549     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4550       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4551                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4552                        ValRange);
4553     } else if (ID == ".amdhsa_next_free_vgpr") {
4554       VGPRRange = ValRange;
4555       NextFreeVGPR = Val;
4556     } else if (ID == ".amdhsa_next_free_sgpr") {
4557       SGPRRange = ValRange;
4558       NextFreeSGPR = Val;
4559     } else if (ID == ".amdhsa_accum_offset") {
4560       if (!isGFX90A())
4561         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4562       AccumOffset = Val;
4563     } else if (ID == ".amdhsa_reserve_vcc") {
4564       if (!isUInt<1>(Val))
4565         return OutOfRangeError(ValRange);
4566       ReserveVCC = Val;
4567     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4568       if (IVersion.Major < 7)
4569         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4570       if (!isUInt<1>(Val))
4571         return OutOfRangeError(ValRange);
4572       ReserveFlatScr = Val;
4573     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4574       if (IVersion.Major < 8)
4575         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4576       if (!isUInt<1>(Val))
4577         return OutOfRangeError(ValRange);
4578       ReserveXNACK = Val;
4579     } else if (ID == ".amdhsa_float_round_mode_32") {
4580       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4581                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4582     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4583       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4584                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4585     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4586       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4587                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4588     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4589       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4590                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4591                        ValRange);
4592     } else if (ID == ".amdhsa_dx10_clamp") {
4593       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4594                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4595     } else if (ID == ".amdhsa_ieee_mode") {
4596       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4597                        Val, ValRange);
4598     } else if (ID == ".amdhsa_fp16_overflow") {
4599       if (IVersion.Major < 9)
4600         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4601       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4602                        ValRange);
4603     } else if (ID == ".amdhsa_tg_split") {
4604       if (!isGFX90A())
4605         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4606       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4607                        ValRange);
4608     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4609       if (IVersion.Major < 10)
4610         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4611       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4612                        ValRange);
4613     } else if (ID == ".amdhsa_memory_ordered") {
4614       if (IVersion.Major < 10)
4615         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4616       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4617                        ValRange);
4618     } else if (ID == ".amdhsa_forward_progress") {
4619       if (IVersion.Major < 10)
4620         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4621       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4622                        ValRange);
4623     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4624       PARSE_BITS_ENTRY(
4625           KD.compute_pgm_rsrc2,
4626           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4627           ValRange);
4628     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4629       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4630                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4631                        Val, ValRange);
4632     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4633       PARSE_BITS_ENTRY(
4634           KD.compute_pgm_rsrc2,
4635           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4636           ValRange);
4637     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4638       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4639                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4640                        Val, ValRange);
4641     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4642       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4643                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4644                        Val, ValRange);
4645     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4646       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4647                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4648                        Val, ValRange);
4649     } else if (ID == ".amdhsa_exception_int_div_zero") {
4650       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4651                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4652                        Val, ValRange);
4653     } else {
4654       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4655     }
4656 
4657 #undef PARSE_BITS_ENTRY
4658   }
4659 
4660   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4661     return TokError(".amdhsa_next_free_vgpr directive is required");
4662 
4663   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4664     return TokError(".amdhsa_next_free_sgpr directive is required");
4665 
4666   unsigned VGPRBlocks;
4667   unsigned SGPRBlocks;
4668   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4669                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4670                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4671                          SGPRBlocks))
4672     return true;
4673 
4674   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4675           VGPRBlocks))
4676     return OutOfRangeError(VGPRRange);
4677   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4678                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4679 
4680   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4681           SGPRBlocks))
4682     return OutOfRangeError(SGPRRange);
4683   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4684                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4685                   SGPRBlocks);
4686 
4687   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4688     return TokError("too many user SGPRs enabled");
4689   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4690                   UserSGPRCount);
4691 
4692   if (isGFX90A()) {
4693     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4694       return TokError(".amdhsa_accum_offset directive is required");
4695     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4696       return TokError("accum_offset should be in range [4..256] in "
4697                       "increments of 4");
4698     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4699       return TokError("accum_offset exceeds total VGPR allocation");
4700     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4701                     (AccumOffset / 4 - 1));
4702   }
4703 
4704   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4705       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4706       ReserveFlatScr, ReserveXNACK);
4707   return false;
4708 }
4709 
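     /// ParseDirectiveHSACodeObjectVersion
     ///  ::= .hsa_code_object_version major ',' minor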
4710 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4711   uint32_t Major;
4712   uint32_t Minor;
4713 
4714   if (ParseDirectiveMajorMinor(Major, Minor))
4715     return true;
4716 
4717   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4718   return false;
4719 }
4720 
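     /// ParseDirectiveHSACodeObjectISA
     ///  ::= .hsa_code_object_isa [major ',' minor ',' stepping ',' vendor ',' arch]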
4721 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4722   uint32_t Major;
4723   uint32_t Minor;
4724   uint32_t Stepping;
4725   StringRef VendorName;
4726   StringRef ArchName;
4727 
4728   // If this directive has no arguments, then use the ISA version for the
4729   // targeted GPU.
4730   if (isToken(AsmToken::EndOfStatement)) {
4731     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4732     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4733                                                       ISA.Stepping,
4734                                                       "AMD", "AMDGPU");
4735     return false;
4736   }
4737 
4738   if (ParseDirectiveMajorMinor(Major, Minor))
4739     return true;
4740 
4741   if (!trySkipToken(AsmToken::Comma))
4742     return TokError("stepping version number required, comma expected");
4743 
4744   if (ParseAsAbsoluteExpression(Stepping))
4745     return TokError("invalid stepping version");
4746 
4747   if (!trySkipToken(AsmToken::Comma))
4748     return TokError("vendor name required, comma expected");
4749 
4750   if (!parseString(VendorName, "invalid vendor name"))
4751     return true;
4752 
4753   if (!trySkipToken(AsmToken::Comma))
4754     return TokError("arch name required, comma expected");
4755 
4756   if (!parseString(ArchName, "invalid arch name"))
4757     return true;
4758 
4759   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4760                                                     VendorName, ArchName);
4761   return false;
4762 }
4763 
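     // Parses a single "<field> = <value>" entry of an .amd_kernel_code_t block and
     // applies extra validation for wavefront-size and GFX10-only settings.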
4764 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4765                                                amd_kernel_code_t &Header) {
4766   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4767   // assembly for backwards compatibility.
4768   if (ID == "max_scratch_backing_memory_byte_size") {
4769     Parser.eatToEndOfStatement();
4770     return false;
4771   }
4772 
4773   SmallString<40> ErrStr;
4774   raw_svector_ostream Err(ErrStr);
4775   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4776     return TokError(Err.str());
4777   }
4778   Lex();
4779 
4780   if (ID == "enable_wavefront_size32") {
4781     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4782       if (!isGFX10Plus())
4783         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4784       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4785         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4786     } else {
4787       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4788         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4789     }
4790   }
4791 
4792   if (ID == "wavefront_size") {
4793     if (Header.wavefront_size == 5) {
4794       if (!isGFX10Plus())
4795         return TokError("wavefront_size=5 is only allowed on GFX10+");
4796       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4797         return TokError("wavefront_size=5 requires +WavefrontSize32");
4798     } else if (Header.wavefront_size == 6) {
4799       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4800         return TokError("wavefront_size=6 requires +WavefrontSize64");
4801     }
4802   }
4803 
4804   if (ID == "enable_wgp_mode") {
4805     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4806         !isGFX10Plus())
4807       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4808   }
4809 
4810   if (ID == "enable_mem_ordered") {
4811     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4812         !isGFX10Plus())
4813       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4814   }
4815 
4816   if (ID == "enable_fwd_progress") {
4817     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4818         !isGFX10Plus())
4819       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4820   }
4821 
4822   return false;
4823 }
4824 
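     // Parses an .amd_kernel_code_t block: a sequence of "<field> = <value>" entries
     // terminated by .end_amd_kernel_code_t.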
4825 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4826   amd_kernel_code_t Header;
4827   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4828 
4829   while (true) {
4830     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4831     // will set the current token to EndOfStatement.
4832     while (trySkipToken(AsmToken::EndOfStatement));
4833 
4834     StringRef ID;
4835     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4836       return true;
4837 
4838     if (ID == ".end_amd_kernel_code_t")
4839       break;
4840 
4841     if (ParseAMDKernelCodeTValue(ID, Header))
4842       return true;
4843   }
4844 
4845   getTargetStreamer().EmitAMDKernelCodeT(Header);
4846 
4847   return false;
4848 }
4849 
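     /// ParseDirectiveAMDGPUHsaKernel
     ///  ::= .amdgpu_hsa_kernel symbol_name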
4850 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4851   StringRef KernelName;
4852   if (!parseId(KernelName, "expected symbol name"))
4853     return true;
4854 
4855   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4856                                            ELF::STT_AMDGPU_HSA_KERNEL);
4857 
4858   KernelScope.initialize(getContext());
4859   return false;
4860 }
4861 
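     // Parses .amd_amdgpu_isa "<isa version string>" and checks that it matches the
     // ISA of the current subtarget.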
4862 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4863   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4864     return Error(getLoc(),
4865                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4866                  "architectures");
4867   }
4868 
4869   auto ISAVersionStringFromASM = getToken().getStringContents();
4870 
4871   std::string ISAVersionStringFromSTI;
4872   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4873   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4874 
4875   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4876     return Error(getLoc(),
4877                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4878                  "arguments specified through the command line");
4879   }
4880 
4881   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4882   Lex();
4883 
4884   return false;
4885 }
4886 
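     // Parses the HSA metadata text enclosed by the metadata begin/end directives and
     // hands it to the target streamer (V2 or V3 format, depending on the HSA ABI version).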
4887 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4888   const char *AssemblerDirectiveBegin;
4889   const char *AssemblerDirectiveEnd;
4890   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4891       isHsaAbiVersion3(&getSTI())
4892           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4893                             HSAMD::V3::AssemblerDirectiveEnd)
4894           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4895                             HSAMD::AssemblerDirectiveEnd);
4896 
4897   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4898     return Error(getLoc(),
4899                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4900                  "not available on non-amdhsa OSes")).str());
4901   }
4902 
4903   std::string HSAMetadataString;
4904   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4905                           HSAMetadataString))
4906     return true;
4907 
4908   if (isHsaAbiVersion3(&getSTI())) {
4909     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4910       return Error(getLoc(), "invalid HSA metadata");
4911   } else {
4912     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4913       return Error(getLoc(), "invalid HSA metadata");
4914   }
4915 
4916   return false;
4917 }
4918 
4919 /// Common code to parse out a block of text (typically YAML) between start and
4920 /// end directives.
4921 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4922                                           const char *AssemblerDirectiveEnd,
4923                                           std::string &CollectString) {
4924 
4925   raw_string_ostream CollectStream(CollectString);
4926 
4927   getLexer().setSkipSpace(false);
4928 
4929   bool FoundEnd = false;
4930   while (!isToken(AsmToken::Eof)) {
4931     while (isToken(AsmToken::Space)) {
4932       CollectStream << getTokenStr();
4933       Lex();
4934     }
4935 
4936     if (trySkipId(AssemblerDirectiveEnd)) {
4937       FoundEnd = true;
4938       break;
4939     }
4940 
4941     CollectStream << Parser.parseStringToEndOfStatement()
4942                   << getContext().getAsmInfo()->getSeparatorString();
4943 
4944     Parser.eatToEndOfStatement();
4945   }
4946 
4947   getLexer().setSkipSpace(true);
4948 
4949   if (isToken(AsmToken::Eof) && !FoundEnd) {
4950     return TokError(Twine("expected directive ") +
4951                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4952   }
4953 
4954   CollectStream.flush();
4955   return false;
4956 }
4957 
4958 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4959 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4960   std::string String;
4961   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4962                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4963     return true;
4964 
4965   auto PALMetadata = getTargetStreamer().getPALMetadata();
4966   if (!PALMetadata->setFromString(String))
4967     return Error(getLoc(), "invalid PAL metadata");
4968   return false;
4969 }
4970 
4971 /// Parse the assembler directive for old linear-format PAL metadata.
4972 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4973   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4974     return Error(getLoc(),
4975                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4976                  "not available on non-amdpal OSes")).str());
4977   }
4978 
4979   auto PALMetadata = getTargetStreamer().getPALMetadata();
4980   PALMetadata->setLegacy();
4981   for (;;) {
4982     uint32_t Key, Value;
4983     if (ParseAsAbsoluteExpression(Key)) {
4984       return TokError(Twine("invalid value in ") +
4985                       Twine(PALMD::AssemblerDirective));
4986     }
4987     if (!trySkipToken(AsmToken::Comma)) {
4988       return TokError(Twine("expected an even number of values in ") +
4989                       Twine(PALMD::AssemblerDirective));
4990     }
4991     if (ParseAsAbsoluteExpression(Value)) {
4992       return TokError(Twine("invalid value in ") +
4993                       Twine(PALMD::AssemblerDirective));
4994     }
4995     PALMetadata->setRegister(Key, Value);
4996     if (!trySkipToken(AsmToken::Comma))
4997       break;
4998   }
4999   return false;
5000 }
5001 
5002 /// ParseDirectiveAMDGPULDS
5003 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5004 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5005   if (getParser().checkForValidSection())
5006     return true;
5007 
5008   StringRef Name;
5009   SMLoc NameLoc = getLoc();
5010   if (getParser().parseIdentifier(Name))
5011     return TokError("expected identifier in directive");
5012 
5013   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5014   if (parseToken(AsmToken::Comma, "expected ','"))
5015     return true;
5016 
5017   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5018 
5019   int64_t Size;
5020   SMLoc SizeLoc = getLoc();
5021   if (getParser().parseAbsoluteExpression(Size))
5022     return true;
5023   if (Size < 0)
5024     return Error(SizeLoc, "size must be non-negative");
5025   if (Size > LocalMemorySize)
5026     return Error(SizeLoc, "size is too large");
5027 
5028   int64_t Alignment = 4;
5029   if (trySkipToken(AsmToken::Comma)) {
5030     SMLoc AlignLoc = getLoc();
5031     if (getParser().parseAbsoluteExpression(Alignment))
5032       return true;
5033     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5034       return Error(AlignLoc, "alignment must be a power of two");
5035 
5036     // Alignment larger than the size of LDS is possible in theory, as long
5037     // as the linker manages to place to symbol at address 0, but we do want
5038     // to make sure the alignment fits nicely into a 32-bit integer.
5039     if (Alignment >= 1u << 31)
5040       return Error(AlignLoc, "alignment is too large");
5041   }
5042 
5043   if (parseToken(AsmToken::EndOfStatement,
5044                  "unexpected token in '.amdgpu_lds' directive"))
5045     return true;
5046 
5047   Symbol->redefineIfPossible();
5048   if (!Symbol->isUndefined())
5049     return Error(NameLoc, "invalid symbol redefinition");
5050 
5051   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5052   return false;
5053 }
5054 
5055 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5056   StringRef IDVal = DirectiveID.getString();
5057 
5058   if (isHsaAbiVersion3(&getSTI())) {
5059     if (IDVal == ".amdgcn_target")
5060       return ParseDirectiveAMDGCNTarget();
5061 
5062     if (IDVal == ".amdhsa_kernel")
5063       return ParseDirectiveAMDHSAKernel();
5064 
5065     // TODO: Restructure/combine with PAL metadata directive.
5066     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5067       return ParseDirectiveHSAMetadata();
5068   } else {
5069     if (IDVal == ".hsa_code_object_version")
5070       return ParseDirectiveHSACodeObjectVersion();
5071 
5072     if (IDVal == ".hsa_code_object_isa")
5073       return ParseDirectiveHSACodeObjectISA();
5074 
5075     if (IDVal == ".amd_kernel_code_t")
5076       return ParseDirectiveAMDKernelCodeT();
5077 
5078     if (IDVal == ".amdgpu_hsa_kernel")
5079       return ParseDirectiveAMDGPUHsaKernel();
5080 
5081     if (IDVal == ".amd_amdgpu_isa")
5082       return ParseDirectiveISAVersion();
5083 
5084     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5085       return ParseDirectiveHSAMetadata();
5086   }
5087 
5088   if (IDVal == ".amdgpu_lds")
5089     return ParseDirectiveAMDGPULDS();
5090 
5091   if (IDVal == PALMD::AssemblerDirectiveBegin)
5092     return ParseDirectivePALMetadataBegin();
5093 
5094   if (IDVal == PALMD::AssemblerDirective)
5095     return ParseDirectivePALMetadata();
5096 
5097   return true;
5098 }
5099 
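     // Returns true if the given register exists on the current subtarget.
     // Registers such as flat_scratch, xnack_mask and the extra ttmp/sgpr pairs
     // are only available on some generations.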
5100 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5101                                            unsigned RegNo) const {
5102 
5103   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5104        R.isValid(); ++R) {
5105     if (*R == RegNo)
5106       return isGFX9Plus();
5107   }
5108 
5109   // GFX10 has 2 more SGPRs: 104 and 105.
5110   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5111        R.isValid(); ++R) {
5112     if (*R == RegNo)
5113       return hasSGPR104_SGPR105();
5114   }
5115 
5116   switch (RegNo) {
5117   case AMDGPU::SRC_SHARED_BASE:
5118   case AMDGPU::SRC_SHARED_LIMIT:
5119   case AMDGPU::SRC_PRIVATE_BASE:
5120   case AMDGPU::SRC_PRIVATE_LIMIT:
5121   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5122     return isGFX9Plus();
5123   case AMDGPU::TBA:
5124   case AMDGPU::TBA_LO:
5125   case AMDGPU::TBA_HI:
5126   case AMDGPU::TMA:
5127   case AMDGPU::TMA_LO:
5128   case AMDGPU::TMA_HI:
5129     return !isGFX9Plus();
5130   case AMDGPU::XNACK_MASK:
5131   case AMDGPU::XNACK_MASK_LO:
5132   case AMDGPU::XNACK_MASK_HI:
5133     return (isVI() || isGFX9()) && hasXNACK();
5134   case AMDGPU::SGPR_NULL:
5135     return isGFX10Plus();
5136   default:
5137     break;
5138   }
5139 
5140   if (isCI())
5141     return true;
5142 
5143   if (isSI() || isGFX10Plus()) {
5144     // No flat_scr on SI.
5145     // On GFX10 flat scratch is not a valid register operand and can only be
5146     // accessed with s_setreg/s_getreg.
5147     switch (RegNo) {
5148     case AMDGPU::FLAT_SCR:
5149     case AMDGPU::FLAT_SCR_LO:
5150     case AMDGPU::FLAT_SCR_HI:
5151       return false;
5152     default:
5153       return true;
5154     }
5155   }
5156 
5157   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5158   // SI/CI have.
5159   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5160        R.isValid(); ++R) {
5161     if (*R == RegNo)
5162       return hasSGPR102_SGPR103();
5163   }
5164 
5165   return true;
5166 }
5167 
5168 OperandMatchResultTy
5169 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5170                               OperandMode Mode) {
5171   // Try to parse with a custom parser
5172   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5173 
5174   // If we successfully parsed the operand or if there was an error parsing,
5175   // we are done.
5176   //
5177   // If we are parsing after we reach EndOfStatement then this means we
5178   // are appending default values to the Operands list.  This is only done
5179   // by a custom parser, so we shouldn't continue on to the generic parsing.
5180   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5181       isToken(AsmToken::EndOfStatement))
5182     return ResTy;
5183 
5184   SMLoc RBraceLoc;
5185   SMLoc LBraceLoc = getLoc();
5186   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5187     unsigned Prefix = Operands.size();
5188 
5189     for (;;) {
5190       auto Loc = getLoc();
5191       ResTy = parseReg(Operands);
5192       if (ResTy == MatchOperand_NoMatch)
5193         Error(Loc, "expected a register");
5194       if (ResTy != MatchOperand_Success)
5195         return MatchOperand_ParseFail;
5196 
5197       RBraceLoc = getLoc();
5198       if (trySkipToken(AsmToken::RBrac))
5199         break;
5200 
5201       if (!skipToken(AsmToken::Comma,
5202                      "expected a comma or a closing square bracket")) {
5203         return MatchOperand_ParseFail;
5204       }
5205     }
5206 
5207     if (Operands.size() - Prefix > 1) {
5208       Operands.insert(Operands.begin() + Prefix,
5209                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5210       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5211     }
5212 
5213     return MatchOperand_Success;
5214   }
5215 
5216   return parseRegOrImm(Operands);
5217 }
5218 
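     // Strips a recognized encoding suffix (_e32, _e64, _dpp or _sdwa) from the
     // mnemonic, records the corresponding forced encoding, and returns the bare name.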
5219 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5220   // Clear any forced encodings from the previous instruction.
5221   setForcedEncodingSize(0);
5222   setForcedDPP(false);
5223   setForcedSDWA(false);
5224 
5225   if (Name.endswith("_e64")) {
5226     setForcedEncodingSize(64);
5227     return Name.substr(0, Name.size() - 4);
5228   } else if (Name.endswith("_e32")) {
5229     setForcedEncodingSize(32);
5230     return Name.substr(0, Name.size() - 4);
5231   } else if (Name.endswith("_dpp")) {
5232     setForcedDPP(true);
5233     return Name.substr(0, Name.size() - 4);
5234   } else if (Name.endswith("_sdwa")) {
5235     setForcedSDWA(true);
5236     return Name.substr(0, Name.size() - 5);
5237   }
5238   return Name;
5239 }
5240 
5241 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5242                                        StringRef Name,
5243                                        SMLoc NameLoc, OperandVector &Operands) {
5244   // Add the instruction mnemonic
5245   Name = parseMnemonicSuffix(Name);
5246   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5247 
5248   bool IsMIMG = Name.startswith("image_");
5249 
5250   while (!trySkipToken(AsmToken::EndOfStatement)) {
5251     OperandMode Mode = OperandMode_Default;
5252     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5253       Mode = OperandMode_NSA;
5254     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5255 
5256     if (Res != MatchOperand_Success) {
5257       checkUnsupportedInstruction(Name, NameLoc);
5258       if (!Parser.hasPendingError()) {
5259         // FIXME: use real operand location rather than the current location.
5260         StringRef Msg =
5261           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5262                                             "not a valid operand.";
5263         Error(getLoc(), Msg);
5264       }
5265       while (!trySkipToken(AsmToken::EndOfStatement)) {
5266         lex();
5267       }
5268       return true;
5269     }
5270 
5271     // Eat the comma or space if there is one.
5272     trySkipToken(AsmToken::Comma);
5273   }
5274 
5275   return false;
5276 }
5277 
5278 //===----------------------------------------------------------------------===//
5279 // Utility functions
5280 //===----------------------------------------------------------------------===//
5281 
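     // Parses an integer operand written as "<prefix>:<expression>", e.g. offset:16.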
5282 OperandMatchResultTy
5283 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5284 
5285   if (!trySkipId(Prefix, AsmToken::Colon))
5286     return MatchOperand_NoMatch;
5287 
5288   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5289 }
5290 
5291 OperandMatchResultTy
5292 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5293                                     AMDGPUOperand::ImmTy ImmTy,
5294                                     bool (*ConvertResult)(int64_t&)) {
5295   SMLoc S = getLoc();
5296   int64_t Value = 0;
5297 
5298   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5299   if (Res != MatchOperand_Success)
5300     return Res;
5301 
5302   if (ConvertResult && !ConvertResult(Value)) {
5303     Error(S, "invalid " + StringRef(Prefix) + " value.");
5304   }
5305 
5306   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5307   return MatchOperand_Success;
5308 }
5309 
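     // Parses a bit array written as "<prefix>:[b0,b1,...]" with up to four 0/1
     // elements, e.g. op_sel:[0,1], and packs the bits into a single immediate.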
5310 OperandMatchResultTy
5311 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5312                                              OperandVector &Operands,
5313                                              AMDGPUOperand::ImmTy ImmTy,
5314                                              bool (*ConvertResult)(int64_t&)) {
5315   SMLoc S = getLoc();
5316   if (!trySkipId(Prefix, AsmToken::Colon))
5317     return MatchOperand_NoMatch;
5318 
5319   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5320     return MatchOperand_ParseFail;
5321 
5322   unsigned Val = 0;
5323   const unsigned MaxSize = 4;
5324 
5325   // FIXME: How to verify the number of elements matches the number of src
5326   // operands?
5327   for (int I = 0; ; ++I) {
5328     int64_t Op;
5329     SMLoc Loc = getLoc();
5330     if (!parseExpr(Op))
5331       return MatchOperand_ParseFail;
5332 
5333     if (Op != 0 && Op != 1) {
5334       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5335       return MatchOperand_ParseFail;
5336     }
5337 
5338     Val |= (Op << I);
5339 
5340     if (trySkipToken(AsmToken::RBrac))
5341       break;
5342 
5343     if (I + 1 == MaxSize) {
5344       Error(getLoc(), "expected a closing square bracket");
5345       return MatchOperand_ParseFail;
5346     }
5347 
5348     if (!skipToken(AsmToken::Comma, "expected a comma"))
5349       return MatchOperand_ParseFail;
5350   }
5351 
5352   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5353   return MatchOperand_Success;
5354 }
5355 
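     // Parses a named bit modifier that may be negated with a "no" prefix,
     // e.g. "gds" or "nogds", and verifies that it is supported on the current GPU.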
5356 OperandMatchResultTy
5357 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5358                                AMDGPUOperand::ImmTy ImmTy) {
5359   int64_t Bit;
5360   SMLoc S = getLoc();
5361 
5362   if (trySkipId(Name)) {
5363     Bit = 1;
5364   } else if (trySkipId("no", Name)) {
5365     Bit = 0;
5366   } else {
5367     return MatchOperand_NoMatch;
5368   }
5369 
5370   if (Name == "r128" && !hasMIMG_R128()) {
5371     Error(S, "r128 modifier is not supported on this GPU");
5372     return MatchOperand_ParseFail;
5373   }
5374   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5375     Error(S, "a16 modifier is not supported on this GPU");
5376     return MatchOperand_ParseFail;
5377   }
5378   if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) {
5379     Error(S, "dlc modifier is not supported on this GPU");
5380     return MatchOperand_ParseFail;
5381   }
5382   if (!isGFX90A() && ImmTy == AMDGPUOperand::ImmTySCCB)
5383     return MatchOperand_ParseFail;
5384 
5385   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5386     ImmTy = AMDGPUOperand::ImmTyR128A16;
5387 
5388   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5389   return MatchOperand_Success;
5390 }
5391 
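     // Appends an optional immediate operand to Inst: the parsed value recorded in
     // OptionalIdx if present, otherwise the supplied default.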
5392 static void addOptionalImmOperand(
5393   MCInst& Inst, const OperandVector& Operands,
5394   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5395   AMDGPUOperand::ImmTy ImmT,
5396   int64_t Default = 0) {
5397   auto i = OptionalIdx.find(ImmT);
5398   if (i != OptionalIdx.end()) {
5399     unsigned Idx = i->second;
5400     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5401   } else {
5402     Inst.addOperand(MCOperand::createImm(Default));
5403   }
5404 }
5405 
5406 OperandMatchResultTy
5407 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5408                                        StringRef &Value,
5409                                        SMLoc &StringLoc) {
5410   if (!trySkipId(Prefix, AsmToken::Colon))
5411     return MatchOperand_NoMatch;
5412 
5413   StringLoc = getLoc();
5414   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5415                                                   : MatchOperand_ParseFail;
5416 }
5417 
5418 //===----------------------------------------------------------------------===//
5419 // MTBUF format
5420 //===----------------------------------------------------------------------===//
5421 
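     // Tries to parse "<Pref>:<value>" and range-checks the result. Returns false on
     // a parse error or an out-of-range value; returns true otherwise, leaving Fmt
     // unchanged if the prefix is absent.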
5422 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5423                                   int64_t MaxVal,
5424                                   int64_t &Fmt) {
5425   int64_t Val;
5426   SMLoc Loc = getLoc();
5427 
5428   auto Res = parseIntWithPrefix(Pref, Val);
5429   if (Res == MatchOperand_ParseFail)
5430     return false;
5431   if (Res == MatchOperand_NoMatch)
5432     return true;
5433 
5434   if (Val < 0 || Val > MaxVal) {
5435     Error(Loc, Twine("out of range ", StringRef(Pref)));
5436     return false;
5437   }
5438 
5439   Fmt = Val;
5440   return true;
5441 }
5442 
5443 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5444 // values to live in a joint format operand in the MCInst encoding.
5445 OperandMatchResultTy
5446 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5447   using namespace llvm::AMDGPU::MTBUFFormat;
5448 
5449   int64_t Dfmt = DFMT_UNDEF;
5450   int64_t Nfmt = NFMT_UNDEF;
5451 
5452   // dfmt and nfmt can appear in either order, and each is optional.
5453   for (int I = 0; I < 2; ++I) {
5454     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5455       return MatchOperand_ParseFail;
5456 
5457     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5458       return MatchOperand_ParseFail;
5459     }
5460     // Skip the optional comma between dfmt and nfmt,
5461     // but guard against two commas following each other.
5462     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5463         !peekToken().is(AsmToken::Comma)) {
5464       trySkipToken(AsmToken::Comma);
5465     }
5466   }
5467 
5468   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5469     return MatchOperand_NoMatch;
5470 
5471   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5472   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5473 
5474   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5475   return MatchOperand_Success;
5476 }
5477 
5478 OperandMatchResultTy
5479 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5480   using namespace llvm::AMDGPU::MTBUFFormat;
5481 
5482   int64_t Fmt = UFMT_UNDEF;
5483 
5484   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5485     return MatchOperand_ParseFail;
5486 
5487   if (Fmt == UFMT_UNDEF)
5488     return MatchOperand_NoMatch;
5489 
5490   Format = Fmt;
5491   return MatchOperand_Success;
5492 }
5493 
5494 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5495                                     int64_t &Nfmt,
5496                                     StringRef FormatStr,
5497                                     SMLoc Loc) {
5498   using namespace llvm::AMDGPU::MTBUFFormat;
5499   int64_t Format;
5500 
5501   Format = getDfmt(FormatStr);
5502   if (Format != DFMT_UNDEF) {
5503     Dfmt = Format;
5504     return true;
5505   }
5506 
5507   Format = getNfmt(FormatStr, getSTI());
5508   if (Format != NFMT_UNDEF) {
5509     Nfmt = Format;
5510     return true;
5511   }
5512 
5513   Error(Loc, "unsupported format");
5514   return false;
5515 }
5516 
5517 OperandMatchResultTy
5518 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5519                                           SMLoc FormatLoc,
5520                                           int64_t &Format) {
5521   using namespace llvm::AMDGPU::MTBUFFormat;
5522 
5523   int64_t Dfmt = DFMT_UNDEF;
5524   int64_t Nfmt = NFMT_UNDEF;
5525   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5526     return MatchOperand_ParseFail;
5527 
5528   if (trySkipToken(AsmToken::Comma)) {
5529     StringRef Str;
5530     SMLoc Loc = getLoc();
5531     if (!parseId(Str, "expected a format string") ||
5532         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5533       return MatchOperand_ParseFail;
5534     }
5535     if (Dfmt == DFMT_UNDEF) {
5536       Error(Loc, "duplicate numeric format");
5537       return MatchOperand_ParseFail;
5538     } else if (Nfmt == NFMT_UNDEF) {
5539       Error(Loc, "duplicate data format");
5540       return MatchOperand_ParseFail;
5541     }
5542   }
5543 
5544   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5545   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5546 
5547   if (isGFX10Plus()) {
5548     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5549     if (Ufmt == UFMT_UNDEF) {
5550       Error(FormatLoc, "unsupported format");
5551       return MatchOperand_ParseFail;
5552     }
5553     Format = Ufmt;
5554   } else {
5555     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5556   }
5557 
5558   return MatchOperand_Success;
5559 }
5560 
5561 OperandMatchResultTy
5562 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5563                                             SMLoc Loc,
5564                                             int64_t &Format) {
5565   using namespace llvm::AMDGPU::MTBUFFormat;
5566 
5567   auto Id = getUnifiedFormat(FormatStr);
5568   if (Id == UFMT_UNDEF)
5569     return MatchOperand_NoMatch;
5570 
5571   if (!isGFX10Plus()) {
5572     Error(Loc, "unified format is not supported on this GPU");
5573     return MatchOperand_ParseFail;
5574   }
5575 
5576   Format = Id;
5577   return MatchOperand_Success;
5578 }
5579 
5580 OperandMatchResultTy
5581 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5582   using namespace llvm::AMDGPU::MTBUFFormat;
5583   SMLoc Loc = getLoc();
5584 
5585   if (!parseExpr(Format))
5586     return MatchOperand_ParseFail;
5587   if (!isValidFormatEncoding(Format, getSTI())) {
5588     Error(Loc, "out of range format");
5589     return MatchOperand_ParseFail;
5590   }
5591 
5592   return MatchOperand_Success;
5593 }
5594 
5595 OperandMatchResultTy
5596 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5597   using namespace llvm::AMDGPU::MTBUFFormat;
5598 
5599   if (!trySkipId("format", AsmToken::Colon))
5600     return MatchOperand_NoMatch;
5601 
5602   if (trySkipToken(AsmToken::LBrac)) {
5603     StringRef FormatStr;
5604     SMLoc Loc = getLoc();
5605     if (!parseId(FormatStr, "expected a format string"))
5606       return MatchOperand_ParseFail;
5607 
5608     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5609     if (Res == MatchOperand_NoMatch)
5610       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5611     if (Res != MatchOperand_Success)
5612       return Res;
5613 
5614     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5615       return MatchOperand_ParseFail;
5616 
5617     return MatchOperand_Success;
5618   }
5619 
5620   return parseNumericFormat(Format);
5621 }
5622 
5623 OperandMatchResultTy
5624 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5625   using namespace llvm::AMDGPU::MTBUFFormat;
5626 
5627   int64_t Format = getDefaultFormatEncoding(getSTI());
5628   OperandMatchResultTy Res;
5629   SMLoc Loc = getLoc();
5630 
5631   // Parse legacy format syntax.
5632   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5633   if (Res == MatchOperand_ParseFail)
5634     return Res;
5635 
5636   bool FormatFound = (Res == MatchOperand_Success);
5637 
5638   Operands.push_back(
5639     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5640 
5641   if (FormatFound)
5642     trySkipToken(AsmToken::Comma);
5643 
5644   if (isToken(AsmToken::EndOfStatement)) {
5645     // We are expecting an soffset operand,
5646     // but let the matcher handle the error.
5647     return MatchOperand_Success;
5648   }
5649 
5650   // Parse soffset.
5651   Res = parseRegOrImm(Operands);
5652   if (Res != MatchOperand_Success)
5653     return Res;
5654 
5655   trySkipToken(AsmToken::Comma);
5656 
5657   if (!FormatFound) {
5658     Res = parseSymbolicOrNumericFormat(Format);
5659     if (Res == MatchOperand_ParseFail)
5660       return Res;
5661     if (Res == MatchOperand_Success) {
5662       auto Size = Operands.size();
5663       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5664       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5665       Op.setImm(Format);
5666     }
5667     return MatchOperand_Success;
5668   }
5669 
5670   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5671     Error(getLoc(), "duplicate format");
5672     return MatchOperand_ParseFail;
5673   }
5674   return MatchOperand_Success;
5675 }
5676 
5677 //===----------------------------------------------------------------------===//
5678 // ds
5679 //===----------------------------------------------------------------------===//
5680 
5681 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5682                                     const OperandVector &Operands) {
5683   OptionalImmIndexMap OptionalIdx;
5684 
5685   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5686     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5687 
5688     // Add the register arguments
5689     if (Op.isReg()) {
5690       Op.addRegOperands(Inst, 1);
5691       continue;
5692     }
5693 
5694     // Handle optional arguments
5695     OptionalIdx[Op.getImmTy()] = i;
5696   }
5697 
5698   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5699   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5700   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5701 
5702   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5703 }
5704 
5705 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5706                                 bool IsGdsHardcoded) {
5707   OptionalImmIndexMap OptionalIdx;
5708 
5709   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5710     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5711 
5712     // Add the register arguments
5713     if (Op.isReg()) {
5714       Op.addRegOperands(Inst, 1);
5715       continue;
5716     }
5717 
5718     if (Op.isToken() && Op.getToken() == "gds") {
5719       IsGdsHardcoded = true;
5720       continue;
5721     }
5722 
5723     // Handle optional arguments
5724     OptionalIdx[Op.getImmTy()] = i;
5725   }
5726 
5727   AMDGPUOperand::ImmTy OffsetType =
5728     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5729      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5730      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5731                                                       AMDGPUOperand::ImmTyOffset;
5732 
5733   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5734 
5735   if (!IsGdsHardcoded) {
5736     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5737   }
5738   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5739 }
5740 
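     // Builds an export (exp) instruction: collects up to four sources (or "off"
     // placeholders), folds the compressed form by compacting the sources, and
     // derives the enable mask from the sources that are actually present.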
5741 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5742   OptionalImmIndexMap OptionalIdx;
5743 
5744   unsigned OperandIdx[4];
5745   unsigned EnMask = 0;
5746   int SrcIdx = 0;
5747 
5748   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5749     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5750 
5751     // Add the register arguments
5752     if (Op.isReg()) {
5753       assert(SrcIdx < 4);
5754       OperandIdx[SrcIdx] = Inst.size();
5755       Op.addRegOperands(Inst, 1);
5756       ++SrcIdx;
5757       continue;
5758     }
5759 
5760     if (Op.isOff()) {
5761       assert(SrcIdx < 4);
5762       OperandIdx[SrcIdx] = Inst.size();
5763       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5764       ++SrcIdx;
5765       continue;
5766     }
5767 
5768     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5769       Op.addImmOperands(Inst, 1);
5770       continue;
5771     }
5772 
5773     if (Op.isToken() && Op.getToken() == "done")
5774       continue;
5775 
5776     // Handle optional arguments
5777     OptionalIdx[Op.getImmTy()] = i;
5778   }
5779 
5780   assert(SrcIdx == 4);
5781 
5782   bool Compr = false;
5783   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5784     Compr = true;
5785     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5786     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5787     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5788   }
5789 
5790   for (auto i = 0; i < SrcIdx; ++i) {
5791     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5792       EnMask |= Compr ? (0x3 << (i * 2)) : (0x1 << i);
5793     }
5794   }
5795 
5796   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5797   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5798 
5799   Inst.addOperand(MCOperand::createImm(EnMask));
5800 }
5801 
5802 //===----------------------------------------------------------------------===//
5803 // s_waitcnt
5804 //===----------------------------------------------------------------------===//
5805 
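     // Folds one counter value into the combined s_waitcnt immediate. If the value
     // does not round-trip through decode, either saturate to the field maximum
     // (when Saturate is set) or report failure.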
5806 static bool
5807 encodeCnt(
5808   const AMDGPU::IsaVersion ISA,
5809   int64_t &IntVal,
5810   int64_t CntVal,
5811   bool Saturate,
5812   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5813   unsigned (*decode)(const IsaVersion &Version, unsigned))
5814 {
5815   bool Failed = false;
5816 
5817   IntVal = encode(ISA, IntVal, CntVal);
5818   if (CntVal != decode(ISA, IntVal)) {
5819     if (Saturate) {
5820       IntVal = encode(ISA, IntVal, -1);
5821     } else {
5822       Failed = true;
5823     }
5824   }
5825   return Failed;
5826 }
5827 
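     // Parses a single "<name>(<value>)" component of an s_waitcnt operand,
     // e.g. vmcnt(0) or lgkmcnt_sat(1), and merges it into IntVal.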
5828 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5829 
5830   SMLoc CntLoc = getLoc();
5831   StringRef CntName = getTokenStr();
5832 
5833   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5834       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5835     return false;
5836 
5837   int64_t CntVal;
5838   SMLoc ValLoc = getLoc();
5839   if (!parseExpr(CntVal))
5840     return false;
5841 
5842   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5843 
5844   bool Failed = true;
5845   bool Sat = CntName.endswith("_sat");
5846 
5847   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5848     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5849   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5850     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5851   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5852     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5853   } else {
5854     Error(CntLoc, "invalid counter name " + CntName);
5855     return false;
5856   }
5857 
5858   if (Failed) {
5859     Error(ValLoc, "too large value for " + CntName);
5860     return false;
5861   }
5862 
5863   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5864     return false;
5865 
5866   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5867     if (isToken(AsmToken::EndOfStatement)) {
5868       Error(getLoc(), "expected a counter name");
5869       return false;
5870     }
5871   }
5872 
5873   return true;
5874 }
5875 
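     // An s_waitcnt operand is either a list of named counters,
     // e.g. "vmcnt(0) expcnt(0) lgkmcnt(0)", or a plain absolute expression.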
5876 OperandMatchResultTy
5877 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5878   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5879   int64_t Waitcnt = getWaitcntBitMask(ISA);
5880   SMLoc S = getLoc();
5881 
5882   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5883     while (!isToken(AsmToken::EndOfStatement)) {
5884       if (!parseCnt(Waitcnt))
5885         return MatchOperand_ParseFail;
5886     }
5887   } else {
5888     if (!parseExpr(Waitcnt))
5889       return MatchOperand_ParseFail;
5890   }
5891 
5892   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5893   return MatchOperand_Success;
5894 }
5895 
5896 bool
5897 AMDGPUOperand::isSWaitCnt() const {
5898   return isImm();
5899 }
5900 
5901 //===----------------------------------------------------------------------===//
5902 // hwreg
5903 //===----------------------------------------------------------------------===//
5904 
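     // Parses the body of a hwreg(...) operand:
     // "<register name or code> [, <bit offset>, <bit width>]",
     // e.g. hwreg(HW_REG_TRAPSTS, 0, 1). The opening parenthesis has already been
     // consumed by the caller.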
5905 bool
5906 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5907                                 OperandInfoTy &Offset,
5908                                 OperandInfoTy &Width) {
5909   using namespace llvm::AMDGPU::Hwreg;
5910 
5911   // The register may be specified by name or by using a numeric code.
5912   HwReg.Loc = getLoc();
5913   if (isToken(AsmToken::Identifier) &&
5914       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5915     HwReg.IsSymbolic = true;
5916     lex(); // skip register name
5917   } else if (!parseExpr(HwReg.Id, "a register name")) {
5918     return false;
5919   }
5920 
5921   if (trySkipToken(AsmToken::RParen))
5922     return true;
5923 
5924   // parse optional params
5925   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
5926     return false;
5927 
5928   Offset.Loc = getLoc();
5929   if (!parseExpr(Offset.Id))
5930     return false;
5931 
5932   if (!skipToken(AsmToken::Comma, "expected a comma"))
5933     return false;
5934 
5935   Width.Loc = getLoc();
5936   return parseExpr(Width.Id) &&
5937          skipToken(AsmToken::RParen, "expected a closing parenthesis");
5938 }
5939 
5940 bool
5941 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5942                                const OperandInfoTy &Offset,
5943                                const OperandInfoTy &Width) {
5944 
5945   using namespace llvm::AMDGPU::Hwreg;
5946 
5947   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5948     Error(HwReg.Loc,
5949           "specified hardware register is not supported on this GPU");
5950     return false;
5951   }
5952   if (!isValidHwreg(HwReg.Id)) {
5953     Error(HwReg.Loc,
5954           "invalid code of hardware register: only 6-bit values are legal");
5955     return false;
5956   }
5957   if (!isValidHwregOffset(Offset.Id)) {
5958     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
5959     return false;
5960   }
5961   if (!isValidHwregWidth(Width.Id)) {
5962     Error(Width.Loc,
5963           "invalid bitfield width: only values from 1 to 32 are legal");
5964     return false;
5965   }
5966   return true;
5967 }
5968 
5969 OperandMatchResultTy
5970 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5971   using namespace llvm::AMDGPU::Hwreg;
5972 
5973   int64_t ImmVal = 0;
5974   SMLoc Loc = getLoc();
5975 
5976   if (trySkipId("hwreg", AsmToken::LParen)) {
5977     OperandInfoTy HwReg(ID_UNKNOWN_);
5978     OperandInfoTy Offset(OFFSET_DEFAULT_);
5979     OperandInfoTy Width(WIDTH_DEFAULT_);
5980     if (parseHwregBody(HwReg, Offset, Width) &&
5981         validateHwreg(HwReg, Offset, Width)) {
5982       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
5983     } else {
5984       return MatchOperand_ParseFail;
5985     }
5986   } else if (parseExpr(ImmVal, "a hwreg macro")) {
5987     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5988       Error(Loc, "invalid immediate: only 16-bit values are legal");
5989       return MatchOperand_ParseFail;
5990     }
5991   } else {
5992     return MatchOperand_ParseFail;
5993   }
5994 
5995   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5996   return MatchOperand_Success;
5997 }
5998 
5999 bool AMDGPUOperand::isHwreg() const {
6000   return isImmTy(ImmTyHwreg);
6001 }
6002 
6003 //===----------------------------------------------------------------------===//
6004 // sendmsg
6005 //===----------------------------------------------------------------------===//
6006 
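     // Parses the body of a sendmsg(...) operand:
     // "<message> [, <operation> [, <stream id>]]",
     // e.g. sendmsg(MSG_GS, GS_OP_EMIT, 0). The opening parenthesis has already been
     // consumed by the caller.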
6007 bool
6008 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6009                                   OperandInfoTy &Op,
6010                                   OperandInfoTy &Stream) {
6011   using namespace llvm::AMDGPU::SendMsg;
6012 
6013   Msg.Loc = getLoc();
6014   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6015     Msg.IsSymbolic = true;
6016     lex(); // skip message name
6017   } else if (!parseExpr(Msg.Id, "a message name")) {
6018     return false;
6019   }
6020 
6021   if (trySkipToken(AsmToken::Comma)) {
6022     Op.IsDefined = true;
6023     Op.Loc = getLoc();
6024     if (isToken(AsmToken::Identifier) &&
6025         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6026       lex(); // skip operation name
6027     } else if (!parseExpr(Op.Id, "an operation name")) {
6028       return false;
6029     }
6030 
6031     if (trySkipToken(AsmToken::Comma)) {
6032       Stream.IsDefined = true;
6033       Stream.Loc = getLoc();
6034       if (!parseExpr(Stream.Id))
6035         return false;
6036     }
6037   }
6038 
6039   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6040 }
6041 
6042 bool
6043 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6044                                  const OperandInfoTy &Op,
6045                                  const OperandInfoTy &Stream) {
6046   using namespace llvm::AMDGPU::SendMsg;
6047 
6048   // Validation strictness depends on whether the message is specified
6049   // in a symbolic or in a numeric form. In the latter case
6050   // only the encoding possibility is checked.
6051   bool Strict = Msg.IsSymbolic;
6052 
6053   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6054     Error(Msg.Loc, "invalid message id");
6055     return false;
6056   }
6057   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6058     if (Op.IsDefined) {
6059       Error(Op.Loc, "message does not support operations");
6060     } else {
6061       Error(Msg.Loc, "missing message operation");
6062     }
6063     return false;
6064   }
6065   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6066     Error(Op.Loc, "invalid operation id");
6067     return false;
6068   }
6069   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6070     Error(Stream.Loc, "message operation does not support streams");
6071     return false;
6072   }
6073   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6074     Error(Stream.Loc, "invalid message stream id");
6075     return false;
6076   }
6077   return true;
6078 }
6079 
6080 OperandMatchResultTy
6081 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6082   using namespace llvm::AMDGPU::SendMsg;
6083 
6084   int64_t ImmVal = 0;
6085   SMLoc Loc = getLoc();
6086 
6087   if (trySkipId("sendmsg", AsmToken::LParen)) {
6088     OperandInfoTy Msg(ID_UNKNOWN_);
6089     OperandInfoTy Op(OP_NONE_);
6090     OperandInfoTy Stream(STREAM_ID_NONE_);
6091     if (parseSendMsgBody(Msg, Op, Stream) &&
6092         validateSendMsg(Msg, Op, Stream)) {
6093       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6094     } else {
6095       return MatchOperand_ParseFail;
6096     }
6097   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6098     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6099       Error(Loc, "invalid immediate: only 16-bit values are legal");
6100       return MatchOperand_ParseFail;
6101     }
6102   } else {
6103     return MatchOperand_ParseFail;
6104   }
6105 
6106   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6107   return MatchOperand_Success;
6108 }
6109 
6110 bool AMDGPUOperand::isSendMsg() const {
6111   return isImmTy(ImmTySendMsg);
6112 }
6113 
6114 //===----------------------------------------------------------------------===//
6115 // v_interp
6116 //===----------------------------------------------------------------------===//
6117 
6118 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6119   StringRef Str;
6120   SMLoc S = getLoc();
6121 
6122   if (!parseId(Str))
6123     return MatchOperand_NoMatch;
6124 
6125   int Slot = StringSwitch<int>(Str)
6126     .Case("p10", 0)
6127     .Case("p20", 1)
6128     .Case("p0", 2)
6129     .Default(-1);
6130 
6131   if (Slot == -1) {
6132     Error(S, "invalid interpolation slot");
6133     return MatchOperand_ParseFail;
6134   }
6135 
6136   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6137                                               AMDGPUOperand::ImmTyInterpSlot));
6138   return MatchOperand_Success;
6139 }
6140 
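     // Parses an interpolation attribute of the form attr<N>.<chan>, e.g. attr0.x,
     // and emits the attribute number and channel as separate immediate operands.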
6141 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6142   StringRef Str;
6143   SMLoc S = getLoc();
6144 
6145   if (!parseId(Str))
6146     return MatchOperand_NoMatch;
6147 
6148   if (!Str.startswith("attr")) {
6149     Error(S, "invalid interpolation attribute");
6150     return MatchOperand_ParseFail;
6151   }
6152 
6153   StringRef Chan = Str.take_back(2);
6154   int AttrChan = StringSwitch<int>(Chan)
6155     .Case(".x", 0)
6156     .Case(".y", 1)
6157     .Case(".z", 2)
6158     .Case(".w", 3)
6159     .Default(-1);
6160   if (AttrChan == -1) {
6161     Error(S, "invalid or missing interpolation attribute channel");
6162     return MatchOperand_ParseFail;
6163   }
6164 
6165   Str = Str.drop_back(2).drop_front(4);
6166 
6167   uint8_t Attr;
6168   if (Str.getAsInteger(10, Attr)) {
6169     Error(S, "invalid or missing interpolation attribute number");
6170     return MatchOperand_ParseFail;
6171   }
6172 
6173   if (Attr > 63) {
6174     Error(S, "out of bounds interpolation attribute number");
6175     return MatchOperand_ParseFail;
6176   }
6177 
6178   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6179 
6180   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6181                                               AMDGPUOperand::ImmTyInterpAttr));
6182   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6183                                               AMDGPUOperand::ImmTyAttrChan));
6184   return MatchOperand_Success;
6185 }
6186 
6187 //===----------------------------------------------------------------------===//
6188 // exp
6189 //===----------------------------------------------------------------------===//
6190 
6191 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6192   using namespace llvm::AMDGPU::Exp;
6193 
6194   StringRef Str;
6195   SMLoc S = getLoc();
6196 
6197   if (!parseId(Str))
6198     return MatchOperand_NoMatch;
6199 
6200   unsigned Id = getTgtId(Str);
6201   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6202     Error(S, (Id == ET_INVALID) ?
6203                 "invalid exp target" :
6204                 "exp target is not supported on this GPU");
6205     return MatchOperand_ParseFail;
6206   }
6207 
6208   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6209                                               AMDGPUOperand::ImmTyExpTgt));
6210   return MatchOperand_Success;
6211 }
6212 
6213 //===----------------------------------------------------------------------===//
6214 // parser helpers
6215 //===----------------------------------------------------------------------===//
6216 
6217 bool
6218 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6219   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6220 }
6221 
6222 bool
6223 AMDGPUAsmParser::isId(const StringRef Id) const {
6224   return isId(getToken(), Id);
6225 }
6226 
6227 bool
6228 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6229   return getTokenKind() == Kind;
6230 }
6231 
6232 bool
6233 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6234   if (isId(Id)) {
6235     lex();
6236     return true;
6237   }
6238   return false;
6239 }
6240 
6241 bool
6242 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6243   if (isToken(AsmToken::Identifier)) {
6244     StringRef Tok = getTokenStr();
6245     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6246       lex();
6247       return true;
6248     }
6249   }
6250   return false;
6251 }
6252 
6253 bool
6254 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6255   if (isId(Id) && peekToken().is(Kind)) {
6256     lex();
6257     lex();
6258     return true;
6259   }
6260   return false;
6261 }
6262 
6263 bool
6264 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6265   if (isToken(Kind)) {
6266     lex();
6267     return true;
6268   }
6269   return false;
6270 }
6271 
6272 bool
6273 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6274                            const StringRef ErrMsg) {
6275   if (!trySkipToken(Kind)) {
6276     Error(getLoc(), ErrMsg);
6277     return false;
6278   }
6279   return true;
6280 }
6281 
6282 bool
6283 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6284   SMLoc S = getLoc();
6285 
6286   const MCExpr *Expr;
6287   if (Parser.parseExpression(Expr))
6288     return false;
6289 
6290   if (Expr->evaluateAsAbsolute(Imm))
6291     return true;
6292 
6293   if (Expected.empty()) {
6294     Error(S, "expected absolute expression");
6295   } else {
6296     Error(S, Twine("expected ", Expected) +
6297              Twine(" or an absolute expression"));
6298   }
6299   return false;
6300 }
6301 
6302 bool
6303 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6304   SMLoc S = getLoc();
6305 
6306   const MCExpr *Expr;
6307   if (Parser.parseExpression(Expr))
6308     return false;
6309 
6310   int64_t IntVal;
6311   if (Expr->evaluateAsAbsolute(IntVal)) {
6312     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6313   } else {
6314     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6315   }
6316   return true;
6317 }
6318 
6319 bool
6320 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6321   if (isToken(AsmToken::String)) {
6322     Val = getToken().getStringContents();
6323     lex();
6324     return true;
6325   } else {
6326     Error(getLoc(), ErrMsg);
6327     return false;
6328   }
6329 }
6330 
6331 bool
6332 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6333   if (isToken(AsmToken::Identifier)) {
6334     Val = getTokenStr();
6335     lex();
6336     return true;
6337   } else {
6338     if (!ErrMsg.empty())
6339       Error(getLoc(), ErrMsg);
6340     return false;
6341   }
6342 }
6343 
6344 AsmToken
6345 AMDGPUAsmParser::getToken() const {
6346   return Parser.getTok();
6347 }
6348 
6349 AsmToken
6350 AMDGPUAsmParser::peekToken() {
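  // Do not peek past the end of the current statement.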
6351   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6352 }
6353 
6354 void
6355 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6356   auto TokCount = getLexer().peekTokens(Tokens);
6357 
6358   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6359     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6360 }
6361 
6362 AsmToken::TokenKind
6363 AMDGPUAsmParser::getTokenKind() const {
6364   return getLexer().getKind();
6365 }
6366 
6367 SMLoc
6368 AMDGPUAsmParser::getLoc() const {
6369   return getToken().getLoc();
6370 }
6371 
6372 StringRef
6373 AMDGPUAsmParser::getTokenStr() const {
6374   return getToken().getString();
6375 }
6376 
6377 void
6378 AMDGPUAsmParser::lex() {
6379   Parser.Lex();
6380 }
6381 
6382 SMLoc
6383 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6384                                const OperandVector &Operands) const {
6385   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6386     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6387     if (Test(Op))
6388       return Op.getStartLoc();
6389   }
6390   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6391 }
6392 
6393 SMLoc
6394 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6395                            const OperandVector &Operands) const {
6396   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6397   return getOperandLoc(Test, Operands);
6398 }
6399 
6400 SMLoc
6401 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6402                            const OperandVector &Operands) const {
6403   auto Test = [=](const AMDGPUOperand& Op) {
6404     return Op.isRegKind() && Op.getReg() == Reg;
6405   };
6406   return getOperandLoc(Test, Operands);
6407 }
6408 
6409 SMLoc
6410 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6411   auto Test = [](const AMDGPUOperand& Op) {
6412     return Op.IsImmKindLiteral() || Op.isExpr();
6413   };
6414   return getOperandLoc(Test, Operands);
6415 }
6416 
6417 SMLoc
6418 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6419   auto Test = [](const AMDGPUOperand& Op) {
6420     return Op.isImmKindConst();
6421   };
6422   return getOperandLoc(Test, Operands);
6423 }
6424 
6425 //===----------------------------------------------------------------------===//
6426 // swizzle
6427 //===----------------------------------------------------------------------===//
6428 
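// Pack the AND/OR/XOR lane masks into a BITMASK_PERM swizzle encoding.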
6429 LLVM_READNONE
6430 static unsigned
6431 encodeBitmaskPerm(const unsigned AndMask,
6432                   const unsigned OrMask,
6433                   const unsigned XorMask) {
6434   using namespace llvm::AMDGPU::Swizzle;
6435 
6436   return BITMASK_PERM_ENC |
6437          (AndMask << BITMASK_AND_SHIFT) |
6438          (OrMask  << BITMASK_OR_SHIFT)  |
6439          (XorMask << BITMASK_XOR_SHIFT);
6440 }
6441 
6442 bool
6443 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6444                                      const unsigned MinVal,
6445                                      const unsigned MaxVal,
6446                                      const StringRef ErrMsg,
6447                                      SMLoc &Loc) {
6448   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6449     return false;
6450   }
6451   Loc = getLoc();
6452   if (!parseExpr(Op)) {
6453     return false;
6454   }
6455   if (Op < MinVal || Op > MaxVal) {
6456     Error(Loc, ErrMsg);
6457     return false;
6458   }
6459 
6460   return true;
6461 }
6462 
6463 bool
6464 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6465                                       const unsigned MinVal,
6466                                       const unsigned MaxVal,
6467                                       const StringRef ErrMsg) {
6468   SMLoc Loc;
6469   for (unsigned i = 0; i < OpNum; ++i) {
6470     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6471       return false;
6472   }
6473 
6474   return true;
6475 }
6476 
6477 bool
6478 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6479   using namespace llvm::AMDGPU::Swizzle;
6480 
6481   int64_t Lane[LANE_NUM];
6482   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6483                            "expected a 2-bit lane id")) {
6484     Imm = QUAD_PERM_ENC;
6485     for (unsigned I = 0; I < LANE_NUM; ++I) {
6486       Imm |= Lane[I] << (LANE_SHIFT * I);
6487     }
6488     return true;
6489   }
6490   return false;
6491 }
6492 
6493 bool
6494 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6495   using namespace llvm::AMDGPU::Swizzle;
6496 
6497   SMLoc Loc;
6498   int64_t GroupSize;
6499   int64_t LaneIdx;
6500 
6501   if (!parseSwizzleOperand(GroupSize,
6502                            2, 32,
6503                            "group size must be in the interval [2,32]",
6504                            Loc)) {
6505     return false;
6506   }
6507   if (!isPowerOf2_64(GroupSize)) {
6508     Error(Loc, "group size must be a power of two");
6509     return false;
6510   }
6511   if (parseSwizzleOperand(LaneIdx,
6512                           0, GroupSize - 1,
6513                           "lane id must be in the interval [0,group size - 1]",
6514                           Loc)) {
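    // AND-ing with the group mask keeps the group base lane, and OR-ing in
    // the lane index makes every lane in the group read from that lane.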
6515     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6516     return true;
6517   }
6518   return false;
6519 }
6520 
6521 bool
6522 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6523   using namespace llvm::AMDGPU::Swizzle;
6524 
6525   SMLoc Loc;
6526   int64_t GroupSize;
6527 
6528   if (!parseSwizzleOperand(GroupSize,
6529                            2, 32,
6530                            "group size must be in the interval [2,32]",
6531                            Loc)) {
6532     return false;
6533   }
6534   if (!isPowerOf2_64(GroupSize)) {
6535     Error(Loc, "group size must be a power of two");
6536     return false;
6537   }
6538 
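  // XOR-ing a lane id with (GroupSize - 1) reverses the lanes within each group.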
6539   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6540   return true;
6541 }
6542 
6543 bool
6544 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6545   using namespace llvm::AMDGPU::Swizzle;
6546 
6547   SMLoc Loc;
6548   int64_t GroupSize;
6549 
6550   if (!parseSwizzleOperand(GroupSize,
6551                            1, 16,
6552                            "group size must be in the interval [1,16]",
6553                            Loc)) {
6554     return false;
6555   }
6556   if (!isPowerOf2_64(GroupSize)) {
6557     Error(Loc, "group size must be a power of two");
6558     return false;
6559   }
6560 
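  // XOR-ing a lane id with the group size swaps adjacent groups of lanes.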
6561   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6562   return true;
6563 }
6564 
6565 bool
6566 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6567   using namespace llvm::AMDGPU::Swizzle;
6568 
6569   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6570     return false;
6571   }
6572 
6573   StringRef Ctl;
6574   SMLoc StrLoc = getLoc();
6575   if (!parseString(Ctl)) {
6576     return false;
6577   }
6578   if (Ctl.size() != BITMASK_WIDTH) {
6579     Error(StrLoc, "expected a 5-character mask");
6580     return false;
6581   }
6582 
6583   unsigned AndMask = 0;
6584   unsigned OrMask = 0;
6585   unsigned XorMask = 0;
6586 
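  // Mask characters: '0' forces a bit to 0, '1' forces it to 1,
  // 'p' preserves the bit and 'i' inverts it.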
6587   for (size_t i = 0; i < Ctl.size(); ++i) {
6588     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6589     switch(Ctl[i]) {
6590     default:
6591       Error(StrLoc, "invalid mask");
6592       return false;
6593     case '0':
6594       break;
6595     case '1':
6596       OrMask |= Mask;
6597       break;
6598     case 'p':
6599       AndMask |= Mask;
6600       break;
6601     case 'i':
6602       AndMask |= Mask;
6603       XorMask |= Mask;
6604       break;
6605     }
6606   }
6607 
6608   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6609   return true;
6610 }
6611 
6612 bool
6613 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6614 
6615   SMLoc OffsetLoc = getLoc();
6616 
6617   if (!parseExpr(Imm, "a swizzle macro")) {
6618     return false;
6619   }
6620   if (!isUInt<16>(Imm)) {
6621     Error(OffsetLoc, "expected a 16-bit offset");
6622     return false;
6623   }
6624   return true;
6625 }
6626 
6627 bool
6628 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6629   using namespace llvm::AMDGPU::Swizzle;
6630 
6631   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6632 
6633     SMLoc ModeLoc = getLoc();
6634     bool Ok = false;
6635 
6636     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6637       Ok = parseSwizzleQuadPerm(Imm);
6638     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6639       Ok = parseSwizzleBitmaskPerm(Imm);
6640     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6641       Ok = parseSwizzleBroadcast(Imm);
6642     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6643       Ok = parseSwizzleSwap(Imm);
6644     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6645       Ok = parseSwizzleReverse(Imm);
6646     } else {
6647       Error(ModeLoc, "expected a swizzle mode");
6648     }
6649 
6650     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6651   }
6652 
6653   return false;
6654 }
6655 
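// Parses "offset:swizzle(<mode>, ...)" or a plain "offset:<16-bit imm>";
// if the "offset" prefix is absent, other optional operands are tried.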
6656 OperandMatchResultTy
6657 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6658   SMLoc S = getLoc();
6659   int64_t Imm = 0;
6660 
6661   if (trySkipId("offset")) {
6662 
6663     bool Ok = false;
6664     if (skipToken(AsmToken::Colon, "expected a colon")) {
6665       if (trySkipId("swizzle")) {
6666         Ok = parseSwizzleMacro(Imm);
6667       } else {
6668         Ok = parseSwizzleOffset(Imm);
6669       }
6670     }
6671 
6672     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6673 
6674     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
6675   } else {
6676     // Swizzle "offset" operand is optional.
6677     // If it is omitted, try parsing other optional operands.
6678     return parseOptionalOpr(Operands);
6679   }
6680 }
6681 
6682 bool
6683 AMDGPUOperand::isSwizzle() const {
6684   return isImmTy(ImmTySwizzle);
6685 }
6686 
6687 //===----------------------------------------------------------------------===//
6688 // VGPR Index Mode
6689 //===----------------------------------------------------------------------===//
6690 
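// Parses the mode list of a gpr_idx(...) operand; the caller is expected to
// have already consumed the leading "gpr_idx(".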
6691 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6692 
6693   using namespace llvm::AMDGPU::VGPRIndexMode;
6694 
6695   if (trySkipToken(AsmToken::RParen)) {
6696     return OFF;
6697   }
6698 
6699   int64_t Imm = 0;
6700 
6701   while (true) {
6702     unsigned Mode = 0;
6703     SMLoc S = getLoc();
6704 
6705     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6706       if (trySkipId(IdSymbolic[ModeId])) {
6707         Mode = 1 << ModeId;
6708         break;
6709       }
6710     }
6711 
6712     if (Mode == 0) {
6713       Error(S, (Imm == 0)?
6714                "expected a VGPR index mode or a closing parenthesis" :
6715                "expected a VGPR index mode");
6716       return UNDEF;
6717     }
6718 
6719     if (Imm & Mode) {
6720       Error(S, "duplicate VGPR index mode");
6721       return UNDEF;
6722     }
6723     Imm |= Mode;
6724 
6725     if (trySkipToken(AsmToken::RParen))
6726       break;
6727     if (!skipToken(AsmToken::Comma,
6728                    "expected a comma or a closing parenthesis"))
6729       return UNDEF;
6730   }
6731 
6732   return Imm;
6733 }
6734 
6735 OperandMatchResultTy
6736 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6737 
6738   using namespace llvm::AMDGPU::VGPRIndexMode;
6739 
6740   int64_t Imm = 0;
6741   SMLoc S = getLoc();
6742 
6743   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6744     Imm = parseGPRIdxMacro();
6745     if (Imm == UNDEF)
6746       return MatchOperand_ParseFail;
6747   } else {
6748     if (getParser().parseAbsoluteExpression(Imm))
6749       return MatchOperand_ParseFail;
6750     if (Imm < 0 || !isUInt<4>(Imm)) {
6751       Error(S, "invalid immediate: only 4-bit values are legal");
6752       return MatchOperand_ParseFail;
6753     }
6754   }
6755 
6756   Operands.push_back(
6757       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6758   return MatchOperand_Success;
6759 }
6760 
6761 bool AMDGPUOperand::isGPRIdxMode() const {
6762   return isImmTy(ImmTyGprIdxMode);
6763 }
6764 
6765 //===----------------------------------------------------------------------===//
6766 // sopp branch targets
6767 //===----------------------------------------------------------------------===//
6768 
6769 OperandMatchResultTy
6770 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6771 
6772   // Make sure we are not parsing something
6773   // that looks like a label or an expression but is not.
6774   // This will improve error messages.
6775   if (isRegister() || isModifier())
6776     return MatchOperand_NoMatch;
6777 
6778   if (!parseExpr(Operands))
6779     return MatchOperand_ParseFail;
6780 
6781   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6782   assert(Opr.isImm() || Opr.isExpr());
6783   SMLoc Loc = Opr.getStartLoc();
6784 
6785   // Currently we do not support arbitrary expressions as branch targets.
6786   // Only labels and absolute expressions are accepted.
6787   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6788     Error(Loc, "expected an absolute expression or a label");
6789   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6790     Error(Loc, "expected a 16-bit signed jump offset");
6791   }
6792 
6793   return MatchOperand_Success;
6794 }
6795 
6796 //===----------------------------------------------------------------------===//
6797 // Boolean holding registers
6798 //===----------------------------------------------------------------------===//
6799 
6800 OperandMatchResultTy
6801 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6802   return parseReg(Operands);
6803 }
6804 
6805 //===----------------------------------------------------------------------===//
6806 // mubuf
6807 //===----------------------------------------------------------------------===//
6808 
6809 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6810   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6811 }
6812 
6813 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSCCB() const {
6814   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySCCB);
6815 }
6816 
6817 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6818   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6819 }
6820 
6821 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
6822   return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
6823 }
6824 
6825 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6826   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6827 }
6828 
6829 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6830                                const OperandVector &Operands,
6831                                bool IsAtomic,
6832                                bool IsAtomicReturn,
6833                                bool IsLds) {
6834   bool IsLdsOpcode = IsLds;
6835   bool HasLdsModifier = false;
6836   OptionalImmIndexMap OptionalIdx;
6837   assert(IsAtomicReturn ? IsAtomic : true);
6838   unsigned FirstOperandIdx = 1;
6839 
6840   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6841     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6842 
6843     // Add the register arguments
6844     if (Op.isReg()) {
6845       Op.addRegOperands(Inst, 1);
6846       // Insert a tied src for the atomic return dst.
6847       // This cannot be postponed as subsequent calls to
6848       // addImmOperands rely on the correct number of MC operands.
6849       if (IsAtomicReturn && i == FirstOperandIdx)
6850         Op.addRegOperands(Inst, 1);
6851       continue;
6852     }
6853 
6854     // Handle the case where soffset is an immediate
6855     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6856       Op.addImmOperands(Inst, 1);
6857       continue;
6858     }
6859 
6860     HasLdsModifier |= Op.isLDS();
6861 
6862     // Handle tokens like 'offen' which are sometimes hard-coded into the
6863     // asm string.  There are no MCInst operands for these.
6864     if (Op.isToken()) {
6865       continue;
6866     }
6867     assert(Op.isImm());
6868 
6869     // Handle optional arguments
6870     OptionalIdx[Op.getImmTy()] = i;
6871   }
6872 
6873   // This is a workaround for an llvm quirk which may result in an
6874   // incorrect instruction selection. Lds and non-lds versions of
6875   // MUBUF instructions are identical except that lds versions
6876   // have a mandatory 'lds' modifier. However, this modifier follows
6877   // the optional modifiers, and the llvm asm matcher regards the 'lds'
6878   // modifier as an optional one. As a result, an lds version
6879   // of an opcode may be selected even if it has no 'lds' modifier.
6880   if (IsLdsOpcode && !HasLdsModifier) {
6881     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6882     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6883       Inst.setOpcode(NoLdsOpcode);
6884       IsLdsOpcode = false;
6885     }
6886   }
6887 
6888   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6889   if (!IsAtomic || IsAtomicReturn) {
6890     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC,
6891                           IsAtomicReturn ? -1 : 0);
6892   }
6893   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6894 
6895   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6896     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6897   }
6898 
6899   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6900   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
6901   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB);
6902 }
6903 
6904 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6905   OptionalImmIndexMap OptionalIdx;
6906 
6907   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6908     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6909 
6910     // Add the register arguments
6911     if (Op.isReg()) {
6912       Op.addRegOperands(Inst, 1);
6913       continue;
6914     }
6915 
6916     // Handle the case where soffset is an immediate
6917     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6918       Op.addImmOperands(Inst, 1);
6919       continue;
6920     }
6921 
6922     // Handle tokens like 'offen' which are sometimes hard-coded into the
6923     // asm string.  There are no MCInst operands for these.
6924     if (Op.isToken()) {
6925       continue;
6926     }
6927     assert(Op.isImm());
6928 
6929     // Handle optional arguments
6930     OptionalIdx[Op.getImmTy()] = i;
6931   }
6932 
6933   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6934                         AMDGPUOperand::ImmTyOffset);
6935   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6936   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6937   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6938   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6939   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6940   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
6941   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB);
6942 }
6943 
6944 //===----------------------------------------------------------------------===//
6945 // mimg
6946 //===----------------------------------------------------------------------===//
6947 
6948 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6949                               bool IsAtomic) {
6950   unsigned I = 1;
6951   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6952   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6953     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6954   }
6955 
6956   if (IsAtomic) {
6957     // Add src, same as dst
6958     assert(Desc.getNumDefs() == 1);
6959     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6960   }
6961 
6962   OptionalImmIndexMap OptionalIdx;
6963 
6964   for (unsigned E = Operands.size(); I != E; ++I) {
6965     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6966 
6967     // Add the register arguments
6968     if (Op.isReg()) {
6969       Op.addRegOperands(Inst, 1);
6970     } else if (Op.isImmModifier()) {
6971       OptionalIdx[Op.getImmTy()] = I;
6972     } else if (!Op.isToken()) {
6973       llvm_unreachable("unexpected operand type");
6974     }
6975   }
6976 
6977   bool IsGFX10Plus = isGFX10Plus();
6978 
6979   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6980   if (IsGFX10Plus)
6981     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6982   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6983 
6984   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::sccb) != -1)
6985     addOptionalImmOperand(Inst, Operands, OptionalIdx,
6986                           AMDGPUOperand::ImmTySCCB);
6987 
6988   if (IsGFX10Plus)
6989     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6990 
6991   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6992   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6993   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6994   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
6995     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6996   if (IsGFX10Plus)
6997     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6998   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6999   if (!IsGFX10Plus)
7000     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7001   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7002 }
7003 
7004 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7005   cvtMIMG(Inst, Operands, true);
7006 }
7007 
7008 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7009                                       const OperandVector &Operands) {
7010   for (unsigned I = 1; I < Operands.size(); ++I) {
7011     auto &Operand = (AMDGPUOperand &)*Operands[I];
7012     if (Operand.isReg())
7013       Operand.addRegOperands(Inst, 1);
7014   }
7015 
7016   Inst.addOperand(MCOperand::createImm(1)); // a16
7017 }
7018 
7019 //===----------------------------------------------------------------------===//
7020 // smrd
7021 //===----------------------------------------------------------------------===//
7022 
7023 bool AMDGPUOperand::isSMRDOffset8() const {
7024   return isImm() && isUInt<8>(getImm());
7025 }
7026 
7027 bool AMDGPUOperand::isSMEMOffset() const {
7028   return isImm(); // Offset range is checked later by validator.
7029 }
7030 
7031 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7032   // 32-bit literals are only supported on CI and we only want to use them
7033   // when the offset is > 8 bits.
7034   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7035 }
7036 
7037 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7038   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7039 }
7040 
7041 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7042   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7043 }
7044 
7045 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7046   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7047 }
7048 
7049 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7050   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7051 }
7052 
7053 //===----------------------------------------------------------------------===//
7054 // vop3
7055 //===----------------------------------------------------------------------===//
7056 
7057 static bool ConvertOmodMul(int64_t &Mul) {
7058   if (Mul != 1 && Mul != 2 && Mul != 4)
7059     return false;
7060 
7061   Mul >>= 1;
7062   return true;
7063 }
7064 
7065 static bool ConvertOmodDiv(int64_t &Div) {
7066   if (Div == 1) {
7067     Div = 0;
7068     return true;
7069   }
7070 
7071   if (Div == 2) {
7072     Div = 3;
7073     return true;
7074   }
7075 
7076   return false;
7077 }
7078 
7079 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7080 // This is intentional and ensures compatibility with sp3.
7081 // See bug 35397 for details.
7082 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7083   if (BoundCtrl == 0 || BoundCtrl == 1) {
7084     BoundCtrl = 1;
7085     return true;
7086   }
7087   return false;
7088 }
7089 
7090 // Note: the order in this table matches the order of operands in AsmString.
7091 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7092   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7093   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7094   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7095   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7096   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7097   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7098   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7099   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7100   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7101   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
7102   {"scc",     AMDGPUOperand::ImmTySCCB, true, nullptr},
7103   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
7104   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
7105   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7106   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7107   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7108   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7109   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7110   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7111   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7112   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7113   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7114   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7115   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7116   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7117   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7118   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7119   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7120   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7121   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7122   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7123   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7124   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7125   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7126   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7127   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7128   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7129   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7130   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7131   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7132   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7133   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7134   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7135   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7136 };
7137 
7138 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7139 
7140   OperandMatchResultTy res = parseOptionalOpr(Operands);
7141 
7142   // This is a hack to enable hardcoded mandatory operands which follow
7143   // optional operands.
7144   //
7145   // The current design assumes that all operands after the first optional
7146   // operand are also optional. However, the implementation of some
7147   // instructions violates this rule (e.g. flat/global atomics have a
7148   // hardcoded 'glc' operand).
7149   //
7150   // To alleviate this problem, we have to (implicitly) parse extra operands
7151   // to make sure the autogenerated parser of custom operands never hits a hardcoded mandatory operand.
7152 
7153   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7154     if (res != MatchOperand_Success ||
7155         isToken(AsmToken::EndOfStatement))
7156       break;
7157 
7158     trySkipToken(AsmToken::Comma);
7159     res = parseOptionalOpr(Operands);
7160   }
7161 
7162   return res;
7163 }
7164 
7165 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7166   OperandMatchResultTy res;
7167   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7168     // try to parse any optional operand here
7169     if (Op.IsBit) {
7170       res = parseNamedBit(Op.Name, Operands, Op.Type);
7171     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7172       res = parseOModOperand(Operands);
7173     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7174                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7175                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7176       res = parseSDWASel(Operands, Op.Name, Op.Type);
7177     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7178       res = parseSDWADstUnused(Operands);
7179     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7180                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7181                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7182                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7183       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7184                                         Op.ConvertResult);
7185     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7186       res = parseDim(Operands);
7187     } else {
7188       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7189     }
7190     if (res != MatchOperand_NoMatch) {
7191       return res;
7192     }
7193   }
7194   return MatchOperand_NoMatch;
7195 }
7196 
7197 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7198   StringRef Name = getTokenStr();
7199   if (Name == "mul") {
7200     return parseIntWithPrefix("mul", Operands,
7201                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7202   }
7203 
7204   if (Name == "div") {
7205     return parseIntWithPrefix("div", Operands,
7206                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7207   }
7208 
7209   return MatchOperand_NoMatch;
7210 }
7211 
7212 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7213   cvtVOP3P(Inst, Operands);
7214 
7215   int Opc = Inst.getOpcode();
7216 
7217   int SrcNum;
7218   const int Ops[] = { AMDGPU::OpName::src0,
7219                       AMDGPU::OpName::src1,
7220                       AMDGPU::OpName::src2 };
7221   for (SrcNum = 0;
7222        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7223        ++SrcNum);
7224   assert(SrcNum > 0);
7225 
7226   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7227   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7228 
7229   if ((OpSel & (1 << SrcNum)) != 0) {
7230     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7231     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7232     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7233   }
7234 }
7235 
7236 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7237       // 1. This operand is an input modifiers operand
7238   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7239       // 2. This is not the last operand
7240       && Desc.NumOperands > (OpNum + 1)
7241       // 3. The next operand has a register class
7242       && Desc.OpInfo[OpNum + 1].RegClass != -1
7243       // 4. The next register is not tied to any other operand
7244       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7245 }
7246 
7247 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7248 {
7249   OptionalImmIndexMap OptionalIdx;
7250   unsigned Opc = Inst.getOpcode();
7251 
7252   unsigned I = 1;
7253   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7254   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7255     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7256   }
7257 
7258   for (unsigned E = Operands.size(); I != E; ++I) {
7259     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7260     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7261       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7262     } else if (Op.isInterpSlot() ||
7263                Op.isInterpAttr() ||
7264                Op.isAttrChan()) {
7265       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7266     } else if (Op.isImmModifier()) {
7267       OptionalIdx[Op.getImmTy()] = I;
7268     } else {
7269       llvm_unreachable("unhandled operand type");
7270     }
7271   }
7272 
7273   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7274     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7275   }
7276 
7277   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7278     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7279   }
7280 
7281   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7282     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7283   }
7284 }
7285 
7286 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7287                               OptionalImmIndexMap &OptionalIdx) {
7288   unsigned Opc = Inst.getOpcode();
7289 
7290   unsigned I = 1;
7291   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7292   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7293     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7294   }
7295 
7296   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7297     // This instruction has src modifiers
7298     for (unsigned E = Operands.size(); I != E; ++I) {
7299       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7300       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7301         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7302       } else if (Op.isImmModifier()) {
7303         OptionalIdx[Op.getImmTy()] = I;
7304       } else if (Op.isRegOrImm()) {
7305         Op.addRegOrImmOperands(Inst, 1);
7306       } else {
7307         llvm_unreachable("unhandled operand type");
7308       }
7309     }
7310   } else {
7311     // No src modifiers
7312     for (unsigned E = Operands.size(); I != E; ++I) {
7313       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7314       if (Op.isMod()) {
7315         OptionalIdx[Op.getImmTy()] = I;
7316       } else {
7317         Op.addRegOrImmOperands(Inst, 1);
7318       }
7319     }
7320   }
7321 
7322   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7323     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7324   }
7325 
7326   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7327     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7328   }
7329 
7330   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7331   // they have a src2 register operand that is tied to the dst operand.
7332   // We don't allow modifiers for this operand in the assembler, so
7333   // src2_modifiers should be 0.
7334   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7335       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7336       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7337       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7338       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7339       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7340       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7341       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7342       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7343       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7344       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7345     auto it = Inst.begin();
7346     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7347     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7348     ++it;
7349     // Copy the operand to ensure it's not invalidated when Inst grows.
7350     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7351   }
7352 }
7353 
7354 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7355   OptionalImmIndexMap OptionalIdx;
7356   cvtVOP3(Inst, Operands, OptionalIdx);
7357 }
7358 
7359 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
7360                                const OperandVector &Operands) {
7361   OptionalImmIndexMap OptIdx;
7362   const int Opc = Inst.getOpcode();
7363   const MCInstrDesc &Desc = MII.get(Opc);
7364 
7365   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7366 
7367   cvtVOP3(Inst, Operands, OptIdx);
7368 
7369   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7370     assert(!IsPacked);
7371     Inst.addOperand(Inst.getOperand(0));
7372   }
7373 
7374   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
7375   // instruction, and then figure out where to actually put the modifiers.
7376 
7377   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7378 
7379   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7380   if (OpSelHiIdx != -1) {
7381     int DefaultVal = IsPacked ? -1 : 0;
7382     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7383                           DefaultVal);
7384   }
7385 
7386   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7387   if (NegLoIdx != -1) {
7388     assert(IsPacked);
7389     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7390     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7391   }
7392 
7393   const int Ops[] = { AMDGPU::OpName::src0,
7394                       AMDGPU::OpName::src1,
7395                       AMDGPU::OpName::src2 };
7396   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7397                          AMDGPU::OpName::src1_modifiers,
7398                          AMDGPU::OpName::src2_modifiers };
7399 
7400   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7401 
7402   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7403   unsigned OpSelHi = 0;
7404   unsigned NegLo = 0;
7405   unsigned NegHi = 0;
7406 
7407   if (OpSelHiIdx != -1) {
7408     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7409   }
7410 
7411   if (NegLoIdx != -1) {
7412     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7413     NegLo = Inst.getOperand(NegLoIdx).getImm();
7414     NegHi = Inst.getOperand(NegHiIdx).getImm();
7415   }
7416 
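  // Fold the per-operand op_sel / op_sel_hi / neg_lo / neg_hi bits into the
  // corresponding src modifier operands.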
7417   for (int J = 0; J < 3; ++J) {
7418     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7419     if (OpIdx == -1)
7420       break;
7421 
7422     uint32_t ModVal = 0;
7423 
7424     if ((OpSel & (1 << J)) != 0)
7425       ModVal |= SISrcMods::OP_SEL_0;
7426 
7427     if ((OpSelHi & (1 << J)) != 0)
7428       ModVal |= SISrcMods::OP_SEL_1;
7429 
7430     if ((NegLo & (1 << J)) != 0)
7431       ModVal |= SISrcMods::NEG;
7432 
7433     if ((NegHi & (1 << J)) != 0)
7434       ModVal |= SISrcMods::NEG_HI;
7435 
7436     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7437 
7438     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7439   }
7440 }
7441 
7442 //===----------------------------------------------------------------------===//
7443 // dpp
7444 //===----------------------------------------------------------------------===//
7445 
7446 bool AMDGPUOperand::isDPP8() const {
7447   return isImmTy(ImmTyDPP8);
7448 }
7449 
7450 bool AMDGPUOperand::isDPPCtrl() const {
7451   using namespace AMDGPU::DPP;
7452 
7453   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7454   if (result) {
7455     int64_t Imm = getImm();
7456     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7457            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7458            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7459            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7460            (Imm == DppCtrl::WAVE_SHL1) ||
7461            (Imm == DppCtrl::WAVE_ROL1) ||
7462            (Imm == DppCtrl::WAVE_SHR1) ||
7463            (Imm == DppCtrl::WAVE_ROR1) ||
7464            (Imm == DppCtrl::ROW_MIRROR) ||
7465            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7466            (Imm == DppCtrl::BCAST15) ||
7467            (Imm == DppCtrl::BCAST31) ||
7468            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7469            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7470   }
7471   return false;
7472 }
7473 
7474 //===----------------------------------------------------------------------===//
7475 // mAI
7476 //===----------------------------------------------------------------------===//
7477 
7478 bool AMDGPUOperand::isBLGP() const {
7479   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7480 }
7481 
7482 bool AMDGPUOperand::isCBSZ() const {
7483   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7484 }
7485 
7486 bool AMDGPUOperand::isABID() const {
7487   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7488 }
7489 
7490 bool AMDGPUOperand::isS16Imm() const {
7491   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7492 }
7493 
7494 bool AMDGPUOperand::isU16Imm() const {
7495   return isImm() && isUInt<16>(getImm());
7496 }
7497 
7498 //===----------------------------------------------------------------------===//
7499 // dim
7500 //===----------------------------------------------------------------------===//
7501 
7502 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7503   // We want to allow "dim:1D" etc.,
7504   // but the initial 1 is tokenized as an integer.
7505   std::string Token;
7506   if (isToken(AsmToken::Integer)) {
7507     SMLoc Loc = getToken().getEndLoc();
7508     Token = std::string(getTokenStr());
7509     lex();
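    // The suffix must immediately follow the integer, with no whitespace.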
7510     if (getLoc() != Loc)
7511       return false;
7512   }
7513 
7514   StringRef Suffix;
7515   if (!parseId(Suffix))
7516     return false;
7517   Token += Suffix;
7518 
7519   StringRef DimId = Token;
7520   if (DimId.startswith("SQ_RSRC_IMG_"))
7521     DimId = DimId.drop_front(12);
7522 
7523   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7524   if (!DimInfo)
7525     return false;
7526 
7527   Encoding = DimInfo->Encoding;
7528   return true;
7529 }
7530 
7531 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7532   if (!isGFX10Plus())
7533     return MatchOperand_NoMatch;
7534 
7535   SMLoc S = getLoc();
7536 
7537   if (!trySkipId("dim", AsmToken::Colon))
7538     return MatchOperand_NoMatch;
7539 
7540   unsigned Encoding;
7541   SMLoc Loc = getLoc();
7542   if (!parseDimId(Encoding)) {
7543     Error(Loc, "invalid dim value");
7544     return MatchOperand_ParseFail;
7545   }
7546 
7547   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7548                                               AMDGPUOperand::ImmTyDim));
7549   return MatchOperand_Success;
7550 }
7551 
7552 //===----------------------------------------------------------------------===//
7553 // dpp
7554 //===----------------------------------------------------------------------===//
7555 
7556 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7557   SMLoc S = getLoc();
7558 
7559   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7560     return MatchOperand_NoMatch;
7561 
7562   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7563 
7564   int64_t Sels[8];
7565 
7566   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7567     return MatchOperand_ParseFail;
7568 
7569   for (size_t i = 0; i < 8; ++i) {
7570     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7571       return MatchOperand_ParseFail;
7572 
7573     SMLoc Loc = getLoc();
7574     if (getParser().parseAbsoluteExpression(Sels[i]))
7575       return MatchOperand_ParseFail;
7576     if (0 > Sels[i] || 7 < Sels[i]) {
7577       Error(Loc, "expected a 3-bit value");
7578       return MatchOperand_ParseFail;
7579     }
7580   }
7581 
7582   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7583     return MatchOperand_ParseFail;
7584 
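  // Pack the eight 3-bit lane selectors into a single immediate.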
7585   unsigned DPP8 = 0;
7586   for (size_t i = 0; i < 8; ++i)
7587     DPP8 |= (Sels[i] << (i * 3));
7588 
7589   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7590   return MatchOperand_Success;
7591 }
7592 
7593 bool
7594 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7595                                     const OperandVector &Operands) {
7596   if (Ctrl == "row_newbcast")
7597     return isGFX90A();
7598 
7599   // DPP64 is supported for row_newbcast only.
7600   const MCRegisterInfo *MRI = getMRI();
7601   if (Operands.size() > 2 && Operands[1]->isReg() &&
7602       MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1))
7603     return false;
7604 
7605   if (Ctrl == "row_share" ||
7606       Ctrl == "row_xmask")
7607     return isGFX10Plus();
7608 
7609   if (Ctrl == "wave_shl" ||
7610       Ctrl == "wave_shr" ||
7611       Ctrl == "wave_rol" ||
7612       Ctrl == "wave_ror" ||
7613       Ctrl == "row_bcast")
7614     return isVI() || isGFX9();
7615 
7616   return Ctrl == "row_mirror" ||
7617          Ctrl == "row_half_mirror" ||
7618          Ctrl == "quad_perm" ||
7619          Ctrl == "row_shl" ||
7620          Ctrl == "row_shr" ||
7621          Ctrl == "row_ror";
7622 }
7623 
7624 int64_t
7625 AMDGPUAsmParser::parseDPPCtrlPerm() {
7626   // quad_perm:[%d,%d,%d,%d]
7627 
7628   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7629     return -1;
7630 
7631   int64_t Val = 0;
7632   for (int i = 0; i < 4; ++i) {
7633     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7634       return -1;
7635 
7636     int64_t Temp;
7637     SMLoc Loc = getLoc();
7638     if (getParser().parseAbsoluteExpression(Temp))
7639       return -1;
7640     if (Temp < 0 || Temp > 3) {
7641       Error(Loc, "expected a 2-bit value");
7642       return -1;
7643     }
7644 
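    // Pack four 2-bit lane selectors, lane 0 in the low bits.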
7645     Val += (Temp << i * 2);
7646   }
7647 
7648   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7649     return -1;
7650 
7651   return Val;
7652 }
7653 
7654 int64_t
7655 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7656   using namespace AMDGPU::DPP;
7657 
7658   // sel:%d
7659 
7660   int64_t Val;
7661   SMLoc Loc = getLoc();
7662 
7663   if (getParser().parseAbsoluteExpression(Val))
7664     return -1;
7665 
7666   struct DppCtrlCheck {
7667     int64_t Ctrl;
7668     int Lo;
7669     int Hi;
7670   };
7671 
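  // For each prefix, Ctrl is the base encoding and [Lo, Hi] is the range of
  // legal sel values; row_bcast is validated separately below.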
7672   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7673     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7674     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7675     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7676     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7677     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7678     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7679     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7680     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7681     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7682     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7683     .Default({-1, 0, 0});
7684 
7685   bool Valid;
7686   if (Check.Ctrl == -1) {
7687     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7688     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7689   } else {
7690     Valid = Check.Lo <= Val && Val <= Check.Hi;
7691     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7692   }
7693 
7694   if (!Valid) {
7695     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7696     return -1;
7697   }
7698 
7699   return Val;
7700 }
7701 
7702 OperandMatchResultTy
7703 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7704   using namespace AMDGPU::DPP;
7705 
7706   if (!isToken(AsmToken::Identifier) ||
7707       !isSupportedDPPCtrl(getTokenStr(), Operands))
7708     return MatchOperand_NoMatch;
7709 
7710   SMLoc S = getLoc();
7711   int64_t Val = -1;
7712   StringRef Ctrl;
7713 
7714   parseId(Ctrl);
7715 
7716   if (Ctrl == "row_mirror") {
7717     Val = DppCtrl::ROW_MIRROR;
7718   } else if (Ctrl == "row_half_mirror") {
7719     Val = DppCtrl::ROW_HALF_MIRROR;
7720   } else {
7721     if (skipToken(AsmToken::Colon, "expected a colon")) {
7722       if (Ctrl == "quad_perm") {
7723         Val = parseDPPCtrlPerm();
7724       } else {
7725         Val = parseDPPCtrlSel(Ctrl);
7726       }
7727     }
7728   }
7729 
7730   if (Val == -1)
7731     return MatchOperand_ParseFail;
7732 
7733   Operands.push_back(
7734     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7735   return MatchOperand_Success;
7736 }
7737 
7738 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7739   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7740 }
7741 
7742 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7743   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7744 }
7745 
7746 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7747   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7748 }
7749 
7750 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7751   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7752 }
7753 
7754 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7755   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7756 }
7757 
7758 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7759   OptionalImmIndexMap OptionalIdx;
7760 
7761   unsigned I = 1;
7762   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7763   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7764     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7765   }
7766 
7767   int Fi = 0;
7768   for (unsigned E = Operands.size(); I != E; ++I) {
7769     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7770                                             MCOI::TIED_TO);
7771     if (TiedTo != -1) {
7772       assert((unsigned)TiedTo < Inst.getNumOperands());
7773       // Handle tied old or src2 for MAC instructions.
7774       Inst.addOperand(Inst.getOperand(TiedTo));
7775     }
7776     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7777     // Add the register arguments
7778     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7779       // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
7780       // Skip it.
7781       continue;
7782     }
7783 
7784     if (IsDPP8) {
7785       if (Op.isDPP8()) {
7786         Op.addImmOperands(Inst, 1);
7787       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7788         Op.addRegWithFPInputModsOperands(Inst, 2);
7789       } else if (Op.isFI()) {
7790         Fi = Op.getImm();
7791       } else if (Op.isReg()) {
7792         Op.addRegOperands(Inst, 1);
7793       } else {
7794         llvm_unreachable("Invalid operand type");
7795       }
7796     } else {
7797       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7798         Op.addRegWithFPInputModsOperands(Inst, 2);
7799       } else if (Op.isDPPCtrl()) {
7800         Op.addImmOperands(Inst, 1);
7801       } else if (Op.isImm()) {
7802         // Handle optional arguments
7803         OptionalIdx[Op.getImmTy()] = I;
7804       } else {
7805         llvm_unreachable("Invalid operand type");
7806       }
7807     }
7808   }
7809 
7810   if (IsDPP8) {
7811     using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
7813   } else {
7814     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7815     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7816     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7817     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7818       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7819     }
7820   }
7821 }
7822 
7823 //===----------------------------------------------------------------------===//
7824 // sdwa
7825 //===----------------------------------------------------------------------===//
7826 
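// Parse an SDWA sub-dword selection of the form "<Prefix>:<sel>", e.g.
// "dst_sel:BYTE_0" or "src0_sel:WORD_1"; <sel> must be one of BYTE_0..BYTE_3,
// WORD_0, WORD_1 or DWORD.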
7827 OperandMatchResultTy
7828 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7829                               AMDGPUOperand::ImmTy Type) {
7830   using namespace llvm::AMDGPU::SDWA;
7831 
7832   SMLoc S = getLoc();
7833   StringRef Value;
7834   OperandMatchResultTy res;
7835 
7836   SMLoc StringLoc;
7837   res = parseStringWithPrefix(Prefix, Value, StringLoc);
7838   if (res != MatchOperand_Success) {
7839     return res;
7840   }
7841 
7842   int64_t Int;
7843   Int = StringSwitch<int64_t>(Value)
7844         .Case("BYTE_0", SdwaSel::BYTE_0)
7845         .Case("BYTE_1", SdwaSel::BYTE_1)
7846         .Case("BYTE_2", SdwaSel::BYTE_2)
7847         .Case("BYTE_3", SdwaSel::BYTE_3)
7848         .Case("WORD_0", SdwaSel::WORD_0)
7849         .Case("WORD_1", SdwaSel::WORD_1)
7850         .Case("DWORD", SdwaSel::DWORD)
7851         .Default(0xffffffff);
7852 
7853   if (Int == 0xffffffff) {
7854     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
7855     return MatchOperand_ParseFail;
7856   }
7857 
7858   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7859   return MatchOperand_Success;
7860 }
7861 
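// Parse an SDWA "dst_unused:<value>" operand; <value> must be UNUSED_PAD,
// UNUSED_SEXT or UNUSED_PRESERVE.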
7862 OperandMatchResultTy
7863 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7864   using namespace llvm::AMDGPU::SDWA;
7865 
7866   SMLoc S = getLoc();
7867   StringRef Value;
7868   OperandMatchResultTy res;
7869 
7870   SMLoc StringLoc;
7871   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
7872   if (res != MatchOperand_Success) {
7873     return res;
7874   }
7875 
7876   int64_t Int;
7877   Int = StringSwitch<int64_t>(Value)
7878         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7879         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7880         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7881         .Default(0xffffffff);
7882 
7883   if (Int == 0xffffffff) {
7884     Error(StringLoc, "invalid dst_unused value");
7885     return MatchOperand_ParseFail;
7886   }
7887 
7888   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7889   return MatchOperand_Success;
7890 }
7891 
7892 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7893   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7894 }
7895 
7896 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7897   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7898 }
7899 
7900 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7901   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7902 }
7903 
7904 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7905   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7906 }
7907 
7908 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7909   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7910 }
7911 
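// Convert parsed SDWA operands into MCInst operands. A textual "vcc"
// destination or source of VOP2b/VOPC encodings is skipped where the encoding
// makes it implicit, and omitted optional operands (clamp, omod, dst_sel,
// dst_unused, src0_sel, src1_sel) are filled in with their defaults.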
7912 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7913                               uint64_t BasicInstType,
7914                               bool SkipDstVcc,
7915                               bool SkipSrcVcc) {
7916   using namespace llvm::AMDGPU::SDWA;
7917 
7918   OptionalImmIndexMap OptionalIdx;
7919   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7920   bool SkippedVcc = false;
7921 
7922   unsigned I = 1;
7923   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7924   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7925     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7926   }
7927 
7928   for (unsigned E = Operands.size(); I != E; ++I) {
7929     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7930     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7931         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b SDWA instructions (v_add_u32, v_sub_u32, ...) use a "vcc" token
      // as dst. Skip it if it is the 2nd operand
      // (e.g. v_add_i32_sdwa v1, vcc, v2, v3) or the 4th operand
      // (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if it was not skipped on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
7937       if (BasicInstType == SIInstrFlags::VOP2 &&
7938           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7939            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7940         SkippedVcc = true;
7941         continue;
7942       } else if (BasicInstType == SIInstrFlags::VOPC &&
7943                  Inst.getNumOperands() == 0) {
7944         SkippedVcc = true;
7945         continue;
7946       }
7947     }
7948     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7949       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7950     } else if (Op.isImm()) {
7951       // Handle optional arguments
7952       OptionalIdx[Op.getImmTy()] = I;
7953     } else {
7954       llvm_unreachable("Invalid operand type");
7955     }
7956     SkippedVcc = false;
7957   }
7958 
7959   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7960       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7961       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_gfx10/gfx9/vi have no optional sdwa arguments.
7963     switch (BasicInstType) {
7964     case SIInstrFlags::VOP1:
7965       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7966       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7967         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7968       }
7969       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7970       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7971       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7972       break;
7973 
7974     case SIInstrFlags::VOP2:
7975       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7976       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7977         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7978       }
7979       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7980       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7981       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7982       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7983       break;
7984 
7985     case SIInstrFlags::VOPC:
7986       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7987         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7988       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7989       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7990       break;
7991 
7992     default:
7993       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7994     }
7995   }
7996 
  // Special case for v_mac_{f16, f32}: it has a src2 register operand that is
  // tied to the dst operand.
7999   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8000       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8001     auto it = Inst.begin();
8002     std::advance(
8003       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8004     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8005   }
8006 }
8007 
8008 //===----------------------------------------------------------------------===//
8009 // mAI
8010 //===----------------------------------------------------------------------===//
8011 
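// Defaults for the optional MAI modifiers (blgp, cbsz, abid) when they are
// omitted from the source.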
8012 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8013   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8014 }
8015 
8016 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8017   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8018 }
8019 
8020 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8021   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8022 }
8023 
8024 /// Force static initialization.
8025 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8026   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8027   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8028 }
8029 
8030 #define GET_REGISTER_MATCHER
8031 #define GET_MATCHER_IMPLEMENTATION
8032 #define GET_MNEMONIC_SPELL_CHECKER
8033 #define GET_MNEMONIC_CHECKER
8034 #include "AMDGPUGenAsmMatcher.inc"
8035 
// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
8038 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8039                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand where
  // the matcher expects the corresponding token.
8044   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8045   switch (Kind) {
8046   case MCK_addr64:
8047     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8048   case MCK_gds:
8049     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8050   case MCK_lds:
8051     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8052   case MCK_glc:
8053     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
8054   case MCK_idxen:
8055     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8056   case MCK_offen:
8057     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() always tries to match
    // an operand as a token when isToken returns true, and the match fails
    // when the name of the expression is not a valid token, so we need to
    // handle it here.
8065     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8066   case MCK_SSrcF32:
8067     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8068   case MCK_SoppBrTarget:
8069     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8070   case MCK_VReg32OrOff:
8071     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8072   case MCK_InterpSlot:
8073     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8074   case MCK_Attr:
8075     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8076   case MCK_AttrChan:
8077     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8078   case MCK_ImmSMEMOffset:
8079     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8080   case MCK_SReg_64:
8081   case MCK_SReg_64_XEXEC:
    // 'null' is defined as a 32-bit register, but it should also be usable
    // with 64-bit operands. The following code enables it for SReg_64
    // operands used as source and destination. The remaining source operands
    // are handled in isInlinableImm.
8087     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8088   default:
8089     return Match_InvalidOperand;
8090   }
8091 }
8092 
8093 //===----------------------------------------------------------------------===//
8094 // endpgm
8095 //===----------------------------------------------------------------------===//
8096 
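// Parse the optional immediate operand of s_endpgm, e.g. "s_endpgm 3". The
// operand defaults to 0 when absent and must fit in 16 bits.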
8097 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8098   SMLoc S = getLoc();
8099   int64_t Imm = 0;
8100 
8101   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
8103     Imm = 0;
8104   }
8105 
8106   if (!isUInt<16>(Imm)) {
8107     Error(S, "expected a 16-bit value");
8108     return MatchOperand_ParseFail;
8109   }
8110 
8111   Operands.push_back(
8112       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8113   return MatchOperand_Success;
8114 }
8115 
8116 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8117