1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/Support/AMDGPUMetadata.h"
32 #include "llvm/Support/AMDHSAKernelDescriptor.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/MachineValueType.h"
35 #include "llvm/Support/TargetParser.h"
36 #include "llvm/Support/TargetRegistry.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53   enum KindTy {
54     Token,
55     Immediate,
56     Register,
57     Expression
58   } Kind;
59 
60   SMLoc StartLoc, EndLoc;
61   const AMDGPUAsmParser *AsmParser;
62 
63 public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : Kind(Kind_), AsmParser(AsmParser_) {}
66 
67   using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
69   struct Modifiers {
70     bool Abs = false;
71     bool Neg = false;
72     bool Sext = false;
73 
74     bool hasFPModifiers() const { return Abs || Neg; }
75     bool hasIntModifiers() const { return Sext; }
76     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
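    // The parsed source modifiers are folded into the corresponding
    // *_modifiers operand of the MCInst. For example, a source written as
    // "-|v0|" has Abs and Neg set, which encode as
    // SISrcMods::ABS | SISrcMods::NEG.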
78     int64_t getFPModifiersOperand() const {
79       int64_t Operand = 0;
80       Operand |= Abs ? SISrcMods::ABS : 0u;
81       Operand |= Neg ? SISrcMods::NEG : 0u;
82       return Operand;
83     }
84 
85     int64_t getIntModifiersOperand() const {
86       int64_t Operand = 0;
87       Operand |= Sext ? SISrcMods::SEXT : 0u;
88       return Operand;
89     }
90 
91     int64_t getModifiersOperand() const {
92       assert(!(hasFPModifiers() && hasIntModifiers())
93            && "fp and int modifiers should not be used simultaneously");
94       if (hasFPModifiers()) {
95         return getFPModifiersOperand();
96       } else if (hasIntModifiers()) {
97         return getIntModifiersOperand();
98       } else {
99         return 0;
100       }
101     }
102 
103     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104   };
105 
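  // Classifies what a parsed immediate operand represents (e.g. an offset,
  // a cache-policy bit such as glc, or a DPP/SDWA control value) so that the
  // matcher can route it to the right instruction operand.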
106   enum ImmTy {
107     ImmTyNone,
108     ImmTyGDS,
109     ImmTyLDS,
110     ImmTyOffen,
111     ImmTyIdxen,
112     ImmTyAddr64,
113     ImmTyOffset,
114     ImmTyInstOffset,
115     ImmTyOffset0,
116     ImmTyOffset1,
117     ImmTyDLC,
118     ImmTySCCB,
119     ImmTyGLC,
120     ImmTySLC,
121     ImmTySWZ,
122     ImmTyTFE,
123     ImmTyD16,
124     ImmTyClampSI,
125     ImmTyOModSI,
126     ImmTyDPP8,
127     ImmTyDppCtrl,
128     ImmTyDppRowMask,
129     ImmTyDppBankMask,
130     ImmTyDppBoundCtrl,
131     ImmTyDppFi,
132     ImmTySdwaDstSel,
133     ImmTySdwaSrc0Sel,
134     ImmTySdwaSrc1Sel,
135     ImmTySdwaDstUnused,
136     ImmTyDMask,
137     ImmTyDim,
138     ImmTyUNorm,
139     ImmTyDA,
140     ImmTyR128A16,
141     ImmTyA16,
142     ImmTyLWE,
143     ImmTyExpTgt,
144     ImmTyExpCompr,
145     ImmTyExpVM,
146     ImmTyFORMAT,
147     ImmTyHwreg,
148     ImmTyOff,
149     ImmTySendMsg,
150     ImmTyInterpSlot,
151     ImmTyInterpAttr,
152     ImmTyAttrChan,
153     ImmTyOpSel,
154     ImmTyOpSelHi,
155     ImmTyNegLo,
156     ImmTyNegHi,
157     ImmTySwizzle,
158     ImmTyGprIdxMode,
159     ImmTyHigh,
160     ImmTyBLGP,
161     ImmTyCBSZ,
162     ImmTyABID,
163     ImmTyEndpgm,
164   };
165 
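  // Records whether an immediate operand has been committed as a literal or
  // as an inline constant (see setImmKindLiteral / setImmKindConst).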
166   enum ImmKindTy {
167     ImmKindTyNone,
168     ImmKindTyLiteral,
169     ImmKindTyConst,
170   };
171 
172 private:
173   struct TokOp {
174     const char *Data;
175     unsigned Length;
176   };
177 
178   struct ImmOp {
179     int64_t Val;
180     ImmTy Type;
181     bool IsFPImm;
182     mutable ImmKindTy Kind;
183     Modifiers Mods;
184   };
185 
186   struct RegOp {
187     unsigned RegNo;
188     Modifiers Mods;
189   };
190 
191   union {
192     TokOp Tok;
193     ImmOp Imm;
194     RegOp Reg;
195     const MCExpr *Expr;
196   };
197 
198 public:
199   bool isToken() const override {
200     if (Kind == Token)
201       return true;
202 
203     // When parsing operands, we can't always tell if something was meant to be
204     // a token, like 'gds', or an expression that references a global variable.
205     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
207     return isSymbolRefExpr();
208   }
209 
210   bool isSymbolRefExpr() const {
211     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
212   }
213 
214   bool isImm() const override {
215     return Kind == Immediate;
216   }
217 
218   void setImmKindNone() const {
219     assert(isImm());
220     Imm.Kind = ImmKindTyNone;
221   }
222 
223   void setImmKindLiteral() const {
224     assert(isImm());
225     Imm.Kind = ImmKindTyLiteral;
226   }
227 
228   void setImmKindConst() const {
229     assert(isImm());
230     Imm.Kind = ImmKindTyConst;
231   }
232 
233   bool IsImmKindLiteral() const {
234     return isImm() && Imm.Kind == ImmKindTyLiteral;
235   }
236 
237   bool isImmKindConst() const {
238     return isImm() && Imm.Kind == ImmKindTyConst;
239   }
240 
241   bool isInlinableImm(MVT type) const;
242   bool isLiteralImm(MVT type) const;
243 
244   bool isRegKind() const {
245     return Kind == Register;
246   }
247 
248   bool isReg() const override {
249     return isRegKind() && !hasModifiers();
250   }
251 
252   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
253     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
254   }
255 
256   bool isRegOrImmWithInt16InputMods() const {
257     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
258   }
259 
260   bool isRegOrImmWithInt32InputMods() const {
261     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
262   }
263 
264   bool isRegOrImmWithInt64InputMods() const {
265     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
266   }
267 
268   bool isRegOrImmWithFP16InputMods() const {
269     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
270   }
271 
272   bool isRegOrImmWithFP32InputMods() const {
273     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
274   }
275 
276   bool isRegOrImmWithFP64InputMods() const {
277     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
278   }
279 
280   bool isVReg() const {
281     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
282            isRegClass(AMDGPU::VReg_64RegClassID) ||
283            isRegClass(AMDGPU::VReg_96RegClassID) ||
284            isRegClass(AMDGPU::VReg_128RegClassID) ||
285            isRegClass(AMDGPU::VReg_160RegClassID) ||
286            isRegClass(AMDGPU::VReg_192RegClassID) ||
287            isRegClass(AMDGPU::VReg_256RegClassID) ||
288            isRegClass(AMDGPU::VReg_512RegClassID) ||
289            isRegClass(AMDGPU::VReg_1024RegClassID);
290   }
291 
292   bool isVReg32() const {
293     return isRegClass(AMDGPU::VGPR_32RegClassID);
294   }
295 
296   bool isVReg32OrOff() const {
297     return isOff() || isVReg32();
298   }
299 
300   bool isNull() const {
301     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
302   }
303 
304   bool isVRegWithInputMods() const;
305 
306   bool isSDWAOperand(MVT type) const;
307   bool isSDWAFP16Operand() const;
308   bool isSDWAFP32Operand() const;
309   bool isSDWAInt16Operand() const;
310   bool isSDWAInt32Operand() const;
311 
312   bool isImmTy(ImmTy ImmT) const {
313     return isImm() && Imm.Type == ImmT;
314   }
315 
316   bool isImmModifier() const {
317     return isImm() && Imm.Type != ImmTyNone;
318   }
319 
320   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
321   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
322   bool isDMask() const { return isImmTy(ImmTyDMask); }
323   bool isDim() const { return isImmTy(ImmTyDim); }
324   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
325   bool isDA() const { return isImmTy(ImmTyDA); }
326   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
327   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
328   bool isLWE() const { return isImmTy(ImmTyLWE); }
329   bool isOff() const { return isImmTy(ImmTyOff); }
330   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
331   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
332   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
333   bool isOffen() const { return isImmTy(ImmTyOffen); }
334   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
335   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
336   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
337   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
338   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
339 
340   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
341   bool isGDS() const { return isImmTy(ImmTyGDS); }
342   bool isLDS() const { return isImmTy(ImmTyLDS); }
343   bool isDLC() const { return isImmTy(ImmTyDLC); }
344   bool isSCCB() const { return isImmTy(ImmTySCCB); }
345   bool isGLC() const { return isImmTy(ImmTyGLC); }
346   // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
347   // value of the GLC operand.
348   bool isGLC_1() const { return isImmTy(ImmTyGLC); }
349   bool isSLC() const { return isImmTy(ImmTySLC); }
350   bool isSWZ() const { return isImmTy(ImmTySWZ); }
351   bool isTFE() const { return isImmTy(ImmTyTFE); }
352   bool isD16() const { return isImmTy(ImmTyD16); }
353   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
354   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
355   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
356   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
357   bool isFI() const { return isImmTy(ImmTyDppFi); }
358   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
359   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
360   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
361   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
362   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
363   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
364   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
365   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
366   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
367   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
368   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
369   bool isHigh() const { return isImmTy(ImmTyHigh); }
370 
371   bool isMod() const {
372     return isClampSI() || isOModSI();
373   }
374 
375   bool isRegOrImm() const {
376     return isReg() || isImm();
377   }
378 
379   bool isRegClass(unsigned RCID) const;
380 
381   bool isInlineValue() const;
382 
383   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
384     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
385   }
386 
387   bool isSCSrcB16() const {
388     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
389   }
390 
391   bool isSCSrcV2B16() const {
392     return isSCSrcB16();
393   }
394 
395   bool isSCSrcB32() const {
396     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
397   }
398 
399   bool isSCSrcB64() const {
400     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
401   }
402 
403   bool isBoolReg() const;
404 
405   bool isSCSrcF16() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
407   }
408 
409   bool isSCSrcV2F16() const {
410     return isSCSrcF16();
411   }
412 
413   bool isSCSrcF32() const {
414     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
415   }
416 
417   bool isSCSrcF64() const {
418     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
419   }
420 
421   bool isSSrcB32() const {
422     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
423   }
424 
425   bool isSSrcB16() const {
426     return isSCSrcB16() || isLiteralImm(MVT::i16);
427   }
428 
429   bool isSSrcV2B16() const {
430     llvm_unreachable("cannot happen");
431     return isSSrcB16();
432   }
433 
434   bool isSSrcB64() const {
435     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
436     // See isVSrc64().
437     return isSCSrcB64() || isLiteralImm(MVT::i64);
438   }
439 
440   bool isSSrcF32() const {
441     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
442   }
443 
444   bool isSSrcF64() const {
445     return isSCSrcB64() || isLiteralImm(MVT::f64);
446   }
447 
448   bool isSSrcF16() const {
449     return isSCSrcB16() || isLiteralImm(MVT::f16);
450   }
451 
452   bool isSSrcV2F16() const {
453     llvm_unreachable("cannot happen");
454     return isSSrcF16();
455   }
456 
457   bool isSSrcV2FP32() const {
458     llvm_unreachable("cannot happen");
459     return isSSrcF32();
460   }
461 
462   bool isSCSrcV2FP32() const {
463     llvm_unreachable("cannot happen");
464     return isSCSrcF32();
465   }
466 
467   bool isSSrcV2INT32() const {
468     llvm_unreachable("cannot happen");
469     return isSSrcB32();
470   }
471 
472   bool isSCSrcV2INT32() const {
473     llvm_unreachable("cannot happen");
474     return isSCSrcB32();
475   }
476 
477   bool isSSrcOrLdsB32() const {
478     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
479            isLiteralImm(MVT::i32) || isExpr();
480   }
481 
482   bool isVCSrcB32() const {
483     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
484   }
485 
486   bool isVCSrcB64() const {
487     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
488   }
489 
490   bool isVCSrcB16() const {
491     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
492   }
493 
494   bool isVCSrcV2B16() const {
495     return isVCSrcB16();
496   }
497 
498   bool isVCSrcF32() const {
499     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
500   }
501 
502   bool isVCSrcF64() const {
503     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
504   }
505 
506   bool isVCSrcF16() const {
507     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
508   }
509 
510   bool isVCSrcV2F16() const {
511     return isVCSrcF16();
512   }
513 
514   bool isVSrcB32() const {
515     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
516   }
517 
518   bool isVSrcB64() const {
519     return isVCSrcF64() || isLiteralImm(MVT::i64);
520   }
521 
522   bool isVSrcB16() const {
523     return isVCSrcB16() || isLiteralImm(MVT::i16);
524   }
525 
526   bool isVSrcV2B16() const {
527     return isVSrcB16() || isLiteralImm(MVT::v2i16);
528   }
529 
530   bool isVCSrcV2FP32() const {
531     return isVCSrcF64();
532   }
533 
534   bool isVSrcV2FP32() const {
535     return isVSrcF64() || isLiteralImm(MVT::v2f32);
536   }
537 
538   bool isVCSrcV2INT32() const {
539     return isVCSrcB64();
540   }
541 
542   bool isVSrcV2INT32() const {
543     return isVSrcB64() || isLiteralImm(MVT::v2i32);
544   }
545 
546   bool isVSrcF32() const {
547     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
548   }
549 
550   bool isVSrcF64() const {
551     return isVCSrcF64() || isLiteralImm(MVT::f64);
552   }
553 
554   bool isVSrcF16() const {
555     return isVCSrcF16() || isLiteralImm(MVT::f16);
556   }
557 
558   bool isVSrcV2F16() const {
559     return isVSrcF16() || isLiteralImm(MVT::v2f16);
560   }
561 
562   bool isVISrcB32() const {
563     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
564   }
565 
566   bool isVISrcB16() const {
567     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
568   }
569 
570   bool isVISrcV2B16() const {
571     return isVISrcB16();
572   }
573 
574   bool isVISrcF32() const {
575     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
576   }
577 
578   bool isVISrcF16() const {
579     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
580   }
581 
582   bool isVISrcV2F16() const {
583     return isVISrcF16() || isVISrcB32();
584   }
585 
586   bool isVISrc_64B64() const {
587     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
588   }
589 
590   bool isVISrc_64F64() const {
591     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
592   }
593 
594   bool isVISrc_64V2FP32() const {
595     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
596   }
597 
598   bool isVISrc_64V2INT32() const {
599     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
600   }
601 
602   bool isVISrc_256B64() const {
603     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
604   }
605 
606   bool isVISrc_256F64() const {
607     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
608   }
609 
610   bool isVISrc_128B16() const {
611     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
612   }
613 
614   bool isVISrc_128V2B16() const {
615     return isVISrc_128B16();
616   }
617 
618   bool isVISrc_128B32() const {
619     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
620   }
621 
622   bool isVISrc_128F32() const {
623     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
624   }
625 
626   bool isVISrc_256V2FP32() const {
627     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
628   }
629 
630   bool isVISrc_256V2INT32() const {
631     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
632   }
633 
634   bool isVISrc_512B32() const {
635     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
636   }
637 
638   bool isVISrc_512B16() const {
639     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
640   }
641 
642   bool isVISrc_512V2B16() const {
643     return isVISrc_512B16();
644   }
645 
646   bool isVISrc_512F32() const {
647     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
648   }
649 
650   bool isVISrc_512F16() const {
651     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
652   }
653 
654   bool isVISrc_512V2F16() const {
655     return isVISrc_512F16() || isVISrc_512B32();
656   }
657 
658   bool isVISrc_1024B32() const {
659     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
660   }
661 
662   bool isVISrc_1024B16() const {
663     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
664   }
665 
666   bool isVISrc_1024V2B16() const {
667     return isVISrc_1024B16();
668   }
669 
670   bool isVISrc_1024F32() const {
671     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
672   }
673 
674   bool isVISrc_1024F16() const {
675     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
676   }
677 
678   bool isVISrc_1024V2F16() const {
679     return isVISrc_1024F16() || isVISrc_1024B32();
680   }
681 
682   bool isAISrcB32() const {
683     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
684   }
685 
686   bool isAISrcB16() const {
687     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
688   }
689 
690   bool isAISrcV2B16() const {
691     return isAISrcB16();
692   }
693 
694   bool isAISrcF32() const {
695     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
696   }
697 
698   bool isAISrcF16() const {
699     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
700   }
701 
702   bool isAISrcV2F16() const {
703     return isAISrcF16() || isAISrcB32();
704   }
705 
706   bool isAISrc_64B64() const {
707     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
708   }
709 
710   bool isAISrc_64F64() const {
711     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
712   }
713 
714   bool isAISrc_128B32() const {
715     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
716   }
717 
718   bool isAISrc_128B16() const {
719     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
720   }
721 
722   bool isAISrc_128V2B16() const {
723     return isAISrc_128B16();
724   }
725 
726   bool isAISrc_128F32() const {
727     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
728   }
729 
730   bool isAISrc_128F16() const {
731     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
732   }
733 
734   bool isAISrc_128V2F16() const {
735     return isAISrc_128F16() || isAISrc_128B32();
736   }
737 
738   bool isVISrc_128F16() const {
739     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
740   }
741 
742   bool isVISrc_128V2F16() const {
743     return isVISrc_128F16() || isVISrc_128B32();
744   }
745 
746   bool isAISrc_256B64() const {
747     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
748   }
749 
750   bool isAISrc_256F64() const {
751     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
752   }
753 
754   bool isAISrc_512B32() const {
755     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
756   }
757 
758   bool isAISrc_512B16() const {
759     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
760   }
761 
762   bool isAISrc_512V2B16() const {
763     return isAISrc_512B16();
764   }
765 
766   bool isAISrc_512F32() const {
767     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
768   }
769 
770   bool isAISrc_512F16() const {
771     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
772   }
773 
774   bool isAISrc_512V2F16() const {
775     return isAISrc_512F16() || isAISrc_512B32();
776   }
777 
778   bool isAISrc_1024B32() const {
779     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
780   }
781 
782   bool isAISrc_1024B16() const {
783     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
784   }
785 
786   bool isAISrc_1024V2B16() const {
787     return isAISrc_1024B16();
788   }
789 
790   bool isAISrc_1024F32() const {
791     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
792   }
793 
794   bool isAISrc_1024F16() const {
795     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
796   }
797 
798   bool isAISrc_1024V2F16() const {
799     return isAISrc_1024F16() || isAISrc_1024B32();
800   }
801 
802   bool isKImmFP32() const {
803     return isLiteralImm(MVT::f32);
804   }
805 
806   bool isKImmFP16() const {
807     return isLiteralImm(MVT::f16);
808   }
809 
810   bool isMem() const override {
811     return false;
812   }
813 
814   bool isExpr() const {
815     return Kind == Expression;
816   }
817 
818   bool isSoppBrTarget() const {
819     return isExpr() || isImm();
820   }
821 
822   bool isSWaitCnt() const;
823   bool isHwreg() const;
824   bool isSendMsg() const;
825   bool isSwizzle() const;
826   bool isSMRDOffset8() const;
827   bool isSMEMOffset() const;
828   bool isSMRDLiteralOffset() const;
829   bool isDPP8() const;
830   bool isDPPCtrl() const;
831   bool isBLGP() const;
832   bool isCBSZ() const;
833   bool isABID() const;
834   bool isGPRIdxMode() const;
835   bool isS16Imm() const;
836   bool isU16Imm() const;
837   bool isEndpgm() const;
838 
839   StringRef getExpressionAsToken() const {
840     assert(isExpr());
841     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
842     return S->getSymbol().getName();
843   }
844 
845   StringRef getToken() const {
846     assert(isToken());
847 
848     if (Kind == Expression)
849       return getExpressionAsToken();
850 
851     return StringRef(Tok.Data, Tok.Length);
852   }
853 
854   int64_t getImm() const {
855     assert(isImm());
856     return Imm.Val;
857   }
858 
859   void setImm(int64_t Val) {
860     assert(isImm());
861     Imm.Val = Val;
862   }
863 
864   ImmTy getImmTy() const {
865     assert(isImm());
866     return Imm.Type;
867   }
868 
869   unsigned getReg() const override {
870     assert(isRegKind());
871     return Reg.RegNo;
872   }
873 
874   SMLoc getStartLoc() const override {
875     return StartLoc;
876   }
877 
878   SMLoc getEndLoc() const override {
879     return EndLoc;
880   }
881 
882   SMRange getLocRange() const {
883     return SMRange(StartLoc, EndLoc);
884   }
885 
886   Modifiers getModifiers() const {
887     assert(isRegKind() || isImmTy(ImmTyNone));
888     return isRegKind() ? Reg.Mods : Imm.Mods;
889   }
890 
891   void setModifiers(Modifiers Mods) {
892     assert(isRegKind() || isImmTy(ImmTyNone));
893     if (isRegKind())
894       Reg.Mods = Mods;
895     else
896       Imm.Mods = Mods;
897   }
898 
899   bool hasModifiers() const {
900     return getModifiers().hasModifiers();
901   }
902 
903   bool hasFPModifiers() const {
904     return getModifiers().hasFPModifiers();
905   }
906 
907   bool hasIntModifiers() const {
908     return getModifiers().hasIntModifiers();
909   }
910 
911   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
912 
913   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
914 
915   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
916 
917   template <unsigned Bitwidth>
918   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
919 
920   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
921     addKImmFPOperands<16>(Inst, N);
922   }
923 
924   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
925     addKImmFPOperands<32>(Inst, N);
926   }
927 
928   void addRegOperands(MCInst &Inst, unsigned N) const;
929 
930   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
931     addRegOperands(Inst, N);
932   }
933 
934   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
935     if (isRegKind())
936       addRegOperands(Inst, N);
937     else if (isExpr())
938       Inst.addOperand(MCOperand::createExpr(Expr));
939     else
940       addImmOperands(Inst, N);
941   }
942 
943   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
944     Modifiers Mods = getModifiers();
945     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
946     if (isRegKind()) {
947       addRegOperands(Inst, N);
948     } else {
949       addImmOperands(Inst, N, false);
950     }
951   }
952 
953   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
954     assert(!hasIntModifiers());
955     addRegOrImmWithInputModsOperands(Inst, N);
956   }
957 
958   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
959     assert(!hasFPModifiers());
960     addRegOrImmWithInputModsOperands(Inst, N);
961   }
962 
963   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
964     Modifiers Mods = getModifiers();
965     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
966     assert(isRegKind());
967     addRegOperands(Inst, N);
968   }
969 
970   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
971     assert(!hasIntModifiers());
972     addRegWithInputModsOperands(Inst, N);
973   }
974 
975   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
976     assert(!hasFPModifiers());
977     addRegWithInputModsOperands(Inst, N);
978   }
979 
980   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
981     if (isImm())
982       addImmOperands(Inst, N);
983     else {
984       assert(isExpr());
985       Inst.addOperand(MCOperand::createExpr(Expr));
986     }
987   }
988 
989   static void printImmTy(raw_ostream& OS, ImmTy Type) {
990     switch (Type) {
991     case ImmTyNone: OS << "None"; break;
992     case ImmTyGDS: OS << "GDS"; break;
993     case ImmTyLDS: OS << "LDS"; break;
994     case ImmTyOffen: OS << "Offen"; break;
995     case ImmTyIdxen: OS << "Idxen"; break;
996     case ImmTyAddr64: OS << "Addr64"; break;
997     case ImmTyOffset: OS << "Offset"; break;
998     case ImmTyInstOffset: OS << "InstOffset"; break;
999     case ImmTyOffset0: OS << "Offset0"; break;
1000     case ImmTyOffset1: OS << "Offset1"; break;
1001     case ImmTyDLC: OS << "DLC"; break;
1002     case ImmTySCCB: OS << "SCCB"; break;
1003     case ImmTyGLC: OS << "GLC"; break;
1004     case ImmTySLC: OS << "SLC"; break;
1005     case ImmTySWZ: OS << "SWZ"; break;
1006     case ImmTyTFE: OS << "TFE"; break;
1007     case ImmTyD16: OS << "D16"; break;
1008     case ImmTyFORMAT: OS << "FORMAT"; break;
1009     case ImmTyClampSI: OS << "ClampSI"; break;
1010     case ImmTyOModSI: OS << "OModSI"; break;
1011     case ImmTyDPP8: OS << "DPP8"; break;
1012     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1013     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1014     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1015     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1016     case ImmTyDppFi: OS << "FI"; break;
1017     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1018     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1019     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1020     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1021     case ImmTyDMask: OS << "DMask"; break;
1022     case ImmTyDim: OS << "Dim"; break;
1023     case ImmTyUNorm: OS << "UNorm"; break;
1024     case ImmTyDA: OS << "DA"; break;
1025     case ImmTyR128A16: OS << "R128A16"; break;
1026     case ImmTyA16: OS << "A16"; break;
1027     case ImmTyLWE: OS << "LWE"; break;
1028     case ImmTyOff: OS << "Off"; break;
1029     case ImmTyExpTgt: OS << "ExpTgt"; break;
1030     case ImmTyExpCompr: OS << "ExpCompr"; break;
1031     case ImmTyExpVM: OS << "ExpVM"; break;
1032     case ImmTyHwreg: OS << "Hwreg"; break;
1033     case ImmTySendMsg: OS << "SendMsg"; break;
1034     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1035     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1036     case ImmTyAttrChan: OS << "AttrChan"; break;
1037     case ImmTyOpSel: OS << "OpSel"; break;
1038     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1039     case ImmTyNegLo: OS << "NegLo"; break;
1040     case ImmTyNegHi: OS << "NegHi"; break;
1041     case ImmTySwizzle: OS << "Swizzle"; break;
1042     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1043     case ImmTyHigh: OS << "High"; break;
1044     case ImmTyBLGP: OS << "BLGP"; break;
1045     case ImmTyCBSZ: OS << "CBSZ"; break;
1046     case ImmTyABID: OS << "ABID"; break;
1047     case ImmTyEndpgm: OS << "Endpgm"; break;
1048     }
1049   }
1050 
1051   void print(raw_ostream &OS) const override {
1052     switch (Kind) {
1053     case Register:
1054       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1055       break;
1056     case Immediate:
1057       OS << '<' << getImm();
1058       if (getImmTy() != ImmTyNone) {
1059         OS << " type: "; printImmTy(OS, getImmTy());
1060       }
1061       OS << " mods: " << Imm.Mods << '>';
1062       break;
1063     case Token:
1064       OS << '\'' << getToken() << '\'';
1065       break;
1066     case Expression:
1067       OS << "<expr " << *Expr << '>';
1068       break;
1069     }
1070   }
1071 
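  // Factory helpers used by the parser to build operands, e.g.:
  //   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
  //                                               AMDGPUOperand::ImmTyOffset));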
1072   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1073                                       int64_t Val, SMLoc Loc,
1074                                       ImmTy Type = ImmTyNone,
1075                                       bool IsFPImm = false) {
1076     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1077     Op->Imm.Val = Val;
1078     Op->Imm.IsFPImm = IsFPImm;
1079     Op->Imm.Kind = ImmKindTyNone;
1080     Op->Imm.Type = Type;
1081     Op->Imm.Mods = Modifiers();
1082     Op->StartLoc = Loc;
1083     Op->EndLoc = Loc;
1084     return Op;
1085   }
1086 
1087   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1088                                         StringRef Str, SMLoc Loc,
1089                                         bool HasExplicitEncodingSize = true) {
1090     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1091     Res->Tok.Data = Str.data();
1092     Res->Tok.Length = Str.size();
1093     Res->StartLoc = Loc;
1094     Res->EndLoc = Loc;
1095     return Res;
1096   }
1097 
1098   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1099                                       unsigned RegNo, SMLoc S,
1100                                       SMLoc E) {
1101     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1102     Op->Reg.RegNo = RegNo;
1103     Op->Reg.Mods = Modifiers();
1104     Op->StartLoc = S;
1105     Op->EndLoc = E;
1106     return Op;
1107   }
1108 
1109   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1110                                        const class MCExpr *Expr, SMLoc S) {
1111     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1112     Op->Expr = Expr;
1113     Op->StartLoc = S;
1114     Op->EndLoc = S;
1115     return Op;
1116   }
1117 };
1118 
1119 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1120   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1121   return OS;
1122 }
1123 
1124 //===----------------------------------------------------------------------===//
1125 // AsmParser
1126 //===----------------------------------------------------------------------===//
1127 
// Holds information related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the next
// .amdgpu_hsa_kernel directive or at EOF.
1131 class KernelScopeInfo {
1132   int SgprIndexUnusedMin = -1;
1133   int VgprIndexUnusedMin = -1;
1134   MCContext *Ctx = nullptr;
1135 
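  // Record that SGPR index \p i is in use and publish the new high-water mark
  // through the .kernel.sgpr_count symbol.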
1136   void usesSgprAt(int i) {
1137     if (i >= SgprIndexUnusedMin) {
1138       SgprIndexUnusedMin = ++i;
1139       if (Ctx) {
1140         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1141         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1142       }
1143     }
1144   }
1145 
1146   void usesVgprAt(int i) {
1147     if (i >= VgprIndexUnusedMin) {
1148       VgprIndexUnusedMin = ++i;
1149       if (Ctx) {
1150         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1151         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1152       }
1153     }
1154   }
1155 
1156 public:
1157   KernelScopeInfo() = default;
1158 
1159   void initialize(MCContext &Context) {
1160     Ctx = &Context;
1161     usesSgprAt(SgprIndexUnusedMin = -1);
1162     usesVgprAt(VgprIndexUnusedMin = -1);
1163   }
1164 
1165   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1166     switch (RegKind) {
1167       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1168       case IS_AGPR: // fall through
1169       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1170       default: break;
1171     }
1172   }
1173 };
1174 
1175 class AMDGPUAsmParser : public MCTargetAsmParser {
1176   MCAsmParser &Parser;
1177 
  // Maximum number of extra operands parsed after the first optional operand.
  // This lookahead may be necessary to skip hardcoded mandatory operands.
1180   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1181 
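  // Encoding forced by a mnemonic suffix such as "_e64", "_dpp" or "_sdwa"
  // (see parseMnemonicSuffix); used to restrict the set of matcher variants.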
1182   unsigned ForcedEncodingSize = 0;
1183   bool ForcedDPP = false;
1184   bool ForcedSDWA = false;
1185   KernelScopeInfo KernelScope;
1186 
1187   /// @name Auto-generated Match Functions
1188   /// {
1189 
1190 #define GET_ASSEMBLER_HEADER
1191 #include "AMDGPUGenAsmMatcher.inc"
1192 
1193   /// }
1194 
1195 private:
1196   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1197   bool OutOfRangeError(SMRange Range);
1198   /// Calculate VGPR/SGPR blocks required for given target, reserved
1199   /// registers, and user-specified NextFreeXGPR values.
1200   ///
1201   /// \param Features [in] Target features, used for bug corrections.
1202   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1203   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1204   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1205   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1206   /// descriptor field, if valid.
1207   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1208   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1209   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1210   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1211   /// \param VGPRBlocks [out] Result VGPR block count.
1212   /// \param SGPRBlocks [out] Result SGPR block count.
1213   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1214                           bool FlatScrUsed, bool XNACKUsed,
1215                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1216                           SMRange VGPRRange, unsigned NextFreeSGPR,
1217                           SMRange SGPRRange, unsigned &VGPRBlocks,
1218                           unsigned &SGPRBlocks);
1219   bool ParseDirectiveAMDGCNTarget();
1220   bool ParseDirectiveAMDHSAKernel();
1221   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1222   bool ParseDirectiveHSACodeObjectVersion();
1223   bool ParseDirectiveHSACodeObjectISA();
1224   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1225   bool ParseDirectiveAMDKernelCodeT();
1226   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1227   bool ParseDirectiveAMDGPUHsaKernel();
1228 
1229   bool ParseDirectiveISAVersion();
1230   bool ParseDirectiveHSAMetadata();
1231   bool ParseDirectivePALMetadataBegin();
1232   bool ParseDirectivePALMetadata();
1233   bool ParseDirectiveAMDGPULDS();
1234 
1235   /// Common code to parse out a block of text (typically YAML) between start and
1236   /// end directives.
1237   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1238                            const char *AssemblerDirectiveEnd,
1239                            std::string &CollectString);
1240 
1241   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1242                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1243   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1244                            unsigned &RegNum, unsigned &RegWidth,
1245                            bool RestoreOnFailure = false);
1246   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1247                            unsigned &RegNum, unsigned &RegWidth,
1248                            SmallVectorImpl<AsmToken> &Tokens);
1249   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1250                            unsigned &RegWidth,
1251                            SmallVectorImpl<AsmToken> &Tokens);
1252   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1253                            unsigned &RegWidth,
1254                            SmallVectorImpl<AsmToken> &Tokens);
1255   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1256                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1257   bool ParseRegRange(unsigned& Num, unsigned& Width);
1258   unsigned getRegularReg(RegisterKind RegKind,
1259                          unsigned RegNum,
1260                          unsigned RegWidth,
1261                          SMLoc Loc);
1262 
1263   bool isRegister();
1264   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1265   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1266   void initializeGprCountSymbol(RegisterKind RegKind);
1267   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1268                              unsigned RegWidth);
1269   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1270                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1271   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1272                  bool IsGdsHardcoded);
1273 
1274 public:
1275   enum AMDGPUMatchResultTy {
1276     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1277   };
1278   enum OperandMode {
1279     OperandMode_Default,
1280     OperandMode_NSA,
1281   };
1282 
1283   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1284 
1285   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1286                const MCInstrInfo &MII,
1287                const MCTargetOptions &Options)
1288       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1289     MCAsmParserExtension::Initialize(Parser);
1290 
1291     if (getFeatureBits().none()) {
1292       // Set default features.
1293       copySTI().ToggleFeature("southern-islands");
1294     }
1295 
1296     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1297 
1298     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1303       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1304       MCContext &Ctx = getContext();
1305       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1306         MCSymbol *Sym =
1307             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1308         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1309         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1311         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1312         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1313       } else {
1314         MCSymbol *Sym =
1315             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1316         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1317         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1318         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1319         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1320         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1321       }
1322       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1323         initializeGprCountSymbol(IS_VGPR);
1324         initializeGprCountSymbol(IS_SGPR);
1325       } else
1326         KernelScope.initialize(getContext());
1327     }
1328   }
1329 
1330   bool hasXNACK() const {
1331     return AMDGPU::hasXNACK(getSTI());
1332   }
1333 
1334   bool hasMIMG_R128() const {
1335     return AMDGPU::hasMIMG_R128(getSTI());
1336   }
1337 
1338   bool hasPackedD16() const {
1339     return AMDGPU::hasPackedD16(getSTI());
1340   }
1341 
1342   bool hasGFX10A16() const {
1343     return AMDGPU::hasGFX10A16(getSTI());
1344   }
1345 
1346   bool isSI() const {
1347     return AMDGPU::isSI(getSTI());
1348   }
1349 
1350   bool isCI() const {
1351     return AMDGPU::isCI(getSTI());
1352   }
1353 
1354   bool isVI() const {
1355     return AMDGPU::isVI(getSTI());
1356   }
1357 
1358   bool isGFX9() const {
1359     return AMDGPU::isGFX9(getSTI());
1360   }
1361 
1362   bool isGFX90A() const {
1363     return AMDGPU::isGFX90A(getSTI());
1364   }
1365 
1366   bool isGFX9Plus() const {
1367     return AMDGPU::isGFX9Plus(getSTI());
1368   }
1369 
1370   bool isGFX10() const {
1371     return AMDGPU::isGFX10(getSTI());
1372   }
1373 
1374   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1375 
1376   bool isGFX10_BEncoding() const {
1377     return AMDGPU::isGFX10_BEncoding(getSTI());
1378   }
1379 
1380   bool hasInv2PiInlineImm() const {
1381     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1382   }
1383 
1384   bool hasFlatOffsets() const {
1385     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1386   }
1387 
1388   bool hasSGPR102_SGPR103() const {
1389     return !isVI() && !isGFX9();
1390   }
1391 
1392   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1393 
1394   bool hasIntClamp() const {
1395     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1396   }
1397 
1398   AMDGPUTargetStreamer &getTargetStreamer() {
1399     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1400     return static_cast<AMDGPUTargetStreamer &>(TS);
1401   }
1402 
1403   const MCRegisterInfo *getMRI() const {
1404     // We need this const_cast because for some reason getContext() is not const
1405     // in MCAsmParser.
1406     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1407   }
1408 
1409   const MCInstrInfo *getMII() const {
1410     return &MII;
1411   }
1412 
1413   const FeatureBitset &getFeatureBits() const {
1414     return getSTI().getFeatureBits();
1415   }
1416 
1417   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1418   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1419   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1420 
1421   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1422   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1423   bool isForcedDPP() const { return ForcedDPP; }
1424   bool isForcedSDWA() const { return ForcedSDWA; }
1425   ArrayRef<unsigned> getMatchedVariants() const;
1426   StringRef getMatchedVariantName() const;
1427 
1428   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1429   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1430                      bool RestoreOnFailure);
1431   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1432   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1433                                         SMLoc &EndLoc) override;
1434   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1435   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1436                                       unsigned Kind) override;
1437   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1438                                OperandVector &Operands, MCStreamer &Out,
1439                                uint64_t &ErrorInfo,
1440                                bool MatchingInlineAsm) override;
1441   bool ParseDirective(AsmToken DirectiveID) override;
1442   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1443                                     OperandMode Mode = OperandMode_Default);
1444   StringRef parseMnemonicSuffix(StringRef Name);
1445   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1446                         SMLoc NameLoc, OperandVector &Operands) override;
1447   //bool ProcessInstruction(MCInst &Inst);
1448 
1449   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1450 
1451   OperandMatchResultTy
1452   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1453                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1454                      bool (*ConvertResult)(int64_t &) = nullptr);
1455 
1456   OperandMatchResultTy
1457   parseOperandArrayWithPrefix(const char *Prefix,
1458                               OperandVector &Operands,
1459                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1460                               bool (*ConvertResult)(int64_t&) = nullptr);
1461 
1462   OperandMatchResultTy
1463   parseNamedBit(StringRef Name, OperandVector &Operands,
1464                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1465   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1466                                              StringRef &Value,
1467                                              SMLoc &StringLoc);
1468 
1469   bool isModifier();
1470   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1471   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1472   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1473   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1474   bool parseSP3NegModifier();
1475   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1476   OperandMatchResultTy parseReg(OperandVector &Operands);
1477   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1478   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1479   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1480   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1481   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1482   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1483   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1484   OperandMatchResultTy parseUfmt(int64_t &Format);
1485   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1486   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1487   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1488   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1489   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1490   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1491   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1492 
1493   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1494   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1495   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1496   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1497 
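  // s_waitcnt counter operands, e.g. "s_waitcnt vmcnt(0) lgkmcnt(0)".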
1498   bool parseCnt(int64_t &IntVal);
1499   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1500   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1501 
1502 private:
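  // A single named field of a structured operand (e.g. the message id in
  // sendmsg(...) or the register id in hwreg(...)), together with its parse
  // location and whether it was specified symbolically.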
1503   struct OperandInfoTy {
1504     SMLoc Loc;
1505     int64_t Id;
1506     bool IsSymbolic = false;
1507     bool IsDefined = false;
1508 
1509     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1510   };
1511 
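  // s_sendmsg operands of the form "sendmsg(MSG_GS, GS_OP_EMIT, 0)".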
1512   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1513   bool validateSendMsg(const OperandInfoTy &Msg,
1514                        const OperandInfoTy &Op,
1515                        const OperandInfoTy &Stream);
1516 
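  // s_getreg/s_setreg operands of the form "hwreg(HW_REG_MODE, 0, 4)".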
1517   bool parseHwregBody(OperandInfoTy &HwReg,
1518                       OperandInfoTy &Offset,
1519                       OperandInfoTy &Width);
1520   bool validateHwreg(const OperandInfoTy &HwReg,
1521                      const OperandInfoTy &Offset,
1522                      const OperandInfoTy &Width);
1523 
1524   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1525   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1526 
1527   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1528                       const OperandVector &Operands) const;
1529   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1530   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1531   SMLoc getLitLoc(const OperandVector &Operands) const;
1532   SMLoc getConstLoc(const OperandVector &Operands) const;
1533 
1534   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1535   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1536   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1537   bool validateSOPLiteral(const MCInst &Inst) const;
1538   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1539   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1540   bool validateIntClampSupported(const MCInst &Inst);
1541   bool validateMIMGAtomicDMask(const MCInst &Inst);
1542   bool validateMIMGGatherDMask(const MCInst &Inst);
1543   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1544   bool validateMIMGDataSize(const MCInst &Inst);
1545   bool validateMIMGAddrSize(const MCInst &Inst);
1546   bool validateMIMGD16(const MCInst &Inst);
1547   bool validateMIMGDim(const MCInst &Inst);
1548   bool validateMIMGMSAA(const MCInst &Inst);
1549   bool validateLdsDirect(const MCInst &Inst);
1550   bool validateOpSel(const MCInst &Inst);
1551   bool validateVccOperand(unsigned Reg) const;
1552   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1553   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1554   bool validateAGPRLdSt(const MCInst &Inst) const;
1555   bool validateVGPRAlign(const MCInst &Inst) const;
1556   bool validateDivScale(const MCInst &Inst);
1557   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1558                              const SMLoc &IDLoc);
1559   unsigned getConstantBusLimit(unsigned Opcode) const;
1560   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1561   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1562   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1563 
1564   bool isSupportedMnemo(StringRef Mnemo,
1565                         const FeatureBitset &FBS);
1566   bool isSupportedMnemo(StringRef Mnemo,
1567                         const FeatureBitset &FBS,
1568                         ArrayRef<unsigned> Variants);
1569   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1570 
1571   bool isId(const StringRef Id) const;
1572   bool isId(const AsmToken &Token, const StringRef Id) const;
1573   bool isToken(const AsmToken::TokenKind Kind) const;
1574   bool trySkipId(const StringRef Id);
1575   bool trySkipId(const StringRef Pref, const StringRef Id);
1576   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1577   bool trySkipToken(const AsmToken::TokenKind Kind);
1578   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1579   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1580   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1581 
1582   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1583   AsmToken::TokenKind getTokenKind() const;
1584   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1585   bool parseExpr(OperandVector &Operands);
1586   StringRef getTokenStr() const;
1587   AsmToken peekToken();
1588   AsmToken getToken() const;
1589   SMLoc getLoc() const;
1590   void lex();
1591 
1592 public:
1593   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1594   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1595 
1596   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1597   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1598   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1599   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1600   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1601   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1602 
1603   bool parseSwizzleOperand(int64_t &Op,
1604                            const unsigned MinVal,
1605                            const unsigned MaxVal,
1606                            const StringRef ErrMsg,
1607                            SMLoc &Loc);
1608   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1609                             const unsigned MinVal,
1610                             const unsigned MaxVal,
1611                             const StringRef ErrMsg);
1612   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1613   bool parseSwizzleOffset(int64_t &Imm);
1614   bool parseSwizzleMacro(int64_t &Imm);
1615   bool parseSwizzleQuadPerm(int64_t &Imm);
1616   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1617   bool parseSwizzleBroadcast(int64_t &Imm);
1618   bool parseSwizzleSwap(int64_t &Imm);
1619   bool parseSwizzleReverse(int64_t &Imm);
1620 
1621   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1622   int64_t parseGPRIdxMacro();
1623 
1624   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1625   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1626   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1627   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1628   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1629 
1630   AMDGPUOperand::Ptr defaultDLC() const;
1631   AMDGPUOperand::Ptr defaultSCCB() const;
1632   AMDGPUOperand::Ptr defaultGLC() const;
1633   AMDGPUOperand::Ptr defaultGLC_1() const;
1634   AMDGPUOperand::Ptr defaultSLC() const;
1635 
1636   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1637   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1638   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1639   AMDGPUOperand::Ptr defaultFlatOffset() const;
1640 
1641   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1642 
1643   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1644                OptionalImmIndexMap &OptionalIdx);
1645   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1646   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1647   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1648 
1649   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1650 
1651   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1652                bool IsAtomic = false);
1653   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1654   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1655 
1656   bool parseDimId(unsigned &Encoding);
1657   OperandMatchResultTy parseDim(OperandVector &Operands);
1658   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1659   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1660   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1661   int64_t parseDPPCtrlSel(StringRef Ctrl);
1662   int64_t parseDPPCtrlPerm();
1663   AMDGPUOperand::Ptr defaultRowMask() const;
1664   AMDGPUOperand::Ptr defaultBankMask() const;
1665   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1666   AMDGPUOperand::Ptr defaultFI() const;
1667   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1668   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1669 
1670   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1671                                     AMDGPUOperand::ImmTy Type);
1672   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1673   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1674   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1675   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1676   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1677   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1678   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1679                uint64_t BasicInstType,
1680                bool SkipDstVcc = false,
1681                bool SkipSrcVcc = false);
1682 
1683   AMDGPUOperand::Ptr defaultBLGP() const;
1684   AMDGPUOperand::Ptr defaultCBSZ() const;
1685   AMDGPUOperand::Ptr defaultABID() const;
1686 
1687   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1688   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1689 };
1690 
1691 struct OptionalOperand {
1692   const char *Name;
1693   AMDGPUOperand::ImmTy Type;
1694   bool IsBit;
1695   bool (*ConvertResult)(int64_t&);
1696 };
1697 
1698 } // end anonymous namespace
1699 
1700 // May be called with an integer type of equivalent bit width.
1701 static const fltSemantics *getFltSemantics(unsigned Size) {
1702   switch (Size) {
1703   case 4:
1704     return &APFloat::IEEEsingle();
1705   case 8:
1706     return &APFloat::IEEEdouble();
1707   case 2:
1708     return &APFloat::IEEEhalf();
1709   default:
1710     llvm_unreachable("unsupported fp type");
1711   }
1712 }
1713 
1714 static const fltSemantics *getFltSemantics(MVT VT) {
1715   return getFltSemantics(VT.getSizeInBits() / 8);
1716 }
1717 
1718 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1719   switch (OperandType) {
1720   case AMDGPU::OPERAND_REG_IMM_INT32:
1721   case AMDGPU::OPERAND_REG_IMM_FP32:
1722   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1723   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1724   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1725   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1726   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1727   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1728   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1729   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1730     return &APFloat::IEEEsingle();
1731   case AMDGPU::OPERAND_REG_IMM_INT64:
1732   case AMDGPU::OPERAND_REG_IMM_FP64:
1733   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1734   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1735   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1736     return &APFloat::IEEEdouble();
1737   case AMDGPU::OPERAND_REG_IMM_INT16:
1738   case AMDGPU::OPERAND_REG_IMM_FP16:
1739   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1740   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1741   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1742   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1743   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1744   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1745   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1746   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1747   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1748   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1749     return &APFloat::IEEEhalf();
1750   default:
1751     llvm_unreachable("unsupported fp type");
1752   }
1753 }
1754 
1755 //===----------------------------------------------------------------------===//
1756 // Operand
1757 //===----------------------------------------------------------------------===//
1758 
1759 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1760   bool Lost;
1761 
1762   // Convert the literal to the floating-point semantics of VT
1763   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1764                                                APFloat::rmNearestTiesToEven,
1765                                                &Lost);
1766   // We allow precision loss but not overflow or underflow
1767   if (Status != APFloat::opOK &&
1768       Lost &&
1769       ((Status & APFloat::opOverflow)  != 0 ||
1770        (Status & APFloat::opUnderflow) != 0)) {
1771     return false;
1772   }
1773 
1774   return true;
1775 }
1776 
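// Check whether Val fits into Size bits either as an unsigned or as a
// signed value. For example, with Size == 16 both 0xFFFF and -1 are safe
// to truncate, while 0x1FFFF is not.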
1777 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1778   return isUIntN(Size, Val) || isIntN(Size, Val);
1779 }
1780 
1781 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1782   if (VT.getScalarType() == MVT::i16) {
1783     // FP immediate values are broken for i16 operands; only accept integer inline literals.
1784     return isInlinableIntLiteral(Val);
1785   }
1786 
1787   // f16/v2f16 operands work correctly for all values.
1788   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1789 }
1790 
1791 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1792 
1793   // This is a hack to enable named inline values like
1794   // shared_base with both 32-bit and 64-bit operands.
1795   // Note that these values are defined as
1796   // 32-bit operands only.
1797   if (isInlineValue()) {
1798     return true;
1799   }
1800 
1801   if (!isImmTy(ImmTyNone)) {
1802     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1803     return false;
1804   }
1805   // TODO: We should avoid using host float here. It would be better to
1806   // check the float bit values which is what a few other places do.
1807   // We've had bot failures before due to weird NaN support on mips hosts.
1808 
1809   APInt Literal(64, Imm.Val);
1810 
1811   if (Imm.IsFPImm) { // We got fp literal token
1812     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1813       return AMDGPU::isInlinableLiteral64(Imm.Val,
1814                                           AsmParser->hasInv2PiInlineImm());
1815     }
1816 
1817     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1818     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1819       return false;
1820 
1821     if (type.getScalarSizeInBits() == 16) {
1822       return isInlineableLiteralOp16(
1823         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1824         type, AsmParser->hasInv2PiInlineImm());
1825     }
1826 
1827     // Check if single precision literal is inlinable
1828     return AMDGPU::isInlinableLiteral32(
1829       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1830       AsmParser->hasInv2PiInlineImm());
1831   }
1832 
1833   // We got int literal token.
1834   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1835     return AMDGPU::isInlinableLiteral64(Imm.Val,
1836                                         AsmParser->hasInv2PiInlineImm());
1837   }
1838 
1839   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1840     return false;
1841   }
1842 
1843   if (type.getScalarSizeInBits() == 16) {
1844     return isInlineableLiteralOp16(
1845       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1846       type, AsmParser->hasInv2PiInlineImm());
1847   }
1848 
1849   return AMDGPU::isInlinableLiteral32(
1850     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1851     AsmParser->hasInv2PiInlineImm());
1852 }
1853 
1854 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1855   // Check that this immediate can be added as a literal
1856   if (!isImmTy(ImmTyNone)) {
1857     return false;
1858   }
1859 
1860   if (!Imm.IsFPImm) {
1861     // We got int literal token.
1862 
1863     if (type == MVT::f64 && hasFPModifiers()) {
1864       // FP modifiers cannot be applied to int literals while preserving the
1865       // same semantics for VOP1/2/C and VOP3, because of integer truncation.
1866       // To avoid ambiguity, these cases are disabled.
1867       return false;
1868     }
1869 
1870     unsigned Size = type.getSizeInBits();
1871     if (Size == 64)
1872       Size = 32;
1873 
1874     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1875     // types.
1876     return isSafeTruncation(Imm.Val, Size);
1877   }
1878 
1879   // We got fp literal token
1880   if (type == MVT::f64) { // Expected 64-bit fp operand
1881     // The low 32 bits of the literal would be zeroed, but such literals are accepted.
1882     return true;
1883   }
1884 
1885   if (type == MVT::i64) { // Expected 64-bit int operand
1886     // We don't allow fp literals in 64-bit integer instructions. It is
1887     // unclear how we should encode them.
1888     return false;
1889   }
1890 
1891   // We allow fp literals with f16x2 operands assuming that the specified
1892   // literal goes into the lower half and the upper half is zero. We also
1893   // require that the literal can be losslessly converted to f16.
1894   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1895                      (type == MVT::v2i16)? MVT::i16 :
1896                      (type == MVT::v2f32)? MVT::f32 : type;
1897 
1898   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1899   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1900 }
1901 
1902 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1903   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1904 }
1905 
1906 bool AMDGPUOperand::isVRegWithInputMods() const {
1907   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1908          // GFX90A allows DPP on 64-bit operands.
1909          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1910           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1911 }
1912 
1913 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1914   if (AsmParser->isVI())
1915     return isVReg32();
1916   else if (AsmParser->isGFX9Plus())
1917     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1918   else
1919     return false;
1920 }
1921 
1922 bool AMDGPUOperand::isSDWAFP16Operand() const {
1923   return isSDWAOperand(MVT::f16);
1924 }
1925 
1926 bool AMDGPUOperand::isSDWAFP32Operand() const {
1927   return isSDWAOperand(MVT::f32);
1928 }
1929 
1930 bool AMDGPUOperand::isSDWAInt16Operand() const {
1931   return isSDWAOperand(MVT::i16);
1932 }
1933 
1934 bool AMDGPUOperand::isSDWAInt32Operand() const {
1935   return isSDWAOperand(MVT::i32);
1936 }
1937 
1938 bool AMDGPUOperand::isBoolReg() const {
1939   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1940          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1941 }
1942 
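// Apply the 'abs' and 'neg' FP input modifiers to the raw bit pattern of
// an FP literal of the given byte size: 'abs' clears the sign bit, 'neg'
// toggles it. For example, applying 'neg' to the f32 literal 1.0
// (0x3F800000) yields -1.0 (0xBF800000).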
1943 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1944 {
1945   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1946   assert(Size == 2 || Size == 4 || Size == 8);
1947 
1948   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1949 
1950   if (Imm.Mods.Abs) {
1951     Val &= ~FpSignMask;
1952   }
1953   if (Imm.Mods.Neg) {
1954     Val ^= FpSignMask;
1955   }
1956 
1957   return Val;
1958 }
1959 
1960 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1961   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1962                              Inst.getNumOperands())) {
1963     addLiteralImmOperand(Inst, Imm.Val,
1964                          ApplyModifiers &
1965                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1966   } else {
1967     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1968     Inst.addOperand(MCOperand::createImm(Imm.Val));
1969     setImmKindNone();
1970   }
1971 }
1972 
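// Encode a parsed literal as an instruction operand. Values that match a
// hardware inline constant are emitted unchanged; other values are emitted
// as a literal. For non-inlinable fp literals in 64-bit fp operands only
// the high 32 bits are kept (see the warning below).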
1973 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1974   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1975   auto OpNum = Inst.getNumOperands();
1976   // Check that this operand accepts literals
1977   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1978 
1979   if (ApplyModifiers) {
1980     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1981     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1982     Val = applyInputFPModifiers(Val, Size);
1983   }
1984 
1985   APInt Literal(64, Val);
1986   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1987 
1988   if (Imm.IsFPImm) { // We got fp literal token
1989     switch (OpTy) {
1990     case AMDGPU::OPERAND_REG_IMM_INT64:
1991     case AMDGPU::OPERAND_REG_IMM_FP64:
1992     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1993     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1994     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1995       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1996                                        AsmParser->hasInv2PiInlineImm())) {
1997         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1998         setImmKindConst();
1999         return;
2000       }
2001 
2002       // Non-inlineable
2003       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2004         // For fp operands we check whether the low 32 bits are zero
2005         if (Literal.getLoBits(32) != 0) {
2006           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2007           "Can't encode literal as exact 64-bit floating-point operand. "
2008           "Low 32-bits will be set to zero");
2009         }
2010 
2011         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2012         setImmKindLiteral();
2013         return;
2014       }
2015 
2016       // We don't allow fp literals in 64-bit integer instructions. It is
2017       // unclear how we should encode them. This case should be checked earlier
2018       // in predicate methods (isLiteralImm())
2019       llvm_unreachable("fp literal in 64-bit integer instruction.");
2020 
2021     case AMDGPU::OPERAND_REG_IMM_INT32:
2022     case AMDGPU::OPERAND_REG_IMM_FP32:
2023     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2024     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2025     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2026     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2027     case AMDGPU::OPERAND_REG_IMM_INT16:
2028     case AMDGPU::OPERAND_REG_IMM_FP16:
2029     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2030     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2031     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2032     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2033     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2034     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2035     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2036     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2037     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2038     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2039     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2040     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2041     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2042     case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2043       bool lost;
2044       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2045       // Convert the literal to the operand's floating-point semantics
2046       FPLiteral.convert(*getOpFltSemantics(OpTy),
2047                         APFloat::rmNearestTiesToEven, &lost);
2048       // We allow precision loss but not overflow or underflow. This should be
2049       // checked earlier in isLiteralImm()
2050 
2051       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2052       Inst.addOperand(MCOperand::createImm(ImmVal));
2053       setImmKindLiteral();
2054       return;
2055     }
2056     default:
2057       llvm_unreachable("invalid operand size");
2058     }
2059 
2060     return;
2061   }
2062 
2063   // We got int literal token.
2064   // Only sign extend inline immediates.
2065   switch (OpTy) {
2066   case AMDGPU::OPERAND_REG_IMM_INT32:
2067   case AMDGPU::OPERAND_REG_IMM_FP32:
2068   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2069   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2070   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2071   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2072   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2073   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2074   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2075   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2076   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2077   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2078     if (isSafeTruncation(Val, 32) &&
2079         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2080                                      AsmParser->hasInv2PiInlineImm())) {
2081       Inst.addOperand(MCOperand::createImm(Val));
2082       setImmKindConst();
2083       return;
2084     }
2085 
2086     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2087     setImmKindLiteral();
2088     return;
2089 
2090   case AMDGPU::OPERAND_REG_IMM_INT64:
2091   case AMDGPU::OPERAND_REG_IMM_FP64:
2092   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2093   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2094   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2095     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2096       Inst.addOperand(MCOperand::createImm(Val));
2097       setImmKindConst();
2098       return;
2099     }
2100 
2101     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2102     setImmKindLiteral();
2103     return;
2104 
2105   case AMDGPU::OPERAND_REG_IMM_INT16:
2106   case AMDGPU::OPERAND_REG_IMM_FP16:
2107   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2108   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2109   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2110   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2111     if (isSafeTruncation(Val, 16) &&
2112         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2113                                      AsmParser->hasInv2PiInlineImm())) {
2114       Inst.addOperand(MCOperand::createImm(Val));
2115       setImmKindConst();
2116       return;
2117     }
2118 
2119     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2120     setImmKindLiteral();
2121     return;
2122 
2123   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2124   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2125   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2126   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2127     assert(isSafeTruncation(Val, 16));
2128     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2129                                         AsmParser->hasInv2PiInlineImm()));
2130 
2131     Inst.addOperand(MCOperand::createImm(Val));
2132     return;
2133   }
2134   default:
2135     llvm_unreachable("invalid operand size");
2136   }
2137 }
2138 
2139 template <unsigned Bitwidth>
2140 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2141   APInt Literal(64, Imm.Val);
2142   setImmKindNone();
2143 
2144   if (!Imm.IsFPImm) {
2145     // We got int literal token.
2146     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2147     return;
2148   }
2149 
2150   bool Lost;
2151   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2152   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2153                     APFloat::rmNearestTiesToEven, &Lost);
2154   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2155 }
2156 
2157 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2158   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2159 }
2160 
2161 static bool isInlineValue(unsigned Reg) {
2162   switch (Reg) {
2163   case AMDGPU::SRC_SHARED_BASE:
2164   case AMDGPU::SRC_SHARED_LIMIT:
2165   case AMDGPU::SRC_PRIVATE_BASE:
2166   case AMDGPU::SRC_PRIVATE_LIMIT:
2167   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2168     return true;
2169   case AMDGPU::SRC_VCCZ:
2170   case AMDGPU::SRC_EXECZ:
2171   case AMDGPU::SRC_SCC:
2172     return true;
2173   case AMDGPU::SGPR_NULL:
2174     return true;
2175   default:
2176     return false;
2177   }
2178 }
2179 
2180 bool AMDGPUOperand::isInlineValue() const {
2181   return isRegKind() && ::isInlineValue(getReg());
2182 }
2183 
2184 //===----------------------------------------------------------------------===//
2185 // AsmParser
2186 //===----------------------------------------------------------------------===//
2187 
2188 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2189   if (Is == IS_VGPR) {
2190     switch (RegWidth) {
2191       default: return -1;
2192       case 1: return AMDGPU::VGPR_32RegClassID;
2193       case 2: return AMDGPU::VReg_64RegClassID;
2194       case 3: return AMDGPU::VReg_96RegClassID;
2195       case 4: return AMDGPU::VReg_128RegClassID;
2196       case 5: return AMDGPU::VReg_160RegClassID;
2197       case 6: return AMDGPU::VReg_192RegClassID;
2198       case 8: return AMDGPU::VReg_256RegClassID;
2199       case 16: return AMDGPU::VReg_512RegClassID;
2200       case 32: return AMDGPU::VReg_1024RegClassID;
2201     }
2202   } else if (Is == IS_TTMP) {
2203     switch (RegWidth) {
2204       default: return -1;
2205       case 1: return AMDGPU::TTMP_32RegClassID;
2206       case 2: return AMDGPU::TTMP_64RegClassID;
2207       case 4: return AMDGPU::TTMP_128RegClassID;
2208       case 8: return AMDGPU::TTMP_256RegClassID;
2209       case 16: return AMDGPU::TTMP_512RegClassID;
2210     }
2211   } else if (Is == IS_SGPR) {
2212     switch (RegWidth) {
2213       default: return -1;
2214       case 1: return AMDGPU::SGPR_32RegClassID;
2215       case 2: return AMDGPU::SGPR_64RegClassID;
2216       case 3: return AMDGPU::SGPR_96RegClassID;
2217       case 4: return AMDGPU::SGPR_128RegClassID;
2218       case 5: return AMDGPU::SGPR_160RegClassID;
2219       case 6: return AMDGPU::SGPR_192RegClassID;
2220       case 8: return AMDGPU::SGPR_256RegClassID;
2221       case 16: return AMDGPU::SGPR_512RegClassID;
2222     }
2223   } else if (Is == IS_AGPR) {
2224     switch (RegWidth) {
2225       default: return -1;
2226       case 1: return AMDGPU::AGPR_32RegClassID;
2227       case 2: return AMDGPU::AReg_64RegClassID;
2228       case 3: return AMDGPU::AReg_96RegClassID;
2229       case 4: return AMDGPU::AReg_128RegClassID;
2230       case 5: return AMDGPU::AReg_160RegClassID;
2231       case 6: return AMDGPU::AReg_192RegClassID;
2232       case 8: return AMDGPU::AReg_256RegClassID;
2233       case 16: return AMDGPU::AReg_512RegClassID;
2234       case 32: return AMDGPU::AReg_1024RegClassID;
2235     }
2236   }
2237   return -1;
2238 }
2239 
2240 static unsigned getSpecialRegForName(StringRef RegName) {
2241   return StringSwitch<unsigned>(RegName)
2242     .Case("exec", AMDGPU::EXEC)
2243     .Case("vcc", AMDGPU::VCC)
2244     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2245     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2246     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2247     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2248     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2249     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2250     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2251     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2252     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2253     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2254     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2255     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2256     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2257     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2258     .Case("m0", AMDGPU::M0)
2259     .Case("vccz", AMDGPU::SRC_VCCZ)
2260     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2261     .Case("execz", AMDGPU::SRC_EXECZ)
2262     .Case("src_execz", AMDGPU::SRC_EXECZ)
2263     .Case("scc", AMDGPU::SRC_SCC)
2264     .Case("src_scc", AMDGPU::SRC_SCC)
2265     .Case("tba", AMDGPU::TBA)
2266     .Case("tma", AMDGPU::TMA)
2267     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2268     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2269     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2270     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2271     .Case("vcc_lo", AMDGPU::VCC_LO)
2272     .Case("vcc_hi", AMDGPU::VCC_HI)
2273     .Case("exec_lo", AMDGPU::EXEC_LO)
2274     .Case("exec_hi", AMDGPU::EXEC_HI)
2275     .Case("tma_lo", AMDGPU::TMA_LO)
2276     .Case("tma_hi", AMDGPU::TMA_HI)
2277     .Case("tba_lo", AMDGPU::TBA_LO)
2278     .Case("tba_hi", AMDGPU::TBA_HI)
2279     .Case("pc", AMDGPU::PC_REG)
2280     .Case("null", AMDGPU::SGPR_NULL)
2281     .Default(AMDGPU::NoRegister);
2282 }
2283 
2284 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2285                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2286   auto R = parseRegister();
2287   if (!R) return true;
2288   assert(R->isReg());
2289   RegNo = R->getReg();
2290   StartLoc = R->getStartLoc();
2291   EndLoc = R->getEndLoc();
2292   return false;
2293 }
2294 
2295 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2296                                     SMLoc &EndLoc) {
2297   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2298 }
2299 
2300 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2301                                                        SMLoc &StartLoc,
2302                                                        SMLoc &EndLoc) {
2303   bool Result =
2304       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2305   bool PendingErrors = getParser().hasPendingError();
2306   getParser().clearPendingErrors();
2307   if (PendingErrors)
2308     return MatchOperand_ParseFail;
2309   if (Result)
2310     return MatchOperand_NoMatch;
2311   return MatchOperand_Success;
2312 }
2313 
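// Fold the next register parsed from a register list, e.g. "[s0,s1,s2,s3]"
// or "[exec_lo,exec_hi]", into the tuple accumulated so far. Regular
// registers must have consecutive indices; special registers may only be
// combined into known pairs such as exec_lo/exec_hi -> EXEC.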
2314 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2315                                             RegisterKind RegKind, unsigned Reg1,
2316                                             SMLoc Loc) {
2317   switch (RegKind) {
2318   case IS_SPECIAL:
2319     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2320       Reg = AMDGPU::EXEC;
2321       RegWidth = 2;
2322       return true;
2323     }
2324     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2325       Reg = AMDGPU::FLAT_SCR;
2326       RegWidth = 2;
2327       return true;
2328     }
2329     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2330       Reg = AMDGPU::XNACK_MASK;
2331       RegWidth = 2;
2332       return true;
2333     }
2334     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2335       Reg = AMDGPU::VCC;
2336       RegWidth = 2;
2337       return true;
2338     }
2339     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2340       Reg = AMDGPU::TBA;
2341       RegWidth = 2;
2342       return true;
2343     }
2344     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2345       Reg = AMDGPU::TMA;
2346       RegWidth = 2;
2347       return true;
2348     }
2349     Error(Loc, "register does not fit in the list");
2350     return false;
2351   case IS_VGPR:
2352   case IS_SGPR:
2353   case IS_AGPR:
2354   case IS_TTMP:
2355     if (Reg1 != Reg + RegWidth) {
2356       Error(Loc, "registers in a list must have consecutive indices");
2357       return false;
2358     }
2359     RegWidth++;
2360     return true;
2361   default:
2362     llvm_unreachable("unexpected register kind");
2363   }
2364 }
2365 
2366 struct RegInfo {
2367   StringLiteral Name;
2368   RegisterKind Kind;
2369 };
2370 
2371 static constexpr RegInfo RegularRegisters[] = {
2372   {{"v"},    IS_VGPR},
2373   {{"s"},    IS_SGPR},
2374   {{"ttmp"}, IS_TTMP},
2375   {{"acc"},  IS_AGPR},
2376   {{"a"},    IS_AGPR},
2377 };
2378 
2379 static bool isRegularReg(RegisterKind Kind) {
2380   return Kind == IS_VGPR ||
2381          Kind == IS_SGPR ||
2382          Kind == IS_TTMP ||
2383          Kind == IS_AGPR;
2384 }
2385 
2386 static const RegInfo* getRegularRegInfo(StringRef Str) {
2387   for (const RegInfo &Reg : RegularRegisters)
2388     if (Str.startswith(Reg.Name))
2389       return &Reg;
2390   return nullptr;
2391 }
2392 
2393 static bool getRegNum(StringRef Str, unsigned& Num) {
2394   return !Str.getAsInteger(10, Num);
2395 }
2396 
2397 bool
2398 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2399                             const AsmToken &NextToken) const {
2400 
2401   // A list of consecutive registers: [s0,s1,s2,s3]
2402   if (Token.is(AsmToken::LBrac))
2403     return true;
2404 
2405   if (!Token.is(AsmToken::Identifier))
2406     return false;
2407 
2408   // A single register like s0 or a range of registers like s[0:1]
2409 
2410   StringRef Str = Token.getString();
2411   const RegInfo *Reg = getRegularRegInfo(Str);
2412   if (Reg) {
2413     StringRef RegName = Reg->Name;
2414     StringRef RegSuffix = Str.substr(RegName.size());
2415     if (!RegSuffix.empty()) {
2416       unsigned Num;
2417       // A single register with an index: rXX
2418       if (getRegNum(RegSuffix, Num))
2419         return true;
2420     } else {
2421       // A range of registers: r[XX:YY].
2422       if (NextToken.is(AsmToken::LBrac))
2423         return true;
2424     }
2425   }
2426 
2427   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2428 }
2429 
2430 bool
2431 AMDGPUAsmParser::isRegister()
2432 {
2433   return isRegister(getToken(), peekToken());
2434 }
2435 
2436 unsigned
2437 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2438                                unsigned RegNum,
2439                                unsigned RegWidth,
2440                                SMLoc Loc) {
2441 
2442   assert(isRegularReg(RegKind));
2443 
2444   unsigned AlignSize = 1;
2445   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2446     // SGPR and TTMP registers must be aligned.
2447     // Max required alignment is 4 dwords.
2448     AlignSize = std::min(RegWidth, 4u);
2449   }
2450 
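  // For example, with these rules "s[2:3]" is accepted (the starting index 2
  // is 2-dword aligned), while "s[2:5]" is rejected because a 4-dword SGPR
  // tuple must start at an index divisible by 4.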
2451   if (RegNum % AlignSize != 0) {
2452     Error(Loc, "invalid register alignment");
2453     return AMDGPU::NoRegister;
2454   }
2455 
2456   unsigned RegIdx = RegNum / AlignSize;
2457   int RCID = getRegClass(RegKind, RegWidth);
2458   if (RCID == -1) {
2459     Error(Loc, "invalid or unsupported register size");
2460     return AMDGPU::NoRegister;
2461   }
2462 
2463   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2464   const MCRegisterClass RC = TRI->getRegClass(RCID);
2465   if (RegIdx >= RC.getNumRegs()) {
2466     Error(Loc, "register index is out of range");
2467     return AMDGPU::NoRegister;
2468   }
2469 
2470   return RC.getRegister(RegIdx);
2471 }
2472 
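// Parse a register index or an index range enclosed in square brackets,
// e.g. "[0]" or "[0:3]". On success, Num holds the first index and Width
// the number of registers covered by the range.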
2473 bool
2474 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2475   int64_t RegLo, RegHi;
2476   if (!skipToken(AsmToken::LBrac, "missing register index"))
2477     return false;
2478 
2479   SMLoc FirstIdxLoc = getLoc();
2480   SMLoc SecondIdxLoc;
2481 
2482   if (!parseExpr(RegLo))
2483     return false;
2484 
2485   if (trySkipToken(AsmToken::Colon)) {
2486     SecondIdxLoc = getLoc();
2487     if (!parseExpr(RegHi))
2488       return false;
2489   } else {
2490     RegHi = RegLo;
2491   }
2492 
2493   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2494     return false;
2495 
2496   if (!isUInt<32>(RegLo)) {
2497     Error(FirstIdxLoc, "invalid register index");
2498     return false;
2499   }
2500 
2501   if (!isUInt<32>(RegHi)) {
2502     Error(SecondIdxLoc, "invalid register index");
2503     return false;
2504   }
2505 
2506   if (RegLo > RegHi) {
2507     Error(FirstIdxLoc, "first register index should not exceed second index");
2508     return false;
2509   }
2510 
2511   Num = static_cast<unsigned>(RegLo);
2512   Width = (RegHi - RegLo) + 1;
2513   return true;
2514 }
2515 
2516 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2517                                           unsigned &RegNum, unsigned &RegWidth,
2518                                           SmallVectorImpl<AsmToken> &Tokens) {
2519   assert(isToken(AsmToken::Identifier));
2520   unsigned Reg = getSpecialRegForName(getTokenStr());
2521   if (Reg) {
2522     RegNum = 0;
2523     RegWidth = 1;
2524     RegKind = IS_SPECIAL;
2525     Tokens.push_back(getToken());
2526     lex(); // skip register name
2527   }
2528   return Reg;
2529 }
2530 
2531 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2532                                           unsigned &RegNum, unsigned &RegWidth,
2533                                           SmallVectorImpl<AsmToken> &Tokens) {
2534   assert(isToken(AsmToken::Identifier));
2535   StringRef RegName = getTokenStr();
2536   auto Loc = getLoc();
2537 
2538   const RegInfo *RI = getRegularRegInfo(RegName);
2539   if (!RI) {
2540     Error(Loc, "invalid register name");
2541     return AMDGPU::NoRegister;
2542   }
2543 
2544   Tokens.push_back(getToken());
2545   lex(); // skip register name
2546 
2547   RegKind = RI->Kind;
2548   StringRef RegSuffix = RegName.substr(RI->Name.size());
2549   if (!RegSuffix.empty()) {
2550     // Single 32-bit register: vXX.
2551     if (!getRegNum(RegSuffix, RegNum)) {
2552       Error(Loc, "invalid register index");
2553       return AMDGPU::NoRegister;
2554     }
2555     RegWidth = 1;
2556   } else {
2557     // Range of registers: v[XX:YY]. ":YY" is optional.
2558     if (!ParseRegRange(RegNum, RegWidth))
2559       return AMDGPU::NoRegister;
2560   }
2561 
2562   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2563 }
2564 
2565 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2566                                        unsigned &RegWidth,
2567                                        SmallVectorImpl<AsmToken> &Tokens) {
2568   unsigned Reg = AMDGPU::NoRegister;
2569   auto ListLoc = getLoc();
2570 
2571   if (!skipToken(AsmToken::LBrac,
2572                  "expected a register or a list of registers")) {
2573     return AMDGPU::NoRegister;
2574   }
2575 
2576   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2577 
2578   auto Loc = getLoc();
2579   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2580     return AMDGPU::NoRegister;
2581   if (RegWidth != 1) {
2582     Error(Loc, "expected a single 32-bit register");
2583     return AMDGPU::NoRegister;
2584   }
2585 
2586   for (; trySkipToken(AsmToken::Comma); ) {
2587     RegisterKind NextRegKind;
2588     unsigned NextReg, NextRegNum, NextRegWidth;
2589     Loc = getLoc();
2590 
2591     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2592                              NextRegNum, NextRegWidth,
2593                              Tokens)) {
2594       return AMDGPU::NoRegister;
2595     }
2596     if (NextRegWidth != 1) {
2597       Error(Loc, "expected a single 32-bit register");
2598       return AMDGPU::NoRegister;
2599     }
2600     if (NextRegKind != RegKind) {
2601       Error(Loc, "registers in a list must be of the same kind");
2602       return AMDGPU::NoRegister;
2603     }
2604     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2605       return AMDGPU::NoRegister;
2606   }
2607 
2608   if (!skipToken(AsmToken::RBrac,
2609                  "expected a comma or a closing square bracket")) {
2610     return AMDGPU::NoRegister;
2611   }
2612 
2613   if (isRegularReg(RegKind))
2614     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2615 
2616   return Reg;
2617 }
2618 
2619 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2620                                           unsigned &RegNum, unsigned &RegWidth,
2621                                           SmallVectorImpl<AsmToken> &Tokens) {
2622   auto Loc = getLoc();
2623   Reg = AMDGPU::NoRegister;
2624 
2625   if (isToken(AsmToken::Identifier)) {
2626     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2627     if (Reg == AMDGPU::NoRegister)
2628       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2629   } else {
2630     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2631   }
2632 
2633   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2634   if (Reg == AMDGPU::NoRegister) {
2635     assert(Parser.hasPendingError());
2636     return false;
2637   }
2638 
2639   if (!subtargetHasRegister(*TRI, Reg)) {
2640     if (Reg == AMDGPU::SGPR_NULL) {
2641       Error(Loc, "'null' operand is not supported on this GPU");
2642     } else {
2643       Error(Loc, "register not available on this GPU");
2644     }
2645     return false;
2646   }
2647 
2648   return true;
2649 }
2650 
2651 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2652                                           unsigned &RegNum, unsigned &RegWidth,
2653                                           bool RestoreOnFailure /*=false*/) {
2654   Reg = AMDGPU::NoRegister;
2655 
2656   SmallVector<AsmToken, 1> Tokens;
2657   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2658     if (RestoreOnFailure) {
2659       while (!Tokens.empty()) {
2660         getLexer().UnLex(Tokens.pop_back_val());
2661       }
2662     }
2663     return true;
2664   }
2665   return false;
2666 }
2667 
2668 Optional<StringRef>
2669 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2670   switch (RegKind) {
2671   case IS_VGPR:
2672     return StringRef(".amdgcn.next_free_vgpr");
2673   case IS_SGPR:
2674     return StringRef(".amdgcn.next_free_sgpr");
2675   default:
2676     return None;
2677   }
2678 }
2679 
2680 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2681   auto SymbolName = getGprCountSymbolName(RegKind);
2682   assert(SymbolName && "initializing invalid register kind");
2683   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2684   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2685 }
2686 
2687 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2688                                             unsigned DwordRegIndex,
2689                                             unsigned RegWidth) {
2690   // Symbols are only defined for GCN targets
2691   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2692     return true;
2693 
2694   auto SymbolName = getGprCountSymbolName(RegKind);
2695   if (!SymbolName)
2696     return true;
2697   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2698 
2699   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2700   int64_t OldCount;
2701 
2702   if (!Sym->isVariable())
2703     return !Error(getLoc(),
2704                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2705   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2706     return !Error(
2707         getLoc(),
2708         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2709 
2710   if (OldCount <= NewMax)
2711     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2712 
2713   return true;
2714 }
2715 
2716 std::unique_ptr<AMDGPUOperand>
2717 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2718   const auto &Tok = getToken();
2719   SMLoc StartLoc = Tok.getLoc();
2720   SMLoc EndLoc = Tok.getEndLoc();
2721   RegisterKind RegKind;
2722   unsigned Reg, RegNum, RegWidth;
2723 
2724   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2725     return nullptr;
2726   }
2727   if (isHsaAbiVersion3(&getSTI())) {
2728     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2729       return nullptr;
2730   } else
2731     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2732   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2733 }
2734 
2735 OperandMatchResultTy
2736 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2737   // TODO: add syntactic sugar for 1/(2*PI)
2738 
2739   assert(!isRegister());
2740   assert(!isModifier());
2741 
2742   const auto& Tok = getToken();
2743   const auto& NextTok = peekToken();
2744   bool IsReal = Tok.is(AsmToken::Real);
2745   SMLoc S = getLoc();
2746   bool Negate = false;
2747 
2748   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2749     lex();
2750     IsReal = true;
2751     Negate = true;
2752   }
2753 
2754   if (IsReal) {
2755     // Floating-point expressions are not supported.
2756     // Can only allow floating-point literals with an
2757     // optional sign.
2758 
2759     StringRef Num = getTokenStr();
2760     lex();
2761 
2762     APFloat RealVal(APFloat::IEEEdouble());
2763     auto roundMode = APFloat::rmNearestTiesToEven;
2764     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2765       return MatchOperand_ParseFail;
2766     }
2767     if (Negate)
2768       RealVal.changeSign();
2769 
2770     Operands.push_back(
2771       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2772                                AMDGPUOperand::ImmTyNone, true));
2773 
2774     return MatchOperand_Success;
2775 
2776   } else {
2777     int64_t IntVal;
2778     const MCExpr *Expr;
2779     SMLoc S = getLoc();
2780 
2781     if (HasSP3AbsModifier) {
2782       // This is a workaround for handling expressions
2783       // as arguments of SP3 'abs' modifier, for example:
2784       //     |1.0|
2785       //     |-1|
2786       //     |1+x|
2787       // This syntax is not compatible with syntax of standard
2788       // MC expressions (due to the trailing '|').
2789       SMLoc EndLoc;
2790       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2791         return MatchOperand_ParseFail;
2792     } else {
2793       if (Parser.parseExpression(Expr))
2794         return MatchOperand_ParseFail;
2795     }
2796 
2797     if (Expr->evaluateAsAbsolute(IntVal)) {
2798       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2799     } else {
2800       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2801     }
2802 
2803     return MatchOperand_Success;
2804   }
2805 
2806   return MatchOperand_NoMatch;
2807 }
2808 
2809 OperandMatchResultTy
2810 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2811   if (!isRegister())
2812     return MatchOperand_NoMatch;
2813 
2814   if (auto R = parseRegister()) {
2815     assert(R->isReg());
2816     Operands.push_back(std::move(R));
2817     return MatchOperand_Success;
2818   }
2819   return MatchOperand_ParseFail;
2820 }
2821 
2822 OperandMatchResultTy
2823 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2824   auto res = parseReg(Operands);
2825   if (res != MatchOperand_NoMatch) {
2826     return res;
2827   } else if (isModifier()) {
2828     return MatchOperand_NoMatch;
2829   } else {
2830     return parseImm(Operands, HasSP3AbsMod);
2831   }
2832 }
2833 
2834 bool
2835 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2836   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2837     const auto &str = Token.getString();
2838     return str == "abs" || str == "neg" || str == "sext";
2839   }
2840   return false;
2841 }
2842 
2843 bool
2844 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2845   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2846 }
2847 
2848 bool
2849 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2850   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2851 }
2852 
2853 bool
2854 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2855   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2856 }
2857 
2858 // Check if this is an operand modifier or an opcode modifier
2859 // which may look like an expression but is not. We should
2860 // avoid parsing these modifiers as expressions. Currently
2861 // recognized sequences are:
2862 //   |...|
2863 //   abs(...)
2864 //   neg(...)
2865 //   sext(...)
2866 //   -reg
2867 //   -|...|
2868 //   -abs(...)
2869 //   name:...
2870 // Note that simple opcode modifiers like 'gds' may be parsed as
2871 // expressions; this is a special case. See getExpressionAsToken.
2872 //
2873 bool
2874 AMDGPUAsmParser::isModifier() {
2875 
2876   AsmToken Tok = getToken();
2877   AsmToken NextToken[2];
2878   peekTokens(NextToken);
2879 
2880   return isOperandModifier(Tok, NextToken[0]) ||
2881          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2882          isOpcodeModifierWithVal(Tok, NextToken[0]);
2883 }
2884 
2885 // Check if the current token is an SP3 'neg' modifier.
2886 // Currently this modifier is allowed in the following context:
2887 //
2888 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2889 // 2. Before an 'abs' modifier: -abs(...)
2890 // 3. Before an SP3 'abs' modifier: -|...|
2891 //
2892 // In all other cases "-" is handled as a part
2893 // of an expression that follows the sign.
2894 //
2895 // Note: When "-" is followed by an integer literal,
2896 // this is interpreted as integer negation rather
2897 // than a floating-point NEG modifier applied to the literal.
2898 // Besides being counter-intuitive, such use of the floating-point
2899 // NEG modifier would result in different meanings of
2900 // integer literals used with VOP1/2/C and VOP3,
2901 // for example:
2902 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2903 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2904 // Negative fp literals with a preceding "-" are
2905 // handled likewise, for uniformity.
2906 //
2907 bool
2908 AMDGPUAsmParser::parseSP3NegModifier() {
2909 
2910   AsmToken NextToken[2];
2911   peekTokens(NextToken);
2912 
2913   if (isToken(AsmToken::Minus) &&
2914       (isRegister(NextToken[0], NextToken[1]) ||
2915        NextToken[0].is(AsmToken::Pipe) ||
2916        isId(NextToken[0], "abs"))) {
2917     lex();
2918     return true;
2919   }
2920 
2921   return false;
2922 }
2923 
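// Parse a register or an immediate with optional FP input modifiers.
// Accepted forms include, for example:
//   v0, abs(v0), |v0|, neg(v0), -v0, -|v0|, -abs(v0)
// SP3 and named forms of the same modifier must not be mixed, and "--..."
// is rejected in favor of neg(...).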
2924 OperandMatchResultTy
2925 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2926                                               bool AllowImm) {
2927   bool Neg, SP3Neg;
2928   bool Abs, SP3Abs;
2929   SMLoc Loc;
2930 
2931   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2932   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2933     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2934     return MatchOperand_ParseFail;
2935   }
2936 
2937   SP3Neg = parseSP3NegModifier();
2938 
2939   Loc = getLoc();
2940   Neg = trySkipId("neg");
2941   if (Neg && SP3Neg) {
2942     Error(Loc, "expected register or immediate");
2943     return MatchOperand_ParseFail;
2944   }
2945   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2946     return MatchOperand_ParseFail;
2947 
2948   Abs = trySkipId("abs");
2949   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2950     return MatchOperand_ParseFail;
2951 
2952   Loc = getLoc();
2953   SP3Abs = trySkipToken(AsmToken::Pipe);
2954   if (Abs && SP3Abs) {
2955     Error(Loc, "expected register or immediate");
2956     return MatchOperand_ParseFail;
2957   }
2958 
2959   OperandMatchResultTy Res;
2960   if (AllowImm) {
2961     Res = parseRegOrImm(Operands, SP3Abs);
2962   } else {
2963     Res = parseReg(Operands);
2964   }
2965   if (Res != MatchOperand_Success) {
2966     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2967   }
2968 
2969   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2970     return MatchOperand_ParseFail;
2971   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2972     return MatchOperand_ParseFail;
2973   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2974     return MatchOperand_ParseFail;
2975 
2976   AMDGPUOperand::Modifiers Mods;
2977   Mods.Abs = Abs || SP3Abs;
2978   Mods.Neg = Neg || SP3Neg;
2979 
2980   if (Mods.hasFPModifiers()) {
2981     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2982     if (Op.isExpr()) {
2983       Error(Op.getStartLoc(), "expected an absolute expression");
2984       return MatchOperand_ParseFail;
2985     }
2986     Op.setModifiers(Mods);
2987   }
2988   return MatchOperand_Success;
2989 }
2990 
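// Parse a register or an immediate with an optional integer 'sext'
// modifier, e.g. "v0" or "sext(v0)".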
2991 OperandMatchResultTy
2992 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2993                                                bool AllowImm) {
2994   bool Sext = trySkipId("sext");
2995   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2996     return MatchOperand_ParseFail;
2997 
2998   OperandMatchResultTy Res;
2999   if (AllowImm) {
3000     Res = parseRegOrImm(Operands);
3001   } else {
3002     Res = parseReg(Operands);
3003   }
3004   if (Res != MatchOperand_Success) {
3005     return Sext? MatchOperand_ParseFail : Res;
3006   }
3007 
3008   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3009     return MatchOperand_ParseFail;
3010 
3011   AMDGPUOperand::Modifiers Mods;
3012   Mods.Sext = Sext;
3013 
3014   if (Mods.hasIntModifiers()) {
3015     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3016     if (Op.isExpr()) {
3017       Error(Op.getStartLoc(), "expected an absolute expression");
3018       return MatchOperand_ParseFail;
3019     }
3020     Op.setModifiers(Mods);
3021   }
3022 
3023   return MatchOperand_Success;
3024 }
3025 
3026 OperandMatchResultTy
3027 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3028   return parseRegOrImmWithFPInputMods(Operands, false);
3029 }
3030 
3031 OperandMatchResultTy
3032 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3033   return parseRegOrImmWithIntInputMods(Operands, false);
3034 }
3035 
3036 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3037   auto Loc = getLoc();
3038   if (trySkipId("off")) {
3039     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3040                                                 AMDGPUOperand::ImmTyOff, false));
3041     return MatchOperand_Success;
3042   }
3043 
3044   if (!isRegister())
3045     return MatchOperand_NoMatch;
3046 
3047   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3048   if (Reg) {
3049     Operands.push_back(std::move(Reg));
3050     return MatchOperand_Success;
3051   }
3052 
3053   return MatchOperand_ParseFail;
3054 
3055 }
3056 
3057 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3058   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3059 
3060   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3061       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3062       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3063       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3064     return Match_InvalidOperand;
3065 
3066   if ((TSFlags & SIInstrFlags::VOP3) &&
3067       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3068       getForcedEncodingSize() != 64)
3069     return Match_PreferE32;
3070 
3071   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3072       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3073     // v_mac_f32/16 allow only dst_sel == DWORD.
3074     auto OpNum =
3075         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3076     const auto &Op = Inst.getOperand(OpNum);
3077     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3078       return Match_InvalidOperand;
3079     }
3080   }
3081 
3082   return Match_Success;
3083 }
3084 
3085 static ArrayRef<unsigned> getAllVariants() {
3086   static const unsigned Variants[] = {
3087     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3088     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3089   };
3090 
3091   return makeArrayRef(Variants);
3092 }
3093 
3094 // Determine which asm variants we should check.
3095 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3096   if (getForcedEncodingSize() == 32) {
3097     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3098     return makeArrayRef(Variants);
3099   }
3100 
3101   if (isForcedVOP3()) {
3102     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3103     return makeArrayRef(Variants);
3104   }
3105 
3106   if (isForcedSDWA()) {
3107     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3108                                         AMDGPUAsmVariants::SDWA9};
3109     return makeArrayRef(Variants);
3110   }
3111 
3112   if (isForcedDPP()) {
3113     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3114     return makeArrayRef(Variants);
3115   }
3116 
3117   return getAllVariants();
3118 }
3119 
3120 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3121   if (getForcedEncodingSize() == 32)
3122     return "e32";
3123 
3124   if (isForcedVOP3())
3125     return "e64";
3126 
3127   if (isForcedSDWA())
3128     return "sdwa";
3129 
3130   if (isForcedDPP())
3131     return "dpp";
3132 
3133   return "";
3134 }
3135 
3136 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3137   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3138   const unsigned Num = Desc.getNumImplicitUses();
3139   for (unsigned i = 0; i < Num; ++i) {
3140     unsigned Reg = Desc.ImplicitUses[i];
3141     switch (Reg) {
3142     case AMDGPU::FLAT_SCR:
3143     case AMDGPU::VCC:
3144     case AMDGPU::VCC_LO:
3145     case AMDGPU::VCC_HI:
3146     case AMDGPU::M0:
3147       return Reg;
3148     default:
3149       break;
3150     }
3151   }
3152   return AMDGPU::NoRegister;
3153 }
3154 
3155 // NB: This code is correct only when used to check constant
3156 // bus limitations because GFX7 supports no f16 inline constants.
3157 // Note that there are no cases when a GFX7 opcode violates
3158 // constant bus limitations due to the use of an f16 constant.
3159 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3160                                        unsigned OpIdx) const {
3161   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3162 
3163   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3164     return false;
3165   }
3166 
3167   const MCOperand &MO = Inst.getOperand(OpIdx);
3168 
3169   int64_t Val = MO.getImm();
3170   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3171 
3172   switch (OpSize) { // expected operand size
3173   case 8:
3174     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3175   case 4:
3176     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3177   case 2: {
3178     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3179     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3180         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3181         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3182       return AMDGPU::isInlinableIntLiteral(Val);
3183 
3184     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3185         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3186         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3187       return AMDGPU::isInlinableIntLiteralV216(Val);
3188 
3189     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3190         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3191         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3192       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3193 
3194     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3195   }
3196   default:
3197     llvm_unreachable("invalid operand size");
3198   }
3199 }
3200 
3201 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3202   if (!isGFX10Plus())
3203     return 1;
3204 
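  // GFX10+ allows up to two scalar (SGPR or literal) operands on the constant
  // bus; the 64-bit shifts below are the exception and allow only one.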
3205   switch (Opcode) {
3206   // 64-bit shift instructions can use only one scalar value input
3207   case AMDGPU::V_LSHLREV_B64_e64:
3208   case AMDGPU::V_LSHLREV_B64_gfx10:
3209   case AMDGPU::V_LSHRREV_B64_e64:
3210   case AMDGPU::V_LSHRREV_B64_gfx10:
3211   case AMDGPU::V_ASHRREV_I64_e64:
3212   case AMDGPU::V_ASHRREV_I64_gfx10:
3213   case AMDGPU::V_LSHL_B64_e64:
3214   case AMDGPU::V_LSHR_B64_e64:
3215   case AMDGPU::V_ASHR_I64_e64:
3216     return 1;
3217   default:
3218     return 2;
3219   }
3220 }
3221 
3222 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3223   const MCOperand &MO = Inst.getOperand(OpIdx);
3224   if (MO.isImm()) {
3225     return !isInlineConstant(Inst, OpIdx);
3226   } else if (MO.isReg()) {
3227     auto Reg = MO.getReg();
3228     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3229     auto PReg = mc2PseudoReg(Reg);
3230     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3231   } else {
3232     return true;
3233   }
3234 }
3235 
3236 bool
3237 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3238                                                 const OperandVector &Operands) {
3239   const unsigned Opcode = Inst.getOpcode();
3240   const MCInstrDesc &Desc = MII.get(Opcode);
3241   unsigned LastSGPR = AMDGPU::NoRegister;
3242   unsigned ConstantBusUseCount = 0;
3243   unsigned NumLiterals = 0;
3244   unsigned LiteralSize;
3245 
3246   if (Desc.TSFlags &
3247       (SIInstrFlags::VOPC |
3248        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3249        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3250        SIInstrFlags::SDWA)) {
3251     // Check special imm operands (used by madmk, etc)
3252     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3253       ++ConstantBusUseCount;
3254     }
3255 
3256     SmallDenseSet<unsigned> SGPRsUsed;
3257     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3258     if (SGPRUsed != AMDGPU::NoRegister) {
3259       SGPRsUsed.insert(SGPRUsed);
3260       ++ConstantBusUseCount;
3261     }
3262 
3263     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3264     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3265     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3266 
3267     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3268 
3269     for (int OpIdx : OpIndices) {
3270       if (OpIdx == -1) break;
3271 
3272       const MCOperand &MO = Inst.getOperand(OpIdx);
3273       if (usesConstantBus(Inst, OpIdx)) {
3274         if (MO.isReg()) {
3275           LastSGPR = mc2PseudoReg(MO.getReg());
3276           // Pairs of registers with a partial intersection like these:
3277           //   s0, s[0:1]
3278           //   flat_scratch_lo, flat_scratch
3279           //   flat_scratch_lo, flat_scratch_hi
3280           // are theoretically valid but they are disabled anyway.
3281           // Note that this code mimics SIInstrInfo::verifyInstruction
3282           if (!SGPRsUsed.count(LastSGPR)) {
3283             SGPRsUsed.insert(LastSGPR);
3284             ++ConstantBusUseCount;
3285           }
3286         } else { // Expression or a literal
3287 
3288           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3289             continue; // special operand like VINTERP attr_chan
3290 
3291           // An instruction may use only one literal.
3292           // This has been validated on the previous step.
3293           // See validateVOP3Literal.
3294           // This literal may be used as more than one operand.
3295           // If all these operands are of the same size,
3296           // this literal counts as one scalar value.
3297           // Otherwise it counts as 2 scalar values.
3298           // See "GFX10 Shader Programming", section 3.6.2.3.
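          // For example, the same 32-bit literal used for both src0 and src1
          // occupies a single slot, but if one use expects a 64-bit operand and
          // another a 32-bit operand, the literal counts as two scalar values.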
3299 
3300           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3301           if (Size < 4) Size = 4;
3302 
3303           if (NumLiterals == 0) {
3304             NumLiterals = 1;
3305             LiteralSize = Size;
3306           } else if (LiteralSize != Size) {
3307             NumLiterals = 2;
3308           }
3309         }
3310       }
3311     }
3312   }
3313   ConstantBusUseCount += NumLiterals;
3314 
3315   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3316     return true;
3317 
3318   SMLoc LitLoc = getLitLoc(Operands);
3319   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3320   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3321   Error(Loc, "invalid operand (violates constant bus restrictions)");
3322   return false;
3323 }
3324 
3325 bool
3326 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3327                                                  const OperandVector &Operands) {
3328   const unsigned Opcode = Inst.getOpcode();
3329   const MCInstrDesc &Desc = MII.get(Opcode);
3330 
3331   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3332   if (DstIdx == -1 ||
3333       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3334     return true;
3335   }
3336 
3337   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3338 
3339   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3340   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3341   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3342 
3343   assert(DstIdx != -1);
3344   const MCOperand &Dst = Inst.getOperand(DstIdx);
3345   assert(Dst.isReg());
3346   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3347 
3348   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3349 
3350   for (int SrcIdx : SrcIndices) {
3351     if (SrcIdx == -1) break;
3352     const MCOperand &Src = Inst.getOperand(SrcIdx);
3353     if (Src.isReg()) {
3354       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3355       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3356         Error(getRegLoc(SrcReg, Operands),
3357           "destination must be different than all sources");
3358         return false;
3359       }
3360     }
3361   }
3362 
3363   return true;
3364 }
3365 
3366 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3367 
3368   const unsigned Opc = Inst.getOpcode();
3369   const MCInstrDesc &Desc = MII.get(Opc);
3370 
3371   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3372     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3373     assert(ClampIdx != -1);
3374     return Inst.getOperand(ClampIdx).getImm() == 0;
3375   }
3376 
3377   return true;
3378 }
3379 
3380 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3381 
3382   const unsigned Opc = Inst.getOpcode();
3383   const MCInstrDesc &Desc = MII.get(Opc);
3384 
3385   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3386     return true;
3387 
3388   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3389   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3390   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3391 
3392   assert(VDataIdx != -1);
3393 
3394   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3395     return true;
3396 
3397   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3398   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3399   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3400   if (DMask == 0)
3401     DMask = 1;
3402 
3403   unsigned DataSize =
3404     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3405   if (hasPackedD16()) {
3406     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3407     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3408       DataSize = (DataSize + 1) / 2;
3409   }
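  // For example, dmask=0x7 with tfe=1 expects a 4-dword vdata (3 data dwords
  // plus 1 for tfe); with d16 the three 16-bit channels pack into 2 dwords.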
3410 
3411   return (VDataSize / 4) == DataSize + TFESize;
3412 }
3413 
3414 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3415   const unsigned Opc = Inst.getOpcode();
3416   const MCInstrDesc &Desc = MII.get(Opc);
3417 
3418   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3419     return true;
3420 
3421   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3422 
3423   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3424       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3425   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3426   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3427   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3428 
3429   assert(VAddr0Idx != -1);
3430   assert(SrsrcIdx != -1);
3431   assert(SrsrcIdx > VAddr0Idx);
3432 
3433   if (DimIdx == -1)
3434     return true; // intersect_ray
3435 
3436   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3437   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3438   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3439   unsigned VAddrSize =
3440       IsNSA ? SrsrcIdx - VAddr0Idx
3441             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3442 
3443   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3444                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3445                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3446                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3447   if (!IsNSA) {
3448     if (AddrSize > 8)
3449       AddrSize = 16;
3450     else if (AddrSize > 4)
3451       AddrSize = 8;
3452   }
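  // In non-NSA encodings the whole address is a single register tuple, so a
  // required size above 4 is rounded up to 8, and above 8 is rounded up to 16.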
3453 
3454   return VAddrSize == AddrSize;
3455 }
3456 
3457 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3458 
3459   const unsigned Opc = Inst.getOpcode();
3460   const MCInstrDesc &Desc = MII.get(Opc);
3461 
3462   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3463     return true;
3464   if (!Desc.mayLoad() || !Desc.mayStore())
3465     return true; // Not atomic
3466 
3467   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3468   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3469 
3470   // This is an incomplete check because image_atomic_cmpswap
3471   // may only use 0x3 and 0xf while other atomic operations
3472   // may use 0x1 and 0x3. However these limitations are
3473   // verified when we check that dmask matches dst size.
3474   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3475 }
3476 
3477 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3478 
3479   const unsigned Opc = Inst.getOpcode();
3480   const MCInstrDesc &Desc = MII.get(Opc);
3481 
3482   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3483     return true;
3484 
3485   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3486   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3487 
3488   // GATHER4 instructions use dmask in a different fashion compared to
3489   // other MIMG instructions. The only useful DMASK values are
3490   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3491   // (red,red,red,red) etc.) The ISA document doesn't mention
3492   // this.
3493   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3494 }
3495 
3496 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3497   const unsigned Opc = Inst.getOpcode();
3498   const MCInstrDesc &Desc = MII.get(Opc);
3499 
3500   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3501     return true;
3502 
3503   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3504   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3505       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3506 
3507   if (!BaseOpcode->MSAA)
3508     return true;
3509 
3510   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3511   assert(DimIdx != -1);
3512 
3513   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3514   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3515 
3516   return DimInfo->MSAA;
3517 }
3518 
3519 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3520 {
3521   switch (Opcode) {
3522   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3523   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3524   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3525     return true;
3526   default:
3527     return false;
3528   }
3529 }
3530 
3531 // movrels* opcodes should only allow VGPRs as src0.
3532 // This is specified in .td description for vop1/vop3,
3533 // but sdwa is handled differently. See isSDWAOperand.
3534 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3535                                       const OperandVector &Operands) {
3536 
3537   const unsigned Opc = Inst.getOpcode();
3538   const MCInstrDesc &Desc = MII.get(Opc);
3539 
3540   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3541     return true;
3542 
3543   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3544   assert(Src0Idx != -1);
3545 
3546   SMLoc ErrLoc;
3547   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3548   if (Src0.isReg()) {
3549     auto Reg = mc2PseudoReg(Src0.getReg());
3550     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3551     if (!isSGPR(Reg, TRI))
3552       return true;
3553     ErrLoc = getRegLoc(Reg, Operands);
3554   } else {
3555     ErrLoc = getConstLoc(Operands);
3556   }
3557 
3558   Error(ErrLoc, "source operand must be a VGPR");
3559   return false;
3560 }
3561 
3562 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3563                                           const OperandVector &Operands) {
3564 
3565   const unsigned Opc = Inst.getOpcode();
3566 
3567   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3568     return true;
3569 
3570   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3571   assert(Src0Idx != -1);
3572 
3573   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3574   if (!Src0.isReg())
3575     return true;
3576 
3577   auto Reg = mc2PseudoReg(Src0.getReg());
3578   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3579   if (isSGPR(Reg, TRI)) {
3580     Error(getRegLoc(Reg, Operands),
3581           "source operand must be either a VGPR or an inline constant");
3582     return false;
3583   }
3584 
3585   return true;
3586 }
3587 
3588 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3589   switch (Inst.getOpcode()) {
3590   default:
3591     return true;
3592   case V_DIV_SCALE_F32_gfx6_gfx7:
3593   case V_DIV_SCALE_F32_vi:
3594   case V_DIV_SCALE_F32_gfx10:
3595   case V_DIV_SCALE_F64_gfx6_gfx7:
3596   case V_DIV_SCALE_F64_vi:
3597   case V_DIV_SCALE_F64_gfx10:
3598     break;
3599   }
3600 
3601   // TODO: Check that src0 = src1 or src2.
3602 
3603   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3604                     AMDGPU::OpName::src1_modifiers,
3605                     AMDGPU::OpName::src2_modifiers}) {
3606     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3607             .getImm() &
3608         SISrcMods::ABS) {
3609       return false;
3610     }
3611   }
3612 
3613   return true;
3614 }
3615 
3616 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3617 
3618   const unsigned Opc = Inst.getOpcode();
3619   const MCInstrDesc &Desc = MII.get(Opc);
3620 
3621   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3622     return true;
3623 
3624   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3625   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3626     if (isCI() || isSI())
3627       return false;
3628   }
3629 
3630   return true;
3631 }
3632 
3633 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3634   const unsigned Opc = Inst.getOpcode();
3635   const MCInstrDesc &Desc = MII.get(Opc);
3636 
3637   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3638     return true;
3639 
3640   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3641   if (DimIdx < 0)
3642     return true;
3643 
3644   long Imm = Inst.getOperand(DimIdx).getImm();
3645   if (Imm < 0 || Imm >= 8)
3646     return false;
3647 
3648   return true;
3649 }
3650 
3651 static bool IsRevOpcode(const unsigned Opcode)
3652 {
3653   switch (Opcode) {
3654   case AMDGPU::V_SUBREV_F32_e32:
3655   case AMDGPU::V_SUBREV_F32_e64:
3656   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3657   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3658   case AMDGPU::V_SUBREV_F32_e32_vi:
3659   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3660   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3661   case AMDGPU::V_SUBREV_F32_e64_vi:
3662 
3663   case AMDGPU::V_SUBREV_CO_U32_e32:
3664   case AMDGPU::V_SUBREV_CO_U32_e64:
3665   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3666   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3667 
3668   case AMDGPU::V_SUBBREV_U32_e32:
3669   case AMDGPU::V_SUBBREV_U32_e64:
3670   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3671   case AMDGPU::V_SUBBREV_U32_e32_vi:
3672   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3673   case AMDGPU::V_SUBBREV_U32_e64_vi:
3674 
3675   case AMDGPU::V_SUBREV_U32_e32:
3676   case AMDGPU::V_SUBREV_U32_e64:
3677   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3678   case AMDGPU::V_SUBREV_U32_e32_vi:
3679   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3680   case AMDGPU::V_SUBREV_U32_e64_vi:
3681 
3682   case AMDGPU::V_SUBREV_F16_e32:
3683   case AMDGPU::V_SUBREV_F16_e64:
3684   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3685   case AMDGPU::V_SUBREV_F16_e32_vi:
3686   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3687   case AMDGPU::V_SUBREV_F16_e64_vi:
3688 
3689   case AMDGPU::V_SUBREV_U16_e32:
3690   case AMDGPU::V_SUBREV_U16_e64:
3691   case AMDGPU::V_SUBREV_U16_e32_vi:
3692   case AMDGPU::V_SUBREV_U16_e64_vi:
3693 
3694   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3695   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3696   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3697 
3698   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3699   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3700 
3701   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3702   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3703 
3704   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3705   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3706 
3707   case AMDGPU::V_LSHRREV_B32_e32:
3708   case AMDGPU::V_LSHRREV_B32_e64:
3709   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3710   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3711   case AMDGPU::V_LSHRREV_B32_e32_vi:
3712   case AMDGPU::V_LSHRREV_B32_e64_vi:
3713   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3714   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3715 
3716   case AMDGPU::V_ASHRREV_I32_e32:
3717   case AMDGPU::V_ASHRREV_I32_e64:
3718   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3719   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3720   case AMDGPU::V_ASHRREV_I32_e32_vi:
3721   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3722   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3723   case AMDGPU::V_ASHRREV_I32_e64_vi:
3724 
3725   case AMDGPU::V_LSHLREV_B32_e32:
3726   case AMDGPU::V_LSHLREV_B32_e64:
3727   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3728   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3729   case AMDGPU::V_LSHLREV_B32_e32_vi:
3730   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3731   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3732   case AMDGPU::V_LSHLREV_B32_e64_vi:
3733 
3734   case AMDGPU::V_LSHLREV_B16_e32:
3735   case AMDGPU::V_LSHLREV_B16_e64:
3736   case AMDGPU::V_LSHLREV_B16_e32_vi:
3737   case AMDGPU::V_LSHLREV_B16_e64_vi:
3738   case AMDGPU::V_LSHLREV_B16_gfx10:
3739 
3740   case AMDGPU::V_LSHRREV_B16_e32:
3741   case AMDGPU::V_LSHRREV_B16_e64:
3742   case AMDGPU::V_LSHRREV_B16_e32_vi:
3743   case AMDGPU::V_LSHRREV_B16_e64_vi:
3744   case AMDGPU::V_LSHRREV_B16_gfx10:
3745 
3746   case AMDGPU::V_ASHRREV_I16_e32:
3747   case AMDGPU::V_ASHRREV_I16_e64:
3748   case AMDGPU::V_ASHRREV_I16_e32_vi:
3749   case AMDGPU::V_ASHRREV_I16_e64_vi:
3750   case AMDGPU::V_ASHRREV_I16_gfx10:
3751 
3752   case AMDGPU::V_LSHLREV_B64_e64:
3753   case AMDGPU::V_LSHLREV_B64_gfx10:
3754   case AMDGPU::V_LSHLREV_B64_vi:
3755 
3756   case AMDGPU::V_LSHRREV_B64_e64:
3757   case AMDGPU::V_LSHRREV_B64_gfx10:
3758   case AMDGPU::V_LSHRREV_B64_vi:
3759 
3760   case AMDGPU::V_ASHRREV_I64_e64:
3761   case AMDGPU::V_ASHRREV_I64_gfx10:
3762   case AMDGPU::V_ASHRREV_I64_vi:
3763 
3764   case AMDGPU::V_PK_LSHLREV_B16:
3765   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3766   case AMDGPU::V_PK_LSHLREV_B16_vi:
3767 
3768   case AMDGPU::V_PK_LSHRREV_B16:
3769   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3770   case AMDGPU::V_PK_LSHRREV_B16_vi:
3771   case AMDGPU::V_PK_ASHRREV_I16:
3772   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3773   case AMDGPU::V_PK_ASHRREV_I16_vi:
3774     return true;
3775   default:
3776     return false;
3777   }
3778 }
3779 
3780 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3781 
3782   using namespace SIInstrFlags;
3783   const unsigned Opcode = Inst.getOpcode();
3784   const MCInstrDesc &Desc = MII.get(Opcode);
3785 
3786   // lds_direct register is defined so that it can be used
3787   // with 9-bit operands only. Ignore encodings which do not accept these.
3788   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3789     return true;
3790 
3791   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3792   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3793   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3794 
3795   const int SrcIndices[] = { Src1Idx, Src2Idx };
3796 
3797   // lds_direct cannot be specified as either src1 or src2.
3798   for (int SrcIdx : SrcIndices) {
3799     if (SrcIdx == -1) break;
3800     const MCOperand &Src = Inst.getOperand(SrcIdx);
3801     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3802       return false;
3803     }
3804   }
3805 
3806   if (Src0Idx == -1)
3807     return true;
3808 
3809   const MCOperand &Src = Inst.getOperand(Src0Idx);
3810   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3811     return true;
3812 
3813   // lds_direct is specified as src0. Check additional limitations.
3814   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3815 }
3816 
3817 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3818   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3819     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3820     if (Op.isFlatOffset())
3821       return Op.getStartLoc();
3822   }
3823   return getLoc();
3824 }
3825 
3826 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3827                                          const OperandVector &Operands) {
3828   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3829   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3830     return true;
3831 
3832   auto Opcode = Inst.getOpcode();
3833   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3834   assert(OpNum != -1);
3835 
3836   const auto &Op = Inst.getOperand(OpNum);
3837   if (!hasFlatOffsets() && Op.getImm() != 0) {
3838     Error(getFlatOffsetLoc(Operands),
3839           "flat offset modifier is not supported on this GPU");
3840     return false;
3841   }
3842 
3843   // For FLAT segment the offset must be positive;
3844   // MSB is ignored and forced to zero.
3845   if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3846     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3847     if (!isIntN(OffsetSize, Op.getImm())) {
3848       Error(getFlatOffsetLoc(Operands),
3849             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3850       return false;
3851     }
3852   } else {
3853     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3854     if (!isUIntN(OffsetSize, Op.getImm())) {
3855       Error(getFlatOffsetLoc(Operands),
3856             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3857       return false;
3858     }
3859   }
3860 
3861   return true;
3862 }
3863 
3864 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3865   // Start with second operand because SMEM Offset cannot be dst or src0.
3866   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3867     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3868     if (Op.isSMEMOffset())
3869       return Op.getStartLoc();
3870   }
3871   return getLoc();
3872 }
3873 
3874 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3875                                          const OperandVector &Operands) {
3876   if (isCI() || isSI())
3877     return true;
3878 
3879   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3880   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3881     return true;
3882 
3883   auto Opcode = Inst.getOpcode();
3884   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3885   if (OpNum == -1)
3886     return true;
3887 
3888   const auto &Op = Inst.getOperand(OpNum);
3889   if (!Op.isImm())
3890     return true;
3891 
3892   uint64_t Offset = Op.getImm();
3893   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3894   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3895       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3896     return true;
3897 
3898   Error(getSMEMOffsetLoc(Operands),
3899         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3900                                "expected a 21-bit signed offset");
3901 
3902   return false;
3903 }
3904 
3905 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3906   unsigned Opcode = Inst.getOpcode();
3907   const MCInstrDesc &Desc = MII.get(Opcode);
3908   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3909     return true;
3910 
3911   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3912   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3913 
3914   const int OpIndices[] = { Src0Idx, Src1Idx };
3915 
3916   unsigned NumExprs = 0;
3917   unsigned NumLiterals = 0;
3918   uint32_t LiteralValue;
3919 
3920   for (int OpIdx : OpIndices) {
3921     if (OpIdx == -1) break;
3922 
3923     const MCOperand &MO = Inst.getOperand(OpIdx);
3924     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3925     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3926       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3927         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3928         if (NumLiterals == 0 || LiteralValue != Value) {
3929           LiteralValue = Value;
3930           ++NumLiterals;
3931         }
3932       } else if (MO.isExpr()) {
3933         ++NumExprs;
3934       }
3935     }
3936   }
3937 
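  // For example, an SOP2 instruction with two distinct 32-bit literals is
  // rejected, while reusing the same literal value in both sources is allowed.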
3938   return NumLiterals + NumExprs <= 1;
3939 }
3940 
3941 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3942   const unsigned Opc = Inst.getOpcode();
3943   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3944       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3945     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3946     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3947 
3948     if (OpSel & ~3)
3949       return false;
3950   }
3951   return true;
3952 }
3953 
3954 // Check if VCC register matches wavefront size
3955 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3956   auto FB = getFeatureBits();
3957   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3958     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3959 }
3960 
3961 // VOP3 literal is only allowed in GFX10+ and only one can be used
3962 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3963                                           const OperandVector &Operands) {
3964   unsigned Opcode = Inst.getOpcode();
3965   const MCInstrDesc &Desc = MII.get(Opcode);
3966   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3967     return true;
3968 
3969   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3970   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3971   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3972 
3973   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3974 
3975   unsigned NumExprs = 0;
3976   unsigned NumLiterals = 0;
3977   uint32_t LiteralValue;
3978 
3979   for (int OpIdx : OpIndices) {
3980     if (OpIdx == -1) break;
3981 
3982     const MCOperand &MO = Inst.getOperand(OpIdx);
3983     if (!MO.isImm() && !MO.isExpr())
3984       continue;
3985     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3986       continue;
3987 
3988     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3989         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3990       Error(getConstLoc(Operands),
3991             "inline constants are not allowed for this operand");
3992       return false;
3993     }
3994 
3995     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3996       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3997       if (NumLiterals == 0 || LiteralValue != Value) {
3998         LiteralValue = Value;
3999         ++NumLiterals;
4000       }
4001     } else if (MO.isExpr()) {
4002       ++NumExprs;
4003     }
4004   }
4005   NumLiterals += NumExprs;
4006 
4007   if (!NumLiterals)
4008     return true;
4009 
4010   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4011     Error(getLitLoc(Operands), "literal operands are not supported");
4012     return false;
4013   }
4014 
4015   if (NumLiterals > 1) {
4016     Error(getLitLoc(Operands), "only one literal operand is allowed");
4017     return false;
4018   }
4019 
4020   return true;
4021 }
4022 
4023 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4024 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4025                          const MCRegisterInfo *MRI) {
4026   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4027   if (OpIdx < 0)
4028     return -1;
4029 
4030   const MCOperand &Op = Inst.getOperand(OpIdx);
4031   if (!Op.isReg())
4032     return -1;
4033 
4034   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4035   auto Reg = Sub ? Sub : Op.getReg();
4036   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4037   return AGRP32.contains(Reg) ? 1 : 0;
4038 }
4039 
4040 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4041   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4042   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4043                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4044                   SIInstrFlags::DS)) == 0)
4045     return true;
4046 
4047   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4048                                                       : AMDGPU::OpName::vdata;
4049 
4050   const MCRegisterInfo *MRI = getMRI();
4051   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4052   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4053 
4054   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4055     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4056     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4057       return false;
4058   }
4059 
4060   auto FB = getFeatureBits();
4061   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4062     if (DataAreg < 0 || DstAreg < 0)
4063       return true;
4064     return DstAreg == DataAreg;
4065   }
4066 
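  // Without GFX90A instructions, neither vdst nor vdata may be an AGPR.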
4067   return DstAreg < 1 && DataAreg < 1;
4068 }
4069 
4070 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4071   auto FB = getFeatureBits();
4072   if (!FB[AMDGPU::FeatureGFX90AInsts])
4073     return true;
4074 
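  // On GFX90A, VGPR and AGPR tuples must be 64-bit aligned, i.e. the tuple
  // must start at an even-numbered register.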
4075   const MCRegisterInfo *MRI = getMRI();
4076   const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4077   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4078   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4079     const MCOperand &Op = Inst.getOperand(I);
4080     if (!Op.isReg())
4081       continue;
4082 
4083     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4084     if (!Sub)
4085       continue;
4086 
4087     if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4088       return false;
4089     if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4090       return false;
4091   }
4092 
4093   return true;
4094 }
4095 
4096 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4097                                             const OperandVector &Operands,
4098                                             const SMLoc &IDLoc) {
4099   int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4100                                           AMDGPU::OpName::glc1);
4101   if (GLCPos != -1) {
4102     // A value of -1 is set by the GLC_1 default operand. These instructions
4103     // require "glc" in the asm string, and -1 means it was omitted.
4104     if (Inst.getOperand(GLCPos).getImm() == -1) {
4105       Error(IDLoc, "instruction must use glc");
4106       return false;
4107     }
4108   }
4109 
4110   return true;
4111 }
4112 
4113 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4114                                           const SMLoc &IDLoc,
4115                                           const OperandVector &Operands) {
4116   if (!validateLdsDirect(Inst)) {
4117     Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands),
4118       "invalid use of lds_direct");
4119     return false;
4120   }
4121   if (!validateSOPLiteral(Inst)) {
4122     Error(getLitLoc(Operands),
4123       "only one literal operand is allowed");
4124     return false;
4125   }
4126   if (!validateVOP3Literal(Inst, Operands)) {
4127     return false;
4128   }
4129   if (!validateConstantBusLimitations(Inst, Operands)) {
4130     return false;
4131   }
4132   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4133     return false;
4134   }
4135   if (!validateIntClampSupported(Inst)) {
4136     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4137       "integer clamping is not supported on this GPU");
4138     return false;
4139   }
4140   if (!validateOpSel(Inst)) {
4141     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4142       "invalid op_sel operand");
4143     return false;
4144   }
4145   // For MUBUF and MTBUF, d16 is part of the opcode, so there is nothing to validate.
4146   if (!validateMIMGD16(Inst)) {
4147     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4148       "d16 modifier is not supported on this GPU");
4149     return false;
4150   }
4151   if (!validateMIMGDim(Inst)) {
4152     Error(IDLoc, "dim modifier is required on this GPU");
4153     return false;
4154   }
4155   if (!validateMIMGMSAA(Inst)) {
4156     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4157           "invalid dim; must be MSAA type");
4158     return false;
4159   }
4160   if (!validateMIMGDataSize(Inst)) {
4161     Error(IDLoc,
4162       "image data size does not match dmask and tfe");
4163     return false;
4164   }
4165   if (!validateMIMGAddrSize(Inst)) {
4166     Error(IDLoc,
4167       "image address size does not match dim and a16");
4168     return false;
4169   }
4170   if (!validateMIMGAtomicDMask(Inst)) {
4171     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4172       "invalid atomic image dmask");
4173     return false;
4174   }
4175   if (!validateMIMGGatherDMask(Inst)) {
4176     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4177       "invalid image_gather dmask: only one bit must be set");
4178     return false;
4179   }
4180   if (!validateMovrels(Inst, Operands)) {
4181     return false;
4182   }
4183   if (!validateFlatOffset(Inst, Operands)) {
4184     return false;
4185   }
4186   if (!validateSMEMOffset(Inst, Operands)) {
4187     return false;
4188   }
4189   if (!validateMAIAccWrite(Inst, Operands)) {
4190     return false;
4191   }
4192   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4193     return false;
4194   }
4195 
4196   if (!validateAGPRLdSt(Inst)) {
4197     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4198     ? "invalid register class: data and dst should be all VGPR or AGPR"
4199     : "invalid register class: agpr loads and stores not supported on this GPU"
4200     );
4201     return false;
4202   }
4203   if (!validateVGPRAlign(Inst)) {
4204     Error(IDLoc,
4205       "invalid register class: vgpr tuples must be 64 bit aligned");
4206     return false;
4207   }
4208 
4209   if (!validateDivScale(Inst)) {
4210     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4211     return false;
4212   }
4216 
4217   return true;
4218 }
4219 
4220 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4221                                             const FeatureBitset &FBS,
4222                                             unsigned VariantID = 0);
4223 
4224 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4225                                 const FeatureBitset &AvailableFeatures,
4226                                 unsigned VariantID);
4227 
4228 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4229                                        const FeatureBitset &FBS) {
4230   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4231 }
4232 
4233 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4234                                        const FeatureBitset &FBS,
4235                                        ArrayRef<unsigned> Variants) {
4236   for (auto Variant : Variants) {
4237     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4238       return true;
4239   }
4240 
4241   return false;
4242 }
4243 
4244 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4245                                                   const SMLoc &IDLoc) {
4246   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4247 
4248   // Check if requested instruction variant is supported.
4249   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4250     return false;
4251 
4252   // This instruction is not supported.
4253   // Clear any other pending errors because they are no longer relevant.
4254   getParser().clearPendingErrors();
4255 
4256   // Requested instruction variant is not supported.
4257   // Check if any other variants are supported.
4258   StringRef VariantName = getMatchedVariantName();
4259   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4260     return Error(IDLoc,
4261                  Twine(VariantName,
4262                        " variant of this instruction is not supported"));
4263   }
4264 
4265   // Finally check if this instruction is supported on any other GPU.
4266   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4267     return Error(IDLoc, "instruction not supported on this GPU");
4268   }
4269 
4270   // Instruction not supported on any GPU. Probably a typo.
4271   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4272   return Error(IDLoc, "invalid instruction" + Suggestion);
4273 }
4274 
4275 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4276                                               OperandVector &Operands,
4277                                               MCStreamer &Out,
4278                                               uint64_t &ErrorInfo,
4279                                               bool MatchingInlineAsm) {
4280   MCInst Inst;
4281   unsigned Result = Match_Success;
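  // Try each eligible encoding variant and keep the most specific match status
  // (see the ordering below). Stop as soon as a variant matches.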
4282   for (auto Variant : getMatchedVariants()) {
4283     uint64_t EI;
4284     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4285                                   Variant);
4286     // Match statuses are ordered from least to most specific. We keep the most
4287     // specific status as the result:
4288     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4289     if ((R == Match_Success) ||
4290         (R == Match_PreferE32) ||
4291         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4292         (R == Match_InvalidOperand && Result != Match_MissingFeature
4293                                    && Result != Match_PreferE32) ||
4294         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4295                                    && Result != Match_MissingFeature
4296                                    && Result != Match_PreferE32)) {
4297       Result = R;
4298       ErrorInfo = EI;
4299     }
4300     if (R == Match_Success)
4301       break;
4302   }
4303 
4304   if (Result == Match_Success) {
4305     if (!validateInstruction(Inst, IDLoc, Operands)) {
4306       return true;
4307     }
4308     Inst.setLoc(IDLoc);
4309     Out.emitInstruction(Inst, getSTI());
4310     return false;
4311   }
4312 
4313   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4314   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4315     return true;
4316   }
4317 
4318   switch (Result) {
4319   default: break;
4320   case Match_MissingFeature:
4321     // It has been verified that the specified instruction
4322     // mnemonic is valid. A match was found but it requires
4323     // features which are not supported on this GPU.
4324     return Error(IDLoc, "operands are not valid for this GPU or mode");
4325 
4326   case Match_InvalidOperand: {
4327     SMLoc ErrorLoc = IDLoc;
4328     if (ErrorInfo != ~0ULL) {
4329       if (ErrorInfo >= Operands.size()) {
4330         return Error(IDLoc, "too few operands for instruction");
4331       }
4332       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4333       if (ErrorLoc == SMLoc())
4334         ErrorLoc = IDLoc;
4335     }
4336     return Error(ErrorLoc, "invalid operand for instruction");
4337   }
4338 
4339   case Match_PreferE32:
4340     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4341                         "should be encoded as e32");
4342   case Match_MnemonicFail:
4343     llvm_unreachable("Invalid instructions should have been handled already");
4344   }
4345   llvm_unreachable("Implement any new match types added!");
4346 }
4347 
4348 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4349   int64_t Tmp = -1;
4350   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4351     return true;
4352   }
4353   if (getParser().parseAbsoluteExpression(Tmp)) {
4354     return true;
4355   }
4356   Ret = static_cast<uint32_t>(Tmp);
4357   return false;
4358 }
4359 
4360 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4361                                                uint32_t &Minor) {
4362   if (ParseAsAbsoluteExpression(Major))
4363     return TokError("invalid major version");
4364 
4365   if (!trySkipToken(AsmToken::Comma))
4366     return TokError("minor version number required, comma expected");
4367 
4368   if (ParseAsAbsoluteExpression(Minor))
4369     return TokError("invalid minor version");
4370 
4371   return false;
4372 }
4373 
4374 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4375   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4376     return TokError("directive only supported for amdgcn architecture");
4377 
4378   std::string Target;
4379 
4380   SMLoc TargetStart = getLoc();
4381   if (getParser().parseEscapedString(Target))
4382     return true;
4383   SMRange TargetRange = SMRange(TargetStart, getLoc());
4384 
4385   std::string ExpectedTarget;
4386   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
4387   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
4388 
4389   if (Target != ExpectedTargetOS.str())
4390     return Error(TargetRange.Start, "target must match options", TargetRange);
4391 
4392   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
4393   return false;
4394 }
4395 
4396 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4397   return Error(Range.Start, "value out of range", Range);
4398 }
4399 
4400 bool AMDGPUAsmParser::calculateGPRBlocks(
4401     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4402     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4403     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4404     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4405   // TODO(scott.linder): These calculations are duplicated from
4406   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4407   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4408 
4409   unsigned NumVGPRs = NextFreeVGPR;
4410   unsigned NumSGPRs = NextFreeSGPR;
4411 
4412   if (Version.Major >= 10)
4413     NumSGPRs = 0;
4414   else {
4415     unsigned MaxAddressableNumSGPRs =
4416         IsaInfo::getAddressableNumSGPRs(&getSTI());
4417 
4418     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4419         NumSGPRs > MaxAddressableNumSGPRs)
4420       return OutOfRangeError(SGPRRange);
4421 
4422     NumSGPRs +=
4423         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4424 
4425     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4426         NumSGPRs > MaxAddressableNumSGPRs)
4427       return OutOfRangeError(SGPRRange);
4428 
4429     if (Features.test(FeatureSGPRInitBug))
4430       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4431   }
4432 
4433   VGPRBlocks =
4434       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4435   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4436 
4437   return false;
4438 }
4439 
4440 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4441   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4442     return TokError("directive only supported for amdgcn architecture");
4443 
4444   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4445     return TokError("directive only supported for amdhsa OS");
4446 
4447   StringRef KernelName;
4448   if (getParser().parseIdentifier(KernelName))
4449     return true;
4450 
4451   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4452 
4453   StringSet<> Seen;
4454 
4455   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4456 
4457   SMRange VGPRRange;
4458   uint64_t NextFreeVGPR = 0;
4459   uint64_t AccumOffset = 0;
4460   SMRange SGPRRange;
4461   uint64_t NextFreeSGPR = 0;
4462   unsigned UserSGPRCount = 0;
4463   bool ReserveVCC = true;
4464   bool ReserveFlatScr = true;
4465   bool ReserveXNACK = hasXNACK();
4466   Optional<bool> EnableWavefrontSize32;
4467 
4468   while (true) {
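    // Skip statement separators (end-of-statement tokens) between directives.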
4469     while (trySkipToken(AsmToken::EndOfStatement));
4470 
4471     StringRef ID;
4472     SMRange IDRange = getTok().getLocRange();
4473     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4474       return true;
4475 
4476     if (ID == ".end_amdhsa_kernel")
4477       break;
4478 
4479     if (Seen.find(ID) != Seen.end())
4480       return TokError(".amdhsa_ directives cannot be repeated");
4481     Seen.insert(ID);
4482 
4483     SMLoc ValStart = getLoc();
4484     int64_t IVal;
4485     if (getParser().parseAbsoluteExpression(IVal))
4486       return true;
4487     SMLoc ValEnd = getLoc();
4488     SMRange ValRange = SMRange(ValStart, ValEnd);
4489 
4490     if (IVal < 0)
4491       return OutOfRangeError(ValRange);
4492 
4493     uint64_t Val = IVal;
4494 
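// Check that VALUE fits into the ENTRY bit-field, then set it in FIELD;
// report an out-of-range error otherwise.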
4495 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4496   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4497     return OutOfRangeError(RANGE);                                             \
4498   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4499 
4500     if (ID == ".amdhsa_group_segment_fixed_size") {
4501       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4502         return OutOfRangeError(ValRange);
4503       KD.group_segment_fixed_size = Val;
4504     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4505       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4506         return OutOfRangeError(ValRange);
4507       KD.private_segment_fixed_size = Val;
4508     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4509       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4510                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4511                        Val, ValRange);
4512       if (Val)
4513         UserSGPRCount += 4;
4514     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4515       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4516                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4517                        ValRange);
4518       if (Val)
4519         UserSGPRCount += 2;
4520     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4521       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4522                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4523                        ValRange);
4524       if (Val)
4525         UserSGPRCount += 2;
4526     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4527       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4528                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4529                        Val, ValRange);
4530       if (Val)
4531         UserSGPRCount += 2;
4532     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4533       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4534                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4535                        ValRange);
4536       if (Val)
4537         UserSGPRCount += 2;
4538     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4539       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4540                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4541                        ValRange);
4542       if (Val)
4543         UserSGPRCount += 2;
4544     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4545       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4546                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4547                        Val, ValRange);
4548       if (Val)
4549         UserSGPRCount += 1;
4550     } else if (ID == ".amdhsa_wavefront_size32") {
4551       if (IVersion.Major < 10)
4552         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4553       EnableWavefrontSize32 = Val;
4554       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4555                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4556                        Val, ValRange);
4557     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4558       PARSE_BITS_ENTRY(
4559           KD.compute_pgm_rsrc2,
4560           COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
4561           ValRange);
4562     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4563       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4564                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4565                        ValRange);
4566     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4567       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4568                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4569                        ValRange);
4570     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4571       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4572                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4573                        ValRange);
4574     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4575       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4576                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4577                        ValRange);
4578     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4579       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4580                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4581                        ValRange);
4582     } else if (ID == ".amdhsa_next_free_vgpr") {
4583       VGPRRange = ValRange;
4584       NextFreeVGPR = Val;
4585     } else if (ID == ".amdhsa_next_free_sgpr") {
4586       SGPRRange = ValRange;
4587       NextFreeSGPR = Val;
4588     } else if (ID == ".amdhsa_accum_offset") {
4589       if (!isGFX90A())
4590         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4591       AccumOffset = Val;
4592     } else if (ID == ".amdhsa_reserve_vcc") {
4593       if (!isUInt<1>(Val))
4594         return OutOfRangeError(ValRange);
4595       ReserveVCC = Val;
4596     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4597       if (IVersion.Major < 7)
4598         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4599       if (!isUInt<1>(Val))
4600         return OutOfRangeError(ValRange);
4601       ReserveFlatScr = Val;
4602     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4603       if (IVersion.Major < 8)
4604         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4605       if (!isUInt<1>(Val))
4606         return OutOfRangeError(ValRange);
4607       ReserveXNACK = Val;
4608     } else if (ID == ".amdhsa_float_round_mode_32") {
4609       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4610                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4611     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4612       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4613                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4614     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4615       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4616                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4617     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4618       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4619                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4620                        ValRange);
4621     } else if (ID == ".amdhsa_dx10_clamp") {
4622       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4623                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4624     } else if (ID == ".amdhsa_ieee_mode") {
4625       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4626                        Val, ValRange);
4627     } else if (ID == ".amdhsa_fp16_overflow") {
4628       if (IVersion.Major < 9)
4629         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4630       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4631                        ValRange);
4632     } else if (ID == ".amdhsa_tg_split") {
4633       if (!isGFX90A())
4634         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4635       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4636                        ValRange);
4637     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4638       if (IVersion.Major < 10)
4639         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4640       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4641                        ValRange);
4642     } else if (ID == ".amdhsa_memory_ordered") {
4643       if (IVersion.Major < 10)
4644         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4645       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4646                        ValRange);
4647     } else if (ID == ".amdhsa_forward_progress") {
4648       if (IVersion.Major < 10)
4649         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4650       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4651                        ValRange);
4652     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4653       PARSE_BITS_ENTRY(
4654           KD.compute_pgm_rsrc2,
4655           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4656           ValRange);
4657     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4658       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4659                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4660                        Val, ValRange);
4661     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4662       PARSE_BITS_ENTRY(
4663           KD.compute_pgm_rsrc2,
4664           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4665           ValRange);
4666     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4667       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4668                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4669                        Val, ValRange);
4670     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4671       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4672                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4673                        Val, ValRange);
4674     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4675       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4676                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4677                        Val, ValRange);
4678     } else if (ID == ".amdhsa_exception_int_div_zero") {
4679       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4680                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4681                        Val, ValRange);
4682     } else {
4683       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4684     }
4685 
4686 #undef PARSE_BITS_ENTRY
4687   }
4688 
4689   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4690     return TokError(".amdhsa_next_free_vgpr directive is required");
4691 
4692   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4693     return TokError(".amdhsa_next_free_sgpr directive is required");
4694 
4695   unsigned VGPRBlocks;
4696   unsigned SGPRBlocks;
4697   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4698                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4699                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4700                          SGPRBlocks))
4701     return true;
4702 
4703   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4704           VGPRBlocks))
4705     return OutOfRangeError(VGPRRange);
4706   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4707                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4708 
4709   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4710           SGPRBlocks))
4711     return OutOfRangeError(SGPRRange);
4712   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4713                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4714                   SGPRBlocks);
4715 
4716   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4717     return TokError("too many user SGPRs enabled");
4718   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4719                   UserSGPRCount);
4720 
4721   if (isGFX90A()) {
4722     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4723       return TokError(".amdhsa_accum_offset directive is required");
4724     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4725       return TokError("accum_offset should be in range [4..256] in "
4726                       "increments of 4");
4727     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4728       return TokError("accum_offset exceeds total VGPR allocation");
4729     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4730                     (AccumOffset / 4 - 1));
4731   }
4732 
4733   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4734       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4735       ReserveFlatScr, ReserveXNACK);
4736   return false;
4737 }
4738 
4739 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4740   uint32_t Major;
4741   uint32_t Minor;
4742 
4743   if (ParseDirectiveMajorMinor(Major, Minor))
4744     return true;
4745 
4746   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4747   return false;
4748 }
4749 
4750 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4751   uint32_t Major;
4752   uint32_t Minor;
4753   uint32_t Stepping;
4754   StringRef VendorName;
4755   StringRef ArchName;
4756 
4757   // If this directive has no arguments, then use the ISA version for the
4758   // targeted GPU.
4759   if (isToken(AsmToken::EndOfStatement)) {
4760     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4761     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4762                                                       ISA.Stepping,
4763                                                       "AMD", "AMDGPU");
4764     return false;
4765   }
4766 
4767   if (ParseDirectiveMajorMinor(Major, Minor))
4768     return true;
4769 
4770   if (!trySkipToken(AsmToken::Comma))
4771     return TokError("stepping version number required, comma expected");
4772 
4773   if (ParseAsAbsoluteExpression(Stepping))
4774     return TokError("invalid stepping version");
4775 
4776   if (!trySkipToken(AsmToken::Comma))
4777     return TokError("vendor name required, comma expected");
4778 
4779   if (!parseString(VendorName, "invalid vendor name"))
4780     return true;
4781 
4782   if (!trySkipToken(AsmToken::Comma))
4783     return TokError("arch name required, comma expected");
4784 
4785   if (!parseString(ArchName, "invalid arch name"))
4786     return true;
4787 
4788   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4789                                                     VendorName, ArchName);
4790   return false;
4791 }
4792 
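// Parse a single field of an amd_kernel_code_t block and, for a few selected
// fields, cross-check the requested value against the current subtarget
// (wavefront size and the GFX10-only WGP / mem-ordered / forward-progress
// modes).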
4793 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4794                                                amd_kernel_code_t &Header) {
4795   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4796   // assembly for backwards compatibility.
4797   if (ID == "max_scratch_backing_memory_byte_size") {
4798     Parser.eatToEndOfStatement();
4799     return false;
4800   }
4801 
4802   SmallString<40> ErrStr;
4803   raw_svector_ostream Err(ErrStr);
4804   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4805     return TokError(Err.str());
4806   }
4807   Lex();
4808 
4809   if (ID == "enable_wavefront_size32") {
4810     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4811       if (!isGFX10Plus())
4812         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4813       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4814         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4815     } else {
4816       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4817         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4818     }
4819   }
4820 
4821   if (ID == "wavefront_size") {
4822     if (Header.wavefront_size == 5) {
4823       if (!isGFX10Plus())
4824         return TokError("wavefront_size=5 is only allowed on GFX10+");
4825       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4826         return TokError("wavefront_size=5 requires +WavefrontSize32");
4827     } else if (Header.wavefront_size == 6) {
4828       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4829         return TokError("wavefront_size=6 requires +WavefrontSize64");
4830     }
4831   }
4832 
4833   if (ID == "enable_wgp_mode") {
4834     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4835         !isGFX10Plus())
4836       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4837   }
4838 
4839   if (ID == "enable_mem_ordered") {
4840     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4841         !isGFX10Plus())
4842       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4843   }
4844 
4845   if (ID == "enable_fwd_progress") {
4846     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4847         !isGFX10Plus())
4848       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4849   }
4850 
4851   return false;
4852 }
4853 
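// Parse the body of a .amd_kernel_code_t directive: a sequence of field
// assignments terminated by .end_amd_kernel_code_t. An illustrative sketch
// (the field values are made up):
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_wgp_mode = 0
//   .end_amd_kernel_code_t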
4854 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4855   amd_kernel_code_t Header;
4856   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4857 
4858   while (true) {
4859     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4860     // will set the current token to EndOfStatement.
4861     while(trySkipToken(AsmToken::EndOfStatement));
4862 
4863     StringRef ID;
4864     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4865       return true;
4866 
4867     if (ID == ".end_amd_kernel_code_t")
4868       break;
4869 
4870     if (ParseAMDKernelCodeTValue(ID, Header))
4871       return true;
4872   }
4873 
4874   getTargetStreamer().EmitAMDKernelCodeT(Header);
4875 
4876   return false;
4877 }
4878 
4879 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4880   StringRef KernelName;
4881   if (!parseId(KernelName, "expected symbol name"))
4882     return true;
4883 
4884   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4885                                            ELF::STT_AMDGPU_HSA_KERNEL);
4886 
4887   KernelScope.initialize(getContext());
4888   return false;
4889 }
4890 
4891 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4892   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4893     return Error(getLoc(),
4894                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4895                  "architectures");
4896   }
4897 
4898   auto ISAVersionStringFromASM = getToken().getStringContents();
4899 
4900   std::string ISAVersionStringFromSTI;
4901   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4902   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4903 
4904   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4905     return Error(getLoc(),
4906                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4907                  "arguments specified through the command line");
4908   }
4909 
4910   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4911   Lex();
4912 
4913   return false;
4914 }
4915 
4916 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4917   const char *AssemblerDirectiveBegin;
4918   const char *AssemblerDirectiveEnd;
4919   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4920       isHsaAbiVersion3(&getSTI())
4921           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4922                             HSAMD::V3::AssemblerDirectiveEnd)
4923           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4924                             HSAMD::AssemblerDirectiveEnd);
4925 
4926   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4927     return Error(getLoc(),
4928                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4929                  "not available on non-amdhsa OSes")).str());
4930   }
4931 
4932   std::string HSAMetadataString;
4933   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4934                           HSAMetadataString))
4935     return true;
4936 
4937   if (isHsaAbiVersion3(&getSTI())) {
4938     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4939       return Error(getLoc(), "invalid HSA metadata");
4940   } else {
4941     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4942       return Error(getLoc(), "invalid HSA metadata");
4943   }
4944 
4945   return false;
4946 }
4947 
4948 /// Common code to parse out a block of text (typically YAML) between start and
4949 /// end directives.
4950 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4951                                           const char *AssemblerDirectiveEnd,
4952                                           std::string &CollectString) {
4953 
4954   raw_string_ostream CollectStream(CollectString);
4955 
4956   getLexer().setSkipSpace(false);
4957 
4958   bool FoundEnd = false;
4959   while (!isToken(AsmToken::Eof)) {
4960     while (isToken(AsmToken::Space)) {
4961       CollectStream << getTokenStr();
4962       Lex();
4963     }
4964 
4965     if (trySkipId(AssemblerDirectiveEnd)) {
4966       FoundEnd = true;
4967       break;
4968     }
4969 
4970     CollectStream << Parser.parseStringToEndOfStatement()
4971                   << getContext().getAsmInfo()->getSeparatorString();
4972 
4973     Parser.eatToEndOfStatement();
4974   }
4975 
4976   getLexer().setSkipSpace(true);
4977 
4978   if (isToken(AsmToken::Eof) && !FoundEnd) {
4979     return TokError(Twine("expected directive ") +
4980                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4981   }
4982 
4983   CollectStream.flush();
4984   return false;
4985 }
4986 
4987 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4988 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4989   std::string String;
4990   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4991                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4992     return true;
4993 
4994   auto PALMetadata = getTargetStreamer().getPALMetadata();
4995   if (!PALMetadata->setFromString(String))
4996     return Error(getLoc(), "invalid PAL metadata");
4997   return false;
4998 }
4999 
5000 /// Parse the assembler directive for old linear-format PAL metadata.
5001 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5002   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5003     return Error(getLoc(),
5004                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5005                  "not available on non-amdpal OSes")).str());
5006   }
5007 
5008   auto PALMetadata = getTargetStreamer().getPALMetadata();
5009   PALMetadata->setLegacy();
5010   for (;;) {
5011     uint32_t Key, Value;
5012     if (ParseAsAbsoluteExpression(Key)) {
5013       return TokError(Twine("invalid value in ") +
5014                       Twine(PALMD::AssemblerDirective));
5015     }
5016     if (!trySkipToken(AsmToken::Comma)) {
5017       return TokError(Twine("expected an even number of values in ") +
5018                       Twine(PALMD::AssemblerDirective));
5019     }
5020     if (ParseAsAbsoluteExpression(Value)) {
5021       return TokError(Twine("invalid value in ") +
5022                       Twine(PALMD::AssemblerDirective));
5023     }
5024     PALMetadata->setRegister(Key, Value);
5025     if (!trySkipToken(AsmToken::Comma))
5026       break;
5027   }
5028   return false;
5029 }
5030 
5031 /// ParseDirectiveAMDGPULDS
5032 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
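///  e.g. ".amdgpu_lds lds_data, 512, 16" (symbol name and values are
///  illustrative)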
5033 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5034   if (getParser().checkForValidSection())
5035     return true;
5036 
5037   StringRef Name;
5038   SMLoc NameLoc = getLoc();
5039   if (getParser().parseIdentifier(Name))
5040     return TokError("expected identifier in directive");
5041 
5042   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5043   if (parseToken(AsmToken::Comma, "expected ','"))
5044     return true;
5045 
5046   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5047 
5048   int64_t Size;
5049   SMLoc SizeLoc = getLoc();
5050   if (getParser().parseAbsoluteExpression(Size))
5051     return true;
5052   if (Size < 0)
5053     return Error(SizeLoc, "size must be non-negative");
5054   if (Size > LocalMemorySize)
5055     return Error(SizeLoc, "size is too large");
5056 
5057   int64_t Alignment = 4;
5058   if (trySkipToken(AsmToken::Comma)) {
5059     SMLoc AlignLoc = getLoc();
5060     if (getParser().parseAbsoluteExpression(Alignment))
5061       return true;
5062     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5063       return Error(AlignLoc, "alignment must be a power of two");
5064 
5065     // Alignment larger than the size of LDS is possible in theory, as long
5066     // as the linker manages to place the symbol at address 0, but we do want
5067     // to make sure the alignment fits nicely into a 32-bit integer.
5068     if (Alignment >= 1u << 31)
5069       return Error(AlignLoc, "alignment is too large");
5070   }
5071 
5072   if (parseToken(AsmToken::EndOfStatement,
5073                  "unexpected token in '.amdgpu_lds' directive"))
5074     return true;
5075 
5076   Symbol->redefineIfPossible();
5077   if (!Symbol->isUndefined())
5078     return Error(NameLoc, "invalid symbol redefinition");
5079 
5080   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5081   return false;
5082 }
5083 
5084 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5085   StringRef IDVal = DirectiveID.getString();
5086 
5087   if (isHsaAbiVersion3(&getSTI())) {
5088     if (IDVal == ".amdgcn_target")
5089       return ParseDirectiveAMDGCNTarget();
5090 
5091     if (IDVal == ".amdhsa_kernel")
5092       return ParseDirectiveAMDHSAKernel();
5093 
5094     // TODO: Restructure/combine with PAL metadata directive.
5095     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5096       return ParseDirectiveHSAMetadata();
5097   } else {
5098     if (IDVal == ".hsa_code_object_version")
5099       return ParseDirectiveHSACodeObjectVersion();
5100 
5101     if (IDVal == ".hsa_code_object_isa")
5102       return ParseDirectiveHSACodeObjectISA();
5103 
5104     if (IDVal == ".amd_kernel_code_t")
5105       return ParseDirectiveAMDKernelCodeT();
5106 
5107     if (IDVal == ".amdgpu_hsa_kernel")
5108       return ParseDirectiveAMDGPUHsaKernel();
5109 
5110     if (IDVal == ".amd_amdgpu_isa")
5111       return ParseDirectiveISAVersion();
5112 
5113     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5114       return ParseDirectiveHSAMetadata();
5115   }
5116 
5117   if (IDVal == ".amdgpu_lds")
5118     return ParseDirectiveAMDGPULDS();
5119 
5120   if (IDVal == PALMD::AssemblerDirectiveBegin)
5121     return ParseDirectivePALMetadataBegin();
5122 
5123   if (IDVal == PALMD::AssemblerDirective)
5124     return ParseDirectivePALMetadata();
5125 
5126   return true;
5127 }
5128 
5129 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5130                                            unsigned RegNo) const {
5131 
5132   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5133        R.isValid(); ++R) {
5134     if (*R == RegNo)
5135       return isGFX9Plus();
5136   }
5137 
5138   // GFX10 has 2 more SGPRs: 104 and 105.
5139   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5140        R.isValid(); ++R) {
5141     if (*R == RegNo)
5142       return hasSGPR104_SGPR105();
5143   }
5144 
5145   switch (RegNo) {
5146   case AMDGPU::SRC_SHARED_BASE:
5147   case AMDGPU::SRC_SHARED_LIMIT:
5148   case AMDGPU::SRC_PRIVATE_BASE:
5149   case AMDGPU::SRC_PRIVATE_LIMIT:
5150   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5151     return isGFX9Plus();
5152   case AMDGPU::TBA:
5153   case AMDGPU::TBA_LO:
5154   case AMDGPU::TBA_HI:
5155   case AMDGPU::TMA:
5156   case AMDGPU::TMA_LO:
5157   case AMDGPU::TMA_HI:
5158     return !isGFX9Plus();
5159   case AMDGPU::XNACK_MASK:
5160   case AMDGPU::XNACK_MASK_LO:
5161   case AMDGPU::XNACK_MASK_HI:
5162     return (isVI() || isGFX9()) && hasXNACK();
5163   case AMDGPU::SGPR_NULL:
5164     return isGFX10Plus();
5165   default:
5166     break;
5167   }
5168 
5169   if (isCI())
5170     return true;
5171 
5172   if (isSI() || isGFX10Plus()) {
5173     // No flat_scr on SI.
5174     // On GFX10 flat scratch is not a valid register operand and can only be
5175     // accessed with s_setreg/s_getreg.
5176     switch (RegNo) {
5177     case AMDGPU::FLAT_SCR:
5178     case AMDGPU::FLAT_SCR_LO:
5179     case AMDGPU::FLAT_SCR_HI:
5180       return false;
5181     default:
5182       return true;
5183     }
5184   }
5185 
5186   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5187   // SI/CI have.
5188   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5189        R.isValid(); ++R) {
5190     if (*R == RegNo)
5191       return hasSGPR102_SGPR103();
5192   }
5193 
5194   return true;
5195 }
5196 
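// Parse one instruction operand. Custom (operand-specific) parsers are tried
// first; in NSA (non-sequential address) mode, used for GFX10+ image
// instructions, a bracketed register list such as "[v4, v5, v6]" (registers
// illustrative) is accepted and, when it holds more than one register,
// wrapped in "[" / "]" token operands.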
5197 OperandMatchResultTy
5198 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5199                               OperandMode Mode) {
5200   // Try to parse with a custom parser
5201   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5202 
5203   // If we successfully parsed the operand or if there was an error parsing,
5204   // we are done.
5205   //
5206   // If we are parsing after we reach EndOfStatement then this means we
5207   // are appending default values to the Operands list.  This is only done
5208   // by custom parsers, so we shouldn't continue on to the generic parsing.
5209   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5210       isToken(AsmToken::EndOfStatement))
5211     return ResTy;
5212 
5213   SMLoc RBraceLoc;
5214   SMLoc LBraceLoc = getLoc();
5215   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5216     unsigned Prefix = Operands.size();
5217 
5218     for (;;) {
5219       auto Loc = getLoc();
5220       ResTy = parseReg(Operands);
5221       if (ResTy == MatchOperand_NoMatch)
5222         Error(Loc, "expected a register");
5223       if (ResTy != MatchOperand_Success)
5224         return MatchOperand_ParseFail;
5225 
5226       RBraceLoc = getLoc();
5227       if (trySkipToken(AsmToken::RBrac))
5228         break;
5229 
5230       if (!skipToken(AsmToken::Comma,
5231                      "expected a comma or a closing square bracket")) {
5232         return MatchOperand_ParseFail;
5233       }
5234     }
5235 
5236     if (Operands.size() - Prefix > 1) {
5237       Operands.insert(Operands.begin() + Prefix,
5238                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5239       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5240     }
5241 
5242     return MatchOperand_Success;
5243   }
5244 
5245   return parseRegOrImm(Operands);
5246 }
5247 
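// Strip a forced-encoding suffix (_e32, _e64, _dpp or _sdwa) from the
// mnemonic and record the requested encoding, so that e.g. "v_add_f32_e64"
// (mnemonic illustrative) is matched as "v_add_f32" with a forced 64-bit
// encoding.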
5248 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5249   // Clear any forced encodings from the previous instruction.
5250   setForcedEncodingSize(0);
5251   setForcedDPP(false);
5252   setForcedSDWA(false);
5253 
5254   if (Name.endswith("_e64")) {
5255     setForcedEncodingSize(64);
5256     return Name.substr(0, Name.size() - 4);
5257   } else if (Name.endswith("_e32")) {
5258     setForcedEncodingSize(32);
5259     return Name.substr(0, Name.size() - 4);
5260   } else if (Name.endswith("_dpp")) {
5261     setForcedDPP(true);
5262     return Name.substr(0, Name.size() - 4);
5263   } else if (Name.endswith("_sdwa")) {
5264     setForcedSDWA(true);
5265     return Name.substr(0, Name.size() - 5);
5266   }
5267   return Name;
5268 }
5269 
5270 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5271                                        StringRef Name,
5272                                        SMLoc NameLoc, OperandVector &Operands) {
5273   // Add the instruction mnemonic
5274   Name = parseMnemonicSuffix(Name);
5275   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5276 
5277   bool IsMIMG = Name.startswith("image_");
5278 
5279   while (!trySkipToken(AsmToken::EndOfStatement)) {
5280     OperandMode Mode = OperandMode_Default;
5281     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5282       Mode = OperandMode_NSA;
5283     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5284 
5285     if (Res != MatchOperand_Success) {
5286       checkUnsupportedInstruction(Name, NameLoc);
5287       if (!Parser.hasPendingError()) {
5288         // FIXME: use real operand location rather than the current location.
5289         StringRef Msg =
5290           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5291                                             "not a valid operand.";
5292         Error(getLoc(), Msg);
5293       }
5294       while (!trySkipToken(AsmToken::EndOfStatement)) {
5295         lex();
5296       }
5297       return true;
5298     }
5299 
5300     // Eat the comma or space if there is one.
5301     trySkipToken(AsmToken::Comma);
5302   }
5303 
5304   return false;
5305 }
5306 
5307 //===----------------------------------------------------------------------===//
5308 // Utility functions
5309 //===----------------------------------------------------------------------===//
5310 
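// Parse an integer operand written as "<prefix>:<expr>", e.g. "offset:16"
// (value illustrative).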
5311 OperandMatchResultTy
5312 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5313 
5314   if (!trySkipId(Prefix, AsmToken::Colon))
5315     return MatchOperand_NoMatch;
5316 
5317   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5318 }
5319 
5320 OperandMatchResultTy
5321 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5322                                     AMDGPUOperand::ImmTy ImmTy,
5323                                     bool (*ConvertResult)(int64_t&)) {
5324   SMLoc S = getLoc();
5325   int64_t Value = 0;
5326 
5327   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5328   if (Res != MatchOperand_Success)
5329     return Res;
5330 
5331   if (ConvertResult && !ConvertResult(Value)) {
5332     Error(S, "invalid " + StringRef(Prefix) + " value.");
5333   }
5334 
5335   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5336   return MatchOperand_Success;
5337 }
5338 
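// Parse an operand written as "<prefix>:[b0,b1,...]" where each element must
// be 0 or 1; up to four elements are packed into a single immediate with
// element I occupying bit I (used for op_sel-like modifiers).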
5339 OperandMatchResultTy
5340 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5341                                              OperandVector &Operands,
5342                                              AMDGPUOperand::ImmTy ImmTy,
5343                                              bool (*ConvertResult)(int64_t&)) {
5344   SMLoc S = getLoc();
5345   if (!trySkipId(Prefix, AsmToken::Colon))
5346     return MatchOperand_NoMatch;
5347 
5348   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5349     return MatchOperand_ParseFail;
5350 
5351   unsigned Val = 0;
5352   const unsigned MaxSize = 4;
5353 
5354   // FIXME: How to verify the number of elements matches the number of src
5355   // operands?
5356   for (int I = 0; ; ++I) {
5357     int64_t Op;
5358     SMLoc Loc = getLoc();
5359     if (!parseExpr(Op))
5360       return MatchOperand_ParseFail;
5361 
5362     if (Op != 0 && Op != 1) {
5363       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5364       return MatchOperand_ParseFail;
5365     }
5366 
5367     Val |= (Op << I);
5368 
5369     if (trySkipToken(AsmToken::RBrac))
5370       break;
5371 
5372     if (I + 1 == MaxSize) {
5373       Error(getLoc(), "expected a closing square bracket");
5374       return MatchOperand_ParseFail;
5375     }
5376 
5377     if (!skipToken(AsmToken::Comma, "expected a comma"))
5378       return MatchOperand_ParseFail;
5379   }
5380 
5381   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5382   return MatchOperand_Success;
5383 }
5384 
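// Parse a boolean modifier spelled either as "<name>" (bit set) or
// "no<name>" (bit clear), e.g. "glc" / "noglc" (modifier name illustrative),
// with additional per-modifier subtarget checks.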
5385 OperandMatchResultTy
5386 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5387                                AMDGPUOperand::ImmTy ImmTy) {
5388   int64_t Bit;
5389   SMLoc S = getLoc();
5390 
5391   if (trySkipId(Name)) {
5392     Bit = 1;
5393   } else if (trySkipId("no", Name)) {
5394     Bit = 0;
5395   } else {
5396     return MatchOperand_NoMatch;
5397   }
5398 
5399   if (Name == "r128" && !hasMIMG_R128()) {
5400     Error(S, "r128 modifier is not supported on this GPU");
5401     return MatchOperand_ParseFail;
5402   }
5403   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5404     Error(S, "a16 modifier is not supported on this GPU");
5405     return MatchOperand_ParseFail;
5406   }
5407   if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) {
5408     Error(S, "dlc modifier is not supported on this GPU");
5409     return MatchOperand_ParseFail;
5410   }
5411   if (!isGFX90A() && ImmTy == AMDGPUOperand::ImmTySCCB)
5412     return MatchOperand_ParseFail;
5413 
5414   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5415     ImmTy = AMDGPUOperand::ImmTyR128A16;
5416 
5417   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5418   return MatchOperand_Success;
5419 }
5420 
5421 static void addOptionalImmOperand(
5422   MCInst& Inst, const OperandVector& Operands,
5423   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5424   AMDGPUOperand::ImmTy ImmT,
5425   int64_t Default = 0) {
5426   auto i = OptionalIdx.find(ImmT);
5427   if (i != OptionalIdx.end()) {
5428     unsigned Idx = i->second;
5429     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5430   } else {
5431     Inst.addOperand(MCOperand::createImm(Default));
5432   }
5433 }
5434 
5435 OperandMatchResultTy
5436 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5437                                        StringRef &Value,
5438                                        SMLoc &StringLoc) {
5439   if (!trySkipId(Prefix, AsmToken::Colon))
5440     return MatchOperand_NoMatch;
5441 
5442   StringLoc = getLoc();
5443   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5444                                                   : MatchOperand_ParseFail;
5445 }
5446 
5447 //===----------------------------------------------------------------------===//
5448 // MTBUF format
5449 //===----------------------------------------------------------------------===//
5450 
5451 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5452                                   int64_t MaxVal,
5453                                   int64_t &Fmt) {
5454   int64_t Val;
5455   SMLoc Loc = getLoc();
5456 
5457   auto Res = parseIntWithPrefix(Pref, Val);
5458   if (Res == MatchOperand_ParseFail)
5459     return false;
5460   if (Res == MatchOperand_NoMatch)
5461     return true;
5462 
5463   if (Val < 0 || Val > MaxVal) {
5464     Error(Loc, Twine("out of range ", StringRef(Pref)));
5465     return false;
5466   }
5467 
5468   Fmt = Val;
5469   return true;
5470 }
5471 
5472 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5473 // values to live in a joint format operand in the MCInst encoding.
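// For example: "dfmt:15, nfmt:2" or "nfmt:2, dfmt:15" (values illustrative).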
5474 OperandMatchResultTy
5475 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5476   using namespace llvm::AMDGPU::MTBUFFormat;
5477 
5478   int64_t Dfmt = DFMT_UNDEF;
5479   int64_t Nfmt = NFMT_UNDEF;
5480 
5481   // dfmt and nfmt can appear in either order, and each is optional.
5482   for (int I = 0; I < 2; ++I) {
5483     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5484       return MatchOperand_ParseFail;
5485 
5486     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5487       return MatchOperand_ParseFail;
5488     }
5489     // Skip optional comma between dfmt/nfmt
5490     // but guard against 2 commas following each other.
5491     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5492         !peekToken().is(AsmToken::Comma)) {
5493       trySkipToken(AsmToken::Comma);
5494     }
5495   }
5496 
5497   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5498     return MatchOperand_NoMatch;
5499 
5500   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5501   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5502 
5503   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5504   return MatchOperand_Success;
5505 }
5506 
5507 OperandMatchResultTy
5508 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5509   using namespace llvm::AMDGPU::MTBUFFormat;
5510 
5511   int64_t Fmt = UFMT_UNDEF;
5512 
5513   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5514     return MatchOperand_ParseFail;
5515 
5516   if (Fmt == UFMT_UNDEF)
5517     return MatchOperand_NoMatch;
5518 
5519   Format = Fmt;
5520   return MatchOperand_Success;
5521 }
5522 
5523 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5524                                     int64_t &Nfmt,
5525                                     StringRef FormatStr,
5526                                     SMLoc Loc) {
5527   using namespace llvm::AMDGPU::MTBUFFormat;
5528   int64_t Format;
5529 
5530   Format = getDfmt(FormatStr);
5531   if (Format != DFMT_UNDEF) {
5532     Dfmt = Format;
5533     return true;
5534   }
5535 
5536   Format = getNfmt(FormatStr, getSTI());
5537   if (Format != NFMT_UNDEF) {
5538     Nfmt = Format;
5539     return true;
5540   }
5541 
5542   Error(Loc, "unsupported format");
5543   return false;
5544 }
5545 
5546 OperandMatchResultTy
5547 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5548                                           SMLoc FormatLoc,
5549                                           int64_t &Format) {
5550   using namespace llvm::AMDGPU::MTBUFFormat;
5551 
5552   int64_t Dfmt = DFMT_UNDEF;
5553   int64_t Nfmt = NFMT_UNDEF;
5554   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5555     return MatchOperand_ParseFail;
5556 
5557   if (trySkipToken(AsmToken::Comma)) {
5558     StringRef Str;
5559     SMLoc Loc = getLoc();
5560     if (!parseId(Str, "expected a format string") ||
5561         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5562       return MatchOperand_ParseFail;
5563     }
5564     if (Dfmt == DFMT_UNDEF) {
5565       Error(Loc, "duplicate numeric format");
5566       return MatchOperand_ParseFail;
5567     } else if (Nfmt == NFMT_UNDEF) {
5568       Error(Loc, "duplicate data format");
5569       return MatchOperand_ParseFail;
5570     }
5571   }
5572 
5573   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5574   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5575 
5576   if (isGFX10Plus()) {
5577     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5578     if (Ufmt == UFMT_UNDEF) {
5579       Error(FormatLoc, "unsupported format");
5580       return MatchOperand_ParseFail;
5581     }
5582     Format = Ufmt;
5583   } else {
5584     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5585   }
5586 
5587   return MatchOperand_Success;
5588 }
5589 
5590 OperandMatchResultTy
5591 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5592                                             SMLoc Loc,
5593                                             int64_t &Format) {
5594   using namespace llvm::AMDGPU::MTBUFFormat;
5595 
5596   auto Id = getUnifiedFormat(FormatStr);
5597   if (Id == UFMT_UNDEF)
5598     return MatchOperand_NoMatch;
5599 
5600   if (!isGFX10Plus()) {
5601     Error(Loc, "unified format is not supported on this GPU");
5602     return MatchOperand_ParseFail;
5603   }
5604 
5605   Format = Id;
5606   return MatchOperand_Success;
5607 }
5608 
5609 OperandMatchResultTy
5610 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5611   using namespace llvm::AMDGPU::MTBUFFormat;
5612   SMLoc Loc = getLoc();
5613 
5614   if (!parseExpr(Format))
5615     return MatchOperand_ParseFail;
5616   if (!isValidFormatEncoding(Format, getSTI())) {
5617     Error(Loc, "out of range format");
5618     return MatchOperand_ParseFail;
5619   }
5620 
5621   return MatchOperand_Success;
5622 }
5623 
5624 OperandMatchResultTy
5625 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5626   using namespace llvm::AMDGPU::MTBUFFormat;
5627 
5628   if (!trySkipId("format", AsmToken::Colon))
5629     return MatchOperand_NoMatch;
5630 
5631   if (trySkipToken(AsmToken::LBrac)) {
5632     StringRef FormatStr;
5633     SMLoc Loc = getLoc();
5634     if (!parseId(FormatStr, "expected a format string"))
5635       return MatchOperand_ParseFail;
5636 
5637     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5638     if (Res == MatchOperand_NoMatch)
5639       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5640     if (Res != MatchOperand_Success)
5641       return Res;
5642 
5643     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5644       return MatchOperand_ParseFail;
5645 
5646     return MatchOperand_Success;
5647   }
5648 
5649   return parseNumericFormat(Format);
5650 }
5651 
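// Parse the MTBUF format operand. The format may be written either before or
// after the soffset operand; when it is omitted entirely, the subtarget's
// default format encoding is used.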
5652 OperandMatchResultTy
5653 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5654   using namespace llvm::AMDGPU::MTBUFFormat;
5655 
5656   int64_t Format = getDefaultFormatEncoding(getSTI());
5657   OperandMatchResultTy Res;
5658   SMLoc Loc = getLoc();
5659 
5660   // Parse legacy format syntax.
5661   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5662   if (Res == MatchOperand_ParseFail)
5663     return Res;
5664 
5665   bool FormatFound = (Res == MatchOperand_Success);
5666 
5667   Operands.push_back(
5668     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5669 
5670   if (FormatFound)
5671     trySkipToken(AsmToken::Comma);
5672 
5673   if (isToken(AsmToken::EndOfStatement)) {
5674     // We are expecting an soffset operand,
5675     // but let the matcher handle the error.
5676     return MatchOperand_Success;
5677   }
5678 
5679   // Parse soffset.
5680   Res = parseRegOrImm(Operands);
5681   if (Res != MatchOperand_Success)
5682     return Res;
5683 
5684   trySkipToken(AsmToken::Comma);
5685 
5686   if (!FormatFound) {
5687     Res = parseSymbolicOrNumericFormat(Format);
5688     if (Res == MatchOperand_ParseFail)
5689       return Res;
5690     if (Res == MatchOperand_Success) {
5691       auto Size = Operands.size();
5692       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5693       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5694       Op.setImm(Format);
5695     }
5696     return MatchOperand_Success;
5697   }
5698 
5699   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5700     Error(getLoc(), "duplicate format");
5701     return MatchOperand_ParseFail;
5702   }
5703   return MatchOperand_Success;
5704 }
5705 
5706 //===----------------------------------------------------------------------===//
5707 // ds
5708 //===----------------------------------------------------------------------===//
5709 
5710 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5711                                     const OperandVector &Operands) {
5712   OptionalImmIndexMap OptionalIdx;
5713 
5714   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5715     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5716 
5717     // Add the register arguments
5718     if (Op.isReg()) {
5719       Op.addRegOperands(Inst, 1);
5720       continue;
5721     }
5722 
5723     // Handle optional arguments
5724     OptionalIdx[Op.getImmTy()] = i;
5725   }
5726 
5727   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5728   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5729   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5730 
5731   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5732 }
5733 
5734 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5735                                 bool IsGdsHardcoded) {
5736   OptionalImmIndexMap OptionalIdx;
5737 
5738   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5739     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5740 
5741     // Add the register arguments
5742     if (Op.isReg()) {
5743       Op.addRegOperands(Inst, 1);
5744       continue;
5745     }
5746 
5747     if (Op.isToken() && Op.getToken() == "gds") {
5748       IsGdsHardcoded = true;
5749       continue;
5750     }
5751 
5752     // Handle optional arguments
5753     OptionalIdx[Op.getImmTy()] = i;
5754   }
5755 
5756   AMDGPUOperand::ImmTy OffsetType =
5757     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5758      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5759      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5760                                                       AMDGPUOperand::ImmTyOffset;
5761 
5762   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5763 
5764   if (!IsGdsHardcoded) {
5765     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5766   }
5767   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5768 }
5769 
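// Convert parsed export operands into an MCInst: record up to four source
// operands (registers or "off" placeholders), fold the sources when "compr"
// is present, and derive the enable mask from which sources are not "off".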
5770 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5771   OptionalImmIndexMap OptionalIdx;
5772 
5773   unsigned OperandIdx[4];
5774   unsigned EnMask = 0;
5775   int SrcIdx = 0;
5776 
5777   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5778     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5779 
5780     // Add the register arguments
5781     if (Op.isReg()) {
5782       assert(SrcIdx < 4);
5783       OperandIdx[SrcIdx] = Inst.size();
5784       Op.addRegOperands(Inst, 1);
5785       ++SrcIdx;
5786       continue;
5787     }
5788 
5789     if (Op.isOff()) {
5790       assert(SrcIdx < 4);
5791       OperandIdx[SrcIdx] = Inst.size();
5792       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5793       ++SrcIdx;
5794       continue;
5795     }
5796 
5797     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5798       Op.addImmOperands(Inst, 1);
5799       continue;
5800     }
5801 
5802     if (Op.isToken() && Op.getToken() == "done")
5803       continue;
5804 
5805     // Handle optional arguments
5806     OptionalIdx[Op.getImmTy()] = i;
5807   }
5808 
5809   assert(SrcIdx == 4);
5810 
5811   bool Compr = false;
5812   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5813     Compr = true;
5814     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5815     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5816     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5817   }
5818 
5819   for (auto i = 0; i < SrcIdx; ++i) {
5820     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5821       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5822     }
5823   }
5824 
5825   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5826   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5827 
5828   Inst.addOperand(MCOperand::createImm(EnMask));
5829 }
5830 
5831 //===----------------------------------------------------------------------===//
5832 // s_waitcnt
5833 //===----------------------------------------------------------------------===//
5834 
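// Encode a single counter value into the combined waitcnt mask. If the value
// does not survive an encode/decode round trip it is out of range: either
// saturate the field (when Saturate is set) or report failure.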
5835 static bool
5836 encodeCnt(
5837   const AMDGPU::IsaVersion ISA,
5838   int64_t &IntVal,
5839   int64_t CntVal,
5840   bool Saturate,
5841   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5842   unsigned (*decode)(const IsaVersion &Version, unsigned))
5843 {
5844   bool Failed = false;
5845 
5846   IntVal = encode(ISA, IntVal, CntVal);
5847   if (CntVal != decode(ISA, IntVal)) {
5848     if (Saturate) {
5849       IntVal = encode(ISA, IntVal, -1);
5850     } else {
5851       Failed = true;
5852     }
5853   }
5854   return Failed;
5855 }
5856 
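// Parse one "<name>(<value>)" component of an s_waitcnt operand, e.g.
// "vmcnt(0)"; components may be separated by '&' or ','. A "_sat" suffix on
// the counter name requests saturation instead of an error when the value is
// too large.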
5857 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5858 
5859   SMLoc CntLoc = getLoc();
5860   StringRef CntName = getTokenStr();
5861 
5862   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5863       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5864     return false;
5865 
5866   int64_t CntVal;
5867   SMLoc ValLoc = getLoc();
5868   if (!parseExpr(CntVal))
5869     return false;
5870 
5871   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5872 
5873   bool Failed = true;
5874   bool Sat = CntName.endswith("_sat");
5875 
5876   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5877     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5878   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5879     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5880   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5881     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5882   } else {
5883     Error(CntLoc, "invalid counter name " + CntName);
5884     return false;
5885   }
5886 
5887   if (Failed) {
5888     Error(ValLoc, "too large value for " + CntName);
5889     return false;
5890   }
5891 
5892   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5893     return false;
5894 
5895   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5896     if (isToken(AsmToken::EndOfStatement)) {
5897       Error(getLoc(), "expected a counter name");
5898       return false;
5899     }
5900   }
5901 
5902   return true;
5903 }
5904 
5905 OperandMatchResultTy
5906 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5907   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5908   int64_t Waitcnt = getWaitcntBitMask(ISA);
5909   SMLoc S = getLoc();
5910 
5911   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5912     while (!isToken(AsmToken::EndOfStatement)) {
5913       if (!parseCnt(Waitcnt))
5914         return MatchOperand_ParseFail;
5915     }
5916   } else {
5917     if (!parseExpr(Waitcnt))
5918       return MatchOperand_ParseFail;
5919   }
5920 
5921   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5922   return MatchOperand_Success;
5923 }
5924 
5925 bool
5926 AMDGPUOperand::isSWaitCnt() const {
5927   return isImm();
5928 }
5929 
5930 //===----------------------------------------------------------------------===//
5931 // hwreg
5932 //===----------------------------------------------------------------------===//
5933 
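// Parse the body of a "hwreg(...)" operand: a register given by symbolic
// name or numeric code, optionally followed by a bit offset and a bitfield
// width, i.e. "hwreg(reg)" or "hwreg(reg, offset, width)".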
5934 bool
5935 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5936                                 OperandInfoTy &Offset,
5937                                 OperandInfoTy &Width) {
5938   using namespace llvm::AMDGPU::Hwreg;
5939 
5940   // The register may be specified by name or using a numeric code
5941   HwReg.Loc = getLoc();
5942   if (isToken(AsmToken::Identifier) &&
5943       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5944     HwReg.IsSymbolic = true;
5945     lex(); // skip register name
5946   } else if (!parseExpr(HwReg.Id, "a register name")) {
5947     return false;
5948   }
5949 
5950   if (trySkipToken(AsmToken::RParen))
5951     return true;
5952 
5953   // parse optional params
5954   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
5955     return false;
5956 
5957   Offset.Loc = getLoc();
5958   if (!parseExpr(Offset.Id))
5959     return false;
5960 
5961   if (!skipToken(AsmToken::Comma, "expected a comma"))
5962     return false;
5963 
5964   Width.Loc = getLoc();
5965   return parseExpr(Width.Id) &&
5966          skipToken(AsmToken::RParen, "expected a closing parenthesis");
5967 }
5968 
5969 bool
5970 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5971                                const OperandInfoTy &Offset,
5972                                const OperandInfoTy &Width) {
5973 
5974   using namespace llvm::AMDGPU::Hwreg;
5975 
5976   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5977     Error(HwReg.Loc,
5978           "specified hardware register is not supported on this GPU");
5979     return false;
5980   }
5981   if (!isValidHwreg(HwReg.Id)) {
5982     Error(HwReg.Loc,
5983           "invalid code of hardware register: only 6-bit values are legal");
5984     return false;
5985   }
5986   if (!isValidHwregOffset(Offset.Id)) {
5987     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
5988     return false;
5989   }
5990   if (!isValidHwregWidth(Width.Id)) {
5991     Error(Width.Loc,
5992           "invalid bitfield width: only values from 1 to 32 are legal");
5993     return false;
5994   }
5995   return true;
5996 }
5997 
5998 OperandMatchResultTy
5999 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6000   using namespace llvm::AMDGPU::Hwreg;
6001 
6002   int64_t ImmVal = 0;
6003   SMLoc Loc = getLoc();
6004 
6005   if (trySkipId("hwreg", AsmToken::LParen)) {
6006     OperandInfoTy HwReg(ID_UNKNOWN_);
6007     OperandInfoTy Offset(OFFSET_DEFAULT_);
6008     OperandInfoTy Width(WIDTH_DEFAULT_);
6009     if (parseHwregBody(HwReg, Offset, Width) &&
6010         validateHwreg(HwReg, Offset, Width)) {
6011       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6012     } else {
6013       return MatchOperand_ParseFail;
6014     }
6015   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6016     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6017       Error(Loc, "invalid immediate: only 16-bit values are legal");
6018       return MatchOperand_ParseFail;
6019     }
6020   } else {
6021     return MatchOperand_ParseFail;
6022   }
6023 
6024   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6025   return MatchOperand_Success;
6026 }
6027 
6028 bool AMDGPUOperand::isHwreg() const {
6029   return isImmTy(ImmTyHwreg);
6030 }
6031 
6032 //===----------------------------------------------------------------------===//
6033 // sendmsg
6034 //===----------------------------------------------------------------------===//
6035 
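// Parse the body of a "sendmsg(...)" operand: a message given by name or
// numeric id, optionally followed by an operation and a stream id, i.e.
// "sendmsg(msg)", "sendmsg(msg, op)" or "sendmsg(msg, op, stream)".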
6036 bool
6037 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6038                                   OperandInfoTy &Op,
6039                                   OperandInfoTy &Stream) {
6040   using namespace llvm::AMDGPU::SendMsg;
6041 
6042   Msg.Loc = getLoc();
6043   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6044     Msg.IsSymbolic = true;
6045     lex(); // skip message name
6046   } else if (!parseExpr(Msg.Id, "a message name")) {
6047     return false;
6048   }
6049 
6050   if (trySkipToken(AsmToken::Comma)) {
6051     Op.IsDefined = true;
6052     Op.Loc = getLoc();
6053     if (isToken(AsmToken::Identifier) &&
6054         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6055       lex(); // skip operation name
6056     } else if (!parseExpr(Op.Id, "an operation name")) {
6057       return false;
6058     }
6059 
6060     if (trySkipToken(AsmToken::Comma)) {
6061       Stream.IsDefined = true;
6062       Stream.Loc = getLoc();
6063       if (!parseExpr(Stream.Id))
6064         return false;
6065     }
6066   }
6067 
6068   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6069 }
6070 
6071 bool
6072 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6073                                  const OperandInfoTy &Op,
6074                                  const OperandInfoTy &Stream) {
6075   using namespace llvm::AMDGPU::SendMsg;
6076 
6077   // Validation strictness depends on whether the message is specified
6078   // in a symbolic or in a numeric form. In the latter case
6079   // only the encoding possibility is checked.
6080   bool Strict = Msg.IsSymbolic;
6081 
6082   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6083     Error(Msg.Loc, "invalid message id");
6084     return false;
6085   }
6086   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6087     if (Op.IsDefined) {
6088       Error(Op.Loc, "message does not support operations");
6089     } else {
6090       Error(Msg.Loc, "missing message operation");
6091     }
6092     return false;
6093   }
6094   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6095     Error(Op.Loc, "invalid operation id");
6096     return false;
6097   }
6098   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6099     Error(Stream.Loc, "message operation does not support streams");
6100     return false;
6101   }
6102   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6103     Error(Stream.Loc, "invalid message stream id");
6104     return false;
6105   }
6106   return true;
6107 }
6108 
6109 OperandMatchResultTy
6110 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6111   using namespace llvm::AMDGPU::SendMsg;
6112 
6113   int64_t ImmVal = 0;
6114   SMLoc Loc = getLoc();
6115 
6116   if (trySkipId("sendmsg", AsmToken::LParen)) {
6117     OperandInfoTy Msg(ID_UNKNOWN_);
6118     OperandInfoTy Op(OP_NONE_);
6119     OperandInfoTy Stream(STREAM_ID_NONE_);
6120     if (parseSendMsgBody(Msg, Op, Stream) &&
6121         validateSendMsg(Msg, Op, Stream)) {
6122       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6123     } else {
6124       return MatchOperand_ParseFail;
6125     }
6126   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6127     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6128       Error(Loc, "invalid immediate: only 16-bit values are legal");
6129       return MatchOperand_ParseFail;
6130     }
6131   } else {
6132     return MatchOperand_ParseFail;
6133   }
6134 
6135   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6136   return MatchOperand_Success;
6137 }
6138 
6139 bool AMDGPUOperand::isSendMsg() const {
6140   return isImmTy(ImmTySendMsg);
6141 }
6142 
6143 //===----------------------------------------------------------------------===//
6144 // v_interp
6145 //===----------------------------------------------------------------------===//
6146 
6147 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6148   StringRef Str;
6149   SMLoc S = getLoc();
6150 
6151   if (!parseId(Str))
6152     return MatchOperand_NoMatch;
6153 
6154   int Slot = StringSwitch<int>(Str)
6155     .Case("p10", 0)
6156     .Case("p20", 1)
6157     .Case("p0", 2)
6158     .Default(-1);
6159 
6160   if (Slot == -1) {
6161     Error(S, "invalid interpolation slot");
6162     return MatchOperand_ParseFail;
6163   }
6164 
6165   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6166                                               AMDGPUOperand::ImmTyInterpSlot));
6167   return MatchOperand_Success;
6168 }
6169 
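// Parse an interpolation attribute of the form "attr<N>.<chan>", where
// <chan> is one of x, y, z, w and <N> is in the range [0..63],
// e.g. "attr0.x".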
6170 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6171   StringRef Str;
6172   SMLoc S = getLoc();
6173 
6174   if (!parseId(Str))
6175     return MatchOperand_NoMatch;
6176 
6177   if (!Str.startswith("attr")) {
6178     Error(S, "invalid interpolation attribute");
6179     return MatchOperand_ParseFail;
6180   }
6181 
6182   StringRef Chan = Str.take_back(2);
6183   int AttrChan = StringSwitch<int>(Chan)
6184     .Case(".x", 0)
6185     .Case(".y", 1)
6186     .Case(".z", 2)
6187     .Case(".w", 3)
6188     .Default(-1);
6189   if (AttrChan == -1) {
6190     Error(S, "invalid or missing interpolation attribute channel");
6191     return MatchOperand_ParseFail;
6192   }
6193 
6194   Str = Str.drop_back(2).drop_front(4);
6195 
6196   uint8_t Attr;
6197   if (Str.getAsInteger(10, Attr)) {
6198     Error(S, "invalid or missing interpolation attribute number");
6199     return MatchOperand_ParseFail;
6200   }
6201 
6202   if (Attr > 63) {
6203     Error(S, "out of bounds interpolation attribute number");
6204     return MatchOperand_ParseFail;
6205   }
6206 
6207   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6208 
6209   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6210                                               AMDGPUOperand::ImmTyInterpAttr));
6211   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6212                                               AMDGPUOperand::ImmTyAttrChan));
6213   return MatchOperand_Success;
6214 }
6215 
6216 //===----------------------------------------------------------------------===//
6217 // exp
6218 //===----------------------------------------------------------------------===//
6219 
6220 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6221   using namespace llvm::AMDGPU::Exp;
6222 
6223   StringRef Str;
6224   SMLoc S = getLoc();
6225 
6226   if (!parseId(Str))
6227     return MatchOperand_NoMatch;
6228 
6229   unsigned Id = getTgtId(Str);
6230   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6231     Error(S, (Id == ET_INVALID) ?
6232                 "invalid exp target" :
6233                 "exp target is not supported on this GPU");
6234     return MatchOperand_ParseFail;
6235   }
6236 
6237   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6238                                               AMDGPUOperand::ImmTyExpTgt));
6239   return MatchOperand_Success;
6240 }
6241 
6242 //===----------------------------------------------------------------------===//
6243 // parser helpers
6244 //===----------------------------------------------------------------------===//
6245 
6246 bool
6247 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6248   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6249 }
6250 
6251 bool
6252 AMDGPUAsmParser::isId(const StringRef Id) const {
6253   return isId(getToken(), Id);
6254 }
6255 
6256 bool
6257 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6258   return getTokenKind() == Kind;
6259 }
6260 
6261 bool
6262 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6263   if (isId(Id)) {
6264     lex();
6265     return true;
6266   }
6267   return false;
6268 }
6269 
6270 bool
6271 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6272   if (isToken(AsmToken::Identifier)) {
6273     StringRef Tok = getTokenStr();
6274     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6275       lex();
6276       return true;
6277     }
6278   }
6279   return false;
6280 }
6281 
6282 bool
6283 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6284   if (isId(Id) && peekToken().is(Kind)) {
6285     lex();
6286     lex();
6287     return true;
6288   }
6289   return false;
6290 }
6291 
6292 bool
6293 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6294   if (isToken(Kind)) {
6295     lex();
6296     return true;
6297   }
6298   return false;
6299 }
6300 
6301 bool
6302 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6303                            const StringRef ErrMsg) {
6304   if (!trySkipToken(Kind)) {
6305     Error(getLoc(), ErrMsg);
6306     return false;
6307   }
6308   return true;
6309 }
6310 
6311 bool
6312 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6313   SMLoc S = getLoc();
6314 
6315   const MCExpr *Expr;
6316   if (Parser.parseExpression(Expr))
6317     return false;
6318 
6319   if (Expr->evaluateAsAbsolute(Imm))
6320     return true;
6321 
6322   if (Expected.empty()) {
6323     Error(S, "expected absolute expression");
6324   } else {
6325     Error(S, Twine("expected ", Expected) +
6326              Twine(" or an absolute expression"));
6327   }
6328   return false;
6329 }
6330 
6331 bool
6332 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6333   SMLoc S = getLoc();
6334 
6335   const MCExpr *Expr;
6336   if (Parser.parseExpression(Expr))
6337     return false;
6338 
6339   int64_t IntVal;
6340   if (Expr->evaluateAsAbsolute(IntVal)) {
6341     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6342   } else {
6343     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6344   }
6345   return true;
6346 }
6347 
6348 bool
6349 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6350   if (isToken(AsmToken::String)) {
6351     Val = getToken().getStringContents();
6352     lex();
6353     return true;
6354   } else {
6355     Error(getLoc(), ErrMsg);
6356     return false;
6357   }
6358 }
6359 
6360 bool
6361 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6362   if (isToken(AsmToken::Identifier)) {
6363     Val = getTokenStr();
6364     lex();
6365     return true;
6366   } else {
6367     if (!ErrMsg.empty())
6368       Error(getLoc(), ErrMsg);
6369     return false;
6370   }
6371 }
6372 
6373 AsmToken
6374 AMDGPUAsmParser::getToken() const {
6375   return Parser.getTok();
6376 }
6377 
6378 AsmToken
6379 AMDGPUAsmParser::peekToken() {
6380   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6381 }
6382 
6383 void
6384 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6385   auto TokCount = getLexer().peekTokens(Tokens);
6386 
6387   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6388     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6389 }
6390 
6391 AsmToken::TokenKind
6392 AMDGPUAsmParser::getTokenKind() const {
6393   return getLexer().getKind();
6394 }
6395 
6396 SMLoc
6397 AMDGPUAsmParser::getLoc() const {
6398   return getToken().getLoc();
6399 }
6400 
6401 StringRef
6402 AMDGPUAsmParser::getTokenStr() const {
6403   return getToken().getString();
6404 }
6405 
6406 void
6407 AMDGPUAsmParser::lex() {
6408   Parser.Lex();
6409 }
6410 
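     // Return the location of the most recently parsed operand which satisfies
     // Test, or the location of the mnemonic (operand 0) if no operand matches.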
6411 SMLoc
6412 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6413                                const OperandVector &Operands) const {
6414   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6415     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6416     if (Test(Op))
6417       return Op.getStartLoc();
6418   }
6419   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6420 }
6421 
6422 SMLoc
6423 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6424                            const OperandVector &Operands) const {
6425   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6426   return getOperandLoc(Test, Operands);
6427 }
6428 
6429 SMLoc
6430 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6431                            const OperandVector &Operands) const {
6432   auto Test = [=](const AMDGPUOperand& Op) {
6433     return Op.isRegKind() && Op.getReg() == Reg;
6434   };
6435   return getOperandLoc(Test, Operands);
6436 }
6437 
6438 SMLoc
6439 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6440   auto Test = [](const AMDGPUOperand& Op) {
6441     return Op.IsImmKindLiteral() || Op.isExpr();
6442   };
6443   return getOperandLoc(Test, Operands);
6444 }
6445 
6446 SMLoc
6447 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6448   auto Test = [](const AMDGPUOperand& Op) {
6449     return Op.isImmKindConst();
6450   };
6451   return getOperandLoc(Test, Operands);
6452 }
6453 
6454 //===----------------------------------------------------------------------===//
6455 // swizzle
6456 //===----------------------------------------------------------------------===//
6457 
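     // A BITMASK_PERM swizzle maps each lane id to, in effect,
     //   new_lane = ((old_lane & AndMask) | OrMask) ^ XorMask
     // where all three masks are 5-bit values. The broadcast, swap and reverse
     // macros below are encoded in terms of this primitive.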
6458 LLVM_READNONE
6459 static unsigned
6460 encodeBitmaskPerm(const unsigned AndMask,
6461                   const unsigned OrMask,
6462                   const unsigned XorMask) {
6463   using namespace llvm::AMDGPU::Swizzle;
6464 
6465   return BITMASK_PERM_ENC |
6466          (AndMask << BITMASK_AND_SHIFT) |
6467          (OrMask  << BITMASK_OR_SHIFT)  |
6468          (XorMask << BITMASK_XOR_SHIFT);
6469 }
6470 
6471 bool
6472 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6473                                      const unsigned MinVal,
6474                                      const unsigned MaxVal,
6475                                      const StringRef ErrMsg,
6476                                      SMLoc &Loc) {
6477   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6478     return false;
6479   }
6480   Loc = getLoc();
6481   if (!parseExpr(Op)) {
6482     return false;
6483   }
6484   if (Op < MinVal || Op > MaxVal) {
6485     Error(Loc, ErrMsg);
6486     return false;
6487   }
6488 
6489   return true;
6490 }
6491 
6492 bool
6493 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6494                                       const unsigned MinVal,
6495                                       const unsigned MaxVal,
6496                                       const StringRef ErrMsg) {
6497   SMLoc Loc;
6498   for (unsigned i = 0; i < OpNum; ++i) {
6499     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6500       return false;
6501   }
6502 
6503   return true;
6504 }
6505 
6506 bool
6507 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6508   using namespace llvm::AMDGPU::Swizzle;
6509 
6510   int64_t Lane[LANE_NUM];
6511   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6512                            "expected a 2-bit lane id")) {
6513     Imm = QUAD_PERM_ENC;
6514     for (unsigned I = 0; I < LANE_NUM; ++I) {
6515       Imm |= Lane[I] << (LANE_SHIFT * I);
6516     }
6517     return true;
6518   }
6519   return false;
6520 }
6521 
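     // swizzle(BROADCAST, GroupSize, LaneIdx) broadcasts one lane to its group.
     // E.g. for a group size of 4 and lane 1, AndMask = 0x1F - 4 + 1 = 0x1C
     // clears the two low bits of the lane id and OrMask = 1 then selects
     // lane 1 within each group.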
6522 bool
6523 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6524   using namespace llvm::AMDGPU::Swizzle;
6525 
6526   SMLoc Loc;
6527   int64_t GroupSize;
6528   int64_t LaneIdx;
6529 
6530   if (!parseSwizzleOperand(GroupSize,
6531                            2, 32,
6532                            "group size must be in the interval [2,32]",
6533                            Loc)) {
6534     return false;
6535   }
6536   if (!isPowerOf2_64(GroupSize)) {
6537     Error(Loc, "group size must be a power of two");
6538     return false;
6539   }
6540   if (parseSwizzleOperand(LaneIdx,
6541                           0, GroupSize - 1,
6542                           "lane id must be in the interval [0,group size - 1]",
6543                           Loc)) {
6544     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6545     return true;
6546   }
6547   return false;
6548 }
6549 
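     // swizzle(REVERSE, GroupSize) reverses the lane order within each group:
     // XOR-ing the lane id with GroupSize - 1 flips its low log2(GroupSize) bits.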
6550 bool
6551 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6552   using namespace llvm::AMDGPU::Swizzle;
6553 
6554   SMLoc Loc;
6555   int64_t GroupSize;
6556 
6557   if (!parseSwizzleOperand(GroupSize,
6558                            2, 32,
6559                            "group size must be in the interval [2,32]",
6560                            Loc)) {
6561     return false;
6562   }
6563   if (!isPowerOf2_64(GroupSize)) {
6564     Error(Loc, "group size must be a power of two");
6565     return false;
6566   }
6567 
6568   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6569   return true;
6570 }
6571 
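     // swizzle(SWAP, GroupSize) swaps adjacent groups of GroupSize lanes:
     // XOR-ing the lane id with GroupSize flips the bit just above the group.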
6572 bool
6573 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6574   using namespace llvm::AMDGPU::Swizzle;
6575 
6576   SMLoc Loc;
6577   int64_t GroupSize;
6578 
6579   if (!parseSwizzleOperand(GroupSize,
6580                            1, 16,
6581                            "group size must be in the interval [1,16]",
6582                            Loc)) {
6583     return false;
6584   }
6585   if (!isPowerOf2_64(GroupSize)) {
6586     Error(Loc, "group size must be a power of two");
6587     return false;
6588   }
6589 
6590   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6591   return true;
6592 }
6593 
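     // swizzle(BITMASK_PERM, "mask") takes a 5-character mask which controls
     // the bits of the lane id, most significant bit first: '0' forces a bit
     // to 0, '1' forces it to 1, 'p' preserves it and 'i' inverts it,
     // e.g. swizzle(BITMASK_PERM, "01pip").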
6594 bool
6595 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6596   using namespace llvm::AMDGPU::Swizzle;
6597 
6598   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6599     return false;
6600   }
6601 
6602   StringRef Ctl;
6603   SMLoc StrLoc = getLoc();
6604   if (!parseString(Ctl)) {
6605     return false;
6606   }
6607   if (Ctl.size() != BITMASK_WIDTH) {
6608     Error(StrLoc, "expected a 5-character mask");
6609     return false;
6610   }
6611 
6612   unsigned AndMask = 0;
6613   unsigned OrMask = 0;
6614   unsigned XorMask = 0;
6615 
6616   for (size_t i = 0; i < Ctl.size(); ++i) {
6617     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6618     switch(Ctl[i]) {
6619     default:
6620       Error(StrLoc, "invalid mask");
6621       return false;
6622     case '0':
6623       break;
6624     case '1':
6625       OrMask |= Mask;
6626       break;
6627     case 'p':
6628       AndMask |= Mask;
6629       break;
6630     case 'i':
6631       AndMask |= Mask;
6632       XorMask |= Mask;
6633       break;
6634     }
6635   }
6636 
6637   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6638   return true;
6639 }
6640 
6641 bool
6642 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6643 
6644   SMLoc OffsetLoc = getLoc();
6645 
6646   if (!parseExpr(Imm, "a swizzle macro")) {
6647     return false;
6648   }
6649   if (!isUInt<16>(Imm)) {
6650     Error(OffsetLoc, "expected a 16-bit offset");
6651     return false;
6652   }
6653   return true;
6654 }
6655 
6656 bool
6657 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6658   using namespace llvm::AMDGPU::Swizzle;
6659 
6660   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6661 
6662     SMLoc ModeLoc = getLoc();
6663     bool Ok = false;
6664 
6665     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6666       Ok = parseSwizzleQuadPerm(Imm);
6667     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6668       Ok = parseSwizzleBitmaskPerm(Imm);
6669     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6670       Ok = parseSwizzleBroadcast(Imm);
6671     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6672       Ok = parseSwizzleSwap(Imm);
6673     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6674       Ok = parseSwizzleReverse(Imm);
6675     } else {
6676       Error(ModeLoc, "expected a swizzle mode");
6677     }
6678 
6679     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6680   }
6681 
6682   return false;
6683 }
6684 
6685 OperandMatchResultTy
6686 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6687   SMLoc S = getLoc();
6688   int64_t Imm = 0;
6689 
6690   if (trySkipId("offset")) {
6691 
6692     bool Ok = false;
6693     if (skipToken(AsmToken::Colon, "expected a colon")) {
6694       if (trySkipId("swizzle")) {
6695         Ok = parseSwizzleMacro(Imm);
6696       } else {
6697         Ok = parseSwizzleOffset(Imm);
6698       }
6699     }
6700 
6701     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6702 
6703     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6704   } else {
6705     // Swizzle "offset" operand is optional.
6706     // If it is omitted, try parsing other optional operands.
6707     return parseOptionalOpr(Operands);
6708   }
6709 }
6710 
6711 bool
6712 AMDGPUOperand::isSwizzle() const {
6713   return isImmTy(ImmTySwizzle);
6714 }
6715 
6716 //===----------------------------------------------------------------------===//
6717 // VGPR Index Mode
6718 //===----------------------------------------------------------------------===//
6719 
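     // Parses the index mode operand of s_set_gpr_idx_on, either as a macro
     // such as gpr_idx(SRC0,SRC1,SRC2,DST) or as a raw 4-bit immediate
     // (illustrative syntax; the accepted mode names come from
     // AMDGPU::VGPRIndexMode::IdSymbolic).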
6720 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6721 
6722   using namespace llvm::AMDGPU::VGPRIndexMode;
6723 
6724   if (trySkipToken(AsmToken::RParen)) {
6725     return OFF;
6726   }
6727 
6728   int64_t Imm = 0;
6729 
6730   while (true) {
6731     unsigned Mode = 0;
6732     SMLoc S = getLoc();
6733 
6734     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6735       if (trySkipId(IdSymbolic[ModeId])) {
6736         Mode = 1 << ModeId;
6737         break;
6738       }
6739     }
6740 
6741     if (Mode == 0) {
6742       Error(S, (Imm == 0)?
6743                "expected a VGPR index mode or a closing parenthesis" :
6744                "expected a VGPR index mode");
6745       return UNDEF;
6746     }
6747 
6748     if (Imm & Mode) {
6749       Error(S, "duplicate VGPR index mode");
6750       return UNDEF;
6751     }
6752     Imm |= Mode;
6753 
6754     if (trySkipToken(AsmToken::RParen))
6755       break;
6756     if (!skipToken(AsmToken::Comma,
6757                    "expected a comma or a closing parenthesis"))
6758       return UNDEF;
6759   }
6760 
6761   return Imm;
6762 }
6763 
6764 OperandMatchResultTy
6765 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6766 
6767   using namespace llvm::AMDGPU::VGPRIndexMode;
6768 
6769   int64_t Imm = 0;
6770   SMLoc S = getLoc();
6771 
6772   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6773     Imm = parseGPRIdxMacro();
6774     if (Imm == UNDEF)
6775       return MatchOperand_ParseFail;
6776   } else {
6777     if (getParser().parseAbsoluteExpression(Imm))
6778       return MatchOperand_ParseFail;
6779     if (Imm < 0 || !isUInt<4>(Imm)) {
6780       Error(S, "invalid immediate: only 4-bit values are legal");
6781       return MatchOperand_ParseFail;
6782     }
6783   }
6784 
6785   Operands.push_back(
6786       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6787   return MatchOperand_Success;
6788 }
6789 
6790 bool AMDGPUOperand::isGPRIdxMode() const {
6791   return isImmTy(ImmTyGprIdxMode);
6792 }
6793 
6794 //===----------------------------------------------------------------------===//
6795 // sopp branch targets
6796 //===----------------------------------------------------------------------===//
6797 
6798 OperandMatchResultTy
6799 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6800 
6801   // Make sure we are not parsing something
6802   // that looks like a label or an expression but is not.
6803   // This will improve error messages.
6804   if (isRegister() || isModifier())
6805     return MatchOperand_NoMatch;
6806 
6807   if (!parseExpr(Operands))
6808     return MatchOperand_ParseFail;
6809 
6810   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6811   assert(Opr.isImm() || Opr.isExpr());
6812   SMLoc Loc = Opr.getStartLoc();
6813 
6814   // Currently we do not support arbitrary expressions as branch targets.
6815   // Only labels and absolute expressions are accepted.
6816   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6817     Error(Loc, "expected an absolute expression or a label");
6818   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6819     Error(Loc, "expected a 16-bit signed jump offset");
6820   }
6821 
6822   return MatchOperand_Success;
6823 }
6824 
6825 //===----------------------------------------------------------------------===//
6826 // Boolean holding registers
6827 //===----------------------------------------------------------------------===//
6828 
6829 OperandMatchResultTy
6830 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6831   return parseReg(Operands);
6832 }
6833 
6834 //===----------------------------------------------------------------------===//
6835 // mubuf
6836 //===----------------------------------------------------------------------===//
6837 
6838 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6839   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6840 }
6841 
6842 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSCCB() const {
6843   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySCCB);
6844 }
6845 
6846 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6847   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6848 }
6849 
6850 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
6851   return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
6852 }
6853 
6854 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6855   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6856 }
6857 
6858 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6859                                const OperandVector &Operands,
6860                                bool IsAtomic,
6861                                bool IsAtomicReturn,
6862                                bool IsLds) {
6863   bool IsLdsOpcode = IsLds;
6864   bool HasLdsModifier = false;
6865   OptionalImmIndexMap OptionalIdx;
6866   assert(IsAtomicReturn ? IsAtomic : true);
6867   unsigned FirstOperandIdx = 1;
6868 
6869   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6870     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6871 
6872     // Add the register arguments
6873     if (Op.isReg()) {
6874       Op.addRegOperands(Inst, 1);
6875       // Insert a tied src for atomic return dst.
6876       // This cannot be postponed because subsequent calls to
6877       // addImmOperands rely on the correct number of MC operands.
6878       if (IsAtomicReturn && i == FirstOperandIdx)
6879         Op.addRegOperands(Inst, 1);
6880       continue;
6881     }
6882 
6883     // Handle the case where soffset is an immediate
6884     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6885       Op.addImmOperands(Inst, 1);
6886       continue;
6887     }
6888 
6889     HasLdsModifier |= Op.isLDS();
6890 
6891     // Handle tokens like 'offen' which are sometimes hard-coded into the
6892     // asm string.  There are no MCInst operands for these.
6893     if (Op.isToken()) {
6894       continue;
6895     }
6896     assert(Op.isImm());
6897 
6898     // Handle optional arguments
6899     OptionalIdx[Op.getImmTy()] = i;
6900   }
6901 
6902   // This is a workaround for an llvm quirk which may result in an
6903   // incorrect instruction selection. Lds and non-lds versions of
6904   // MUBUF instructions are identical except that lds versions
6905   // have a mandatory 'lds' modifier. However, this modifier follows
6906   // the optional modifiers, and the llvm asm matcher regards 'lds'
6907   // as an optional modifier too. As a result, an lds version
6908   // of the opcode may be selected even if no 'lds' modifier is present.
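       // E.g. a buffer load written without 'lds' could otherwise be matched
       // to the lds flavor of the opcode; if that happens, switch back to the
       // non-lds opcode below.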
6909   if (IsLdsOpcode && !HasLdsModifier) {
6910     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6911     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6912       Inst.setOpcode(NoLdsOpcode);
6913       IsLdsOpcode = false;
6914     }
6915   }
6916 
6917   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6918   if (!IsAtomic || IsAtomicReturn) {
6919     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC,
6920                           IsAtomicReturn ? -1 : 0);
6921   }
6922   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6923 
6924   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6925     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6926   }
6927 
6928   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6929   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
6930   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB);
6931 }
6932 
6933 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6934   OptionalImmIndexMap OptionalIdx;
6935 
6936   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6937     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6938 
6939     // Add the register arguments
6940     if (Op.isReg()) {
6941       Op.addRegOperands(Inst, 1);
6942       continue;
6943     }
6944 
6945     // Handle the case where soffset is an immediate
6946     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6947       Op.addImmOperands(Inst, 1);
6948       continue;
6949     }
6950 
6951     // Handle tokens like 'offen' which are sometimes hard-coded into the
6952     // asm string.  There are no MCInst operands for these.
6953     if (Op.isToken()) {
6954       continue;
6955     }
6956     assert(Op.isImm());
6957 
6958     // Handle optional arguments
6959     OptionalIdx[Op.getImmTy()] = i;
6960   }
6961 
6962   addOptionalImmOperand(Inst, Operands, OptionalIdx,
6963                         AMDGPUOperand::ImmTyOffset);
6964   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6965   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6966   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6967   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6968   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6969   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
6970   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB);
6971 }
6972 
6973 //===----------------------------------------------------------------------===//
6974 // mimg
6975 //===----------------------------------------------------------------------===//
6976 
6977 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6978                               bool IsAtomic) {
6979   unsigned I = 1;
6980   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6981   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6982     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6983   }
6984 
6985   if (IsAtomic) {
6986     // Add src, same as dst
6987     assert(Desc.getNumDefs() == 1);
6988     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6989   }
6990 
6991   OptionalImmIndexMap OptionalIdx;
6992 
6993   for (unsigned E = Operands.size(); I != E; ++I) {
6994     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6995 
6996     // Add the register arguments
6997     if (Op.isReg()) {
6998       Op.addRegOperands(Inst, 1);
6999     } else if (Op.isImmModifier()) {
7000       OptionalIdx[Op.getImmTy()] = I;
7001     } else if (!Op.isToken()) {
7002       llvm_unreachable("unexpected operand type");
7003     }
7004   }
7005 
7006   bool IsGFX10Plus = isGFX10Plus();
7007 
7008   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7009   if (IsGFX10Plus)
7010     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7011   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7012 
7013   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::sccb) != -1)
7014     addOptionalImmOperand(Inst, Operands, OptionalIdx,
7015                           AMDGPUOperand::ImmTySCCB);
7016 
7017   if (IsGFX10Plus)
7018     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
7019 
7020   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
7021   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
7022   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7023   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7024     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7025   if (IsGFX10Plus)
7026     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7027   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7028   if (!IsGFX10Plus)
7029     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7030   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7031 }
7032 
7033 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7034   cvtMIMG(Inst, Operands, true);
7035 }
7036 
7037 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7038                                       const OperandVector &Operands) {
7039   for (unsigned I = 1; I < Operands.size(); ++I) {
7040     auto &Operand = (AMDGPUOperand &)*Operands[I];
7041     if (Operand.isReg())
7042       Operand.addRegOperands(Inst, 1);
7043   }
7044 
7045   Inst.addOperand(MCOperand::createImm(1)); // a16
7046 }
7047 
7048 //===----------------------------------------------------------------------===//
7049 // smrd
7050 //===----------------------------------------------------------------------===//
7051 
7052 bool AMDGPUOperand::isSMRDOffset8() const {
7053   return isImm() && isUInt<8>(getImm());
7054 }
7055 
7056 bool AMDGPUOperand::isSMEMOffset() const {
7057   return isImm(); // Offset range is checked later by validator.
7058 }
7059 
7060 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7061   // 32-bit literals are only supported on CI, and we only want to use them
7062   // when the offset does not fit in 8 bits.
7063   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7064 }
7065 
7066 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7067   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7068 }
7069 
7070 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7071   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7072 }
7073 
7074 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7075   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7076 }
7077 
7078 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7079   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7080 }
7081 
7082 //===----------------------------------------------------------------------===//
7083 // vop3
7084 //===----------------------------------------------------------------------===//
7085 
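     // The 2-bit output modifier (omod) field encodes: 0 - none, 1 - multiply
     // by 2, 2 - multiply by 4, 3 - divide by 2. The converters below map the
     // parsed "mul:N" / "div:N" values to this encoding.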
7086 static bool ConvertOmodMul(int64_t &Mul) {
7087   if (Mul != 1 && Mul != 2 && Mul != 4)
7088     return false;
7089 
7090   Mul >>= 1;
7091   return true;
7092 }
7093 
7094 static bool ConvertOmodDiv(int64_t &Div) {
7095   if (Div == 1) {
7096     Div = 0;
7097     return true;
7098   }
7099 
7100   if (Div == 2) {
7101     Div = 3;
7102     return true;
7103   }
7104 
7105   return false;
7106 }
7107 
7108 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7109 // This is intentional and ensures compatibility with sp3.
7110 // See bug 35397 for details.
7111 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7112   if (BoundCtrl == 0 || BoundCtrl == 1) {
7113     BoundCtrl = 1;
7114     return true;
7115   }
7116   return false;
7117 }
7118 
7119 // Note: the order in this table matches the order of operands in AsmString.
7120 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7121   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7122   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7123   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7124   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7125   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7126   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7127   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7128   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7129   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7130   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
7131   {"scc",     AMDGPUOperand::ImmTySCCB, true, nullptr},
7132   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
7133   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
7134   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7135   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7136   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7137   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7138   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7139   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7140   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7141   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7142   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7143   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7144   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7145   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7146   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7147   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7148   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7149   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7150   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7151   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7152   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7153   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7154   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7155   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7156   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7157   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7158   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7159   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7160   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7161   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7162   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7163   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7164   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7165 };
7166 
7167 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7168 
7169   OperandMatchResultTy res = parseOptionalOpr(Operands);
7170 
7171   // This is a hack to enable hardcoded mandatory operands which follow
7172   // optional operands.
7173   //
7174   // The current design assumes that all operands after the first optional
7175   // operand are also optional. However, the implementation of some instructions
7176   // violates this rule (e.g. flat/global atomics have a hardcoded 'glc' operand).
7177   //
7178   // To alleviate this problem, we have to (implicitly) parse extra operands
7179   // to make sure the autogenerated parser of custom operands never hits a
7180   // hardcoded mandatory operand.
7181 
7182   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7183     if (res != MatchOperand_Success ||
7184         isToken(AsmToken::EndOfStatement))
7185       break;
7186 
7187     trySkipToken(AsmToken::Comma);
7188     res = parseOptionalOpr(Operands);
7189   }
7190 
7191   return res;
7192 }
7193 
7194 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7195   OperandMatchResultTy res;
7196   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7197     // try to parse any optional operand here
7198     if (Op.IsBit) {
7199       res = parseNamedBit(Op.Name, Operands, Op.Type);
7200     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7201       res = parseOModOperand(Operands);
7202     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7203                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7204                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7205       res = parseSDWASel(Operands, Op.Name, Op.Type);
7206     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7207       res = parseSDWADstUnused(Operands);
7208     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7209                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7210                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7211                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7212       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7213                                         Op.ConvertResult);
7214     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7215       res = parseDim(Operands);
7216     } else {
7217       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7218     }
7219     if (res != MatchOperand_NoMatch) {
7220       return res;
7221     }
7222   }
7223   return MatchOperand_NoMatch;
7224 }
7225 
7226 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7227   StringRef Name = getTokenStr();
7228   if (Name == "mul") {
7229     return parseIntWithPrefix("mul", Operands,
7230                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7231   }
7232 
7233   if (Name == "div") {
7234     return parseIntWithPrefix("div", Operands,
7235                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7236   }
7237 
7238   return MatchOperand_NoMatch;
7239 }
7240 
7241 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7242   cvtVOP3P(Inst, Operands);
7243 
7244   int Opc = Inst.getOpcode();
7245 
7246   int SrcNum;
7247   const int Ops[] = { AMDGPU::OpName::src0,
7248                       AMDGPU::OpName::src1,
7249                       AMDGPU::OpName::src2 };
7250   for (SrcNum = 0;
7251        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7252        ++SrcNum);
7253   assert(SrcNum > 0);
7254 
7255   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7256   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7257 
7258   if ((OpSel & (1 << SrcNum)) != 0) {
7259     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7260     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7261     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7262   }
7263 }
7264 
7265 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7266       // 1. This operand is input modifiers
7267   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7268       // 2. This is not last operand
7269       && Desc.NumOperands > (OpNum + 1)
7270       // 3. Next operand is register class
7271       && Desc.OpInfo[OpNum + 1].RegClass != -1
7272       // 4. Next register is not tied to any other operand
7273       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7274 }
7275 
7276 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7277 {
7278   OptionalImmIndexMap OptionalIdx;
7279   unsigned Opc = Inst.getOpcode();
7280 
7281   unsigned I = 1;
7282   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7283   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7284     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7285   }
7286 
7287   for (unsigned E = Operands.size(); I != E; ++I) {
7288     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7289     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7290       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7291     } else if (Op.isInterpSlot() ||
7292                Op.isInterpAttr() ||
7293                Op.isAttrChan()) {
7294       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7295     } else if (Op.isImmModifier()) {
7296       OptionalIdx[Op.getImmTy()] = I;
7297     } else {
7298       llvm_unreachable("unhandled operand type");
7299     }
7300   }
7301 
7302   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7303     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7304   }
7305 
7306   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7307     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7308   }
7309 
7310   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7311     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7312   }
7313 }
7314 
7315 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7316                               OptionalImmIndexMap &OptionalIdx) {
7317   unsigned Opc = Inst.getOpcode();
7318 
7319   unsigned I = 1;
7320   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7321   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7322     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7323   }
7324 
7325   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7326     // This instruction has src modifiers
7327     for (unsigned E = Operands.size(); I != E; ++I) {
7328       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7329       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7330         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7331       } else if (Op.isImmModifier()) {
7332         OptionalIdx[Op.getImmTy()] = I;
7333       } else if (Op.isRegOrImm()) {
7334         Op.addRegOrImmOperands(Inst, 1);
7335       } else {
7336         llvm_unreachable("unhandled operand type");
7337       }
7338     }
7339   } else {
7340     // No src modifiers
7341     for (unsigned E = Operands.size(); I != E; ++I) {
7342       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7343       if (Op.isMod()) {
7344         OptionalIdx[Op.getImmTy()] = I;
7345       } else {
7346         Op.addRegOrImmOperands(Inst, 1);
7347       }
7348     }
7349   }
7350 
7351   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7352     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7353   }
7354 
7355   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7356     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7357   }
7358 
7359   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7360   // they have a src2 register operand that is tied to the dst operand.
7361   // We don't allow modifiers for this operand in the assembler, so
7362   // src2_modifiers should be 0.
7363   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7364       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7365       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7366       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7367       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7368       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7369       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7370       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7371       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7372       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7373       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7374     auto it = Inst.begin();
7375     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7376     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7377     ++it;
7378     // Copy the operand to ensure it's not invalidated when Inst grows.
7379     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7380   }
7381 }
7382 
7383 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7384   OptionalImmIndexMap OptionalIdx;
7385   cvtVOP3(Inst, Operands, OptionalIdx);
7386 }
7387 
7388 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
7389                                const OperandVector &Operands) {
7390   OptionalImmIndexMap OptIdx;
7391   const int Opc = Inst.getOpcode();
7392   const MCInstrDesc &Desc = MII.get(Opc);
7393 
7394   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7395 
7396   cvtVOP3(Inst, Operands, OptIdx);
7397 
7398   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7399     assert(!IsPacked);
7400     Inst.addOperand(Inst.getOperand(0));
7401   }
7402 
7403   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
7404   // instruction, and then figure out where to actually put the modifiers
7405 
7406   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7407 
7408   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7409   if (OpSelHiIdx != -1) {
7410     int DefaultVal = IsPacked ? -1 : 0;
7411     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7412                           DefaultVal);
7413   }
7414 
7415   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7416   if (NegLoIdx != -1) {
7417     assert(IsPacked);
7418     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7419     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7420   }
7421 
7422   const int Ops[] = { AMDGPU::OpName::src0,
7423                       AMDGPU::OpName::src1,
7424                       AMDGPU::OpName::src2 };
7425   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7426                          AMDGPU::OpName::src1_modifiers,
7427                          AMDGPU::OpName::src2_modifiers };
7428 
7429   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7430 
7431   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7432   unsigned OpSelHi = 0;
7433   unsigned NegLo = 0;
7434   unsigned NegHi = 0;
7435 
7436   if (OpSelHiIdx != -1) {
7437     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7438   }
7439 
7440   if (NegLoIdx != -1) {
7441     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7442     NegLo = Inst.getOperand(NegLoIdx).getImm();
7443     NegHi = Inst.getOperand(NegHiIdx).getImm();
7444   }
7445 
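       // Distribute the instruction-wide op_sel/op_sel_hi/neg_lo/neg_hi masks
       // into the per-source modifier operands: bit J of each mask applies to srcJ.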
7446   for (int J = 0; J < 3; ++J) {
7447     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7448     if (OpIdx == -1)
7449       break;
7450 
7451     uint32_t ModVal = 0;
7452 
7453     if ((OpSel & (1 << J)) != 0)
7454       ModVal |= SISrcMods::OP_SEL_0;
7455 
7456     if ((OpSelHi & (1 << J)) != 0)
7457       ModVal |= SISrcMods::OP_SEL_1;
7458 
7459     if ((NegLo & (1 << J)) != 0)
7460       ModVal |= SISrcMods::NEG;
7461 
7462     if ((NegHi & (1 << J)) != 0)
7463       ModVal |= SISrcMods::NEG_HI;
7464 
7465     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7466 
7467     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7468   }
7469 }
7470 
7471 //===----------------------------------------------------------------------===//
7472 // dpp
7473 //===----------------------------------------------------------------------===//
7474 
7475 bool AMDGPUOperand::isDPP8() const {
7476   return isImmTy(ImmTyDPP8);
7477 }
7478 
7479 bool AMDGPUOperand::isDPPCtrl() const {
7480   using namespace AMDGPU::DPP;
7481 
7482   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7483   if (result) {
7484     int64_t Imm = getImm();
7485     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7486            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7487            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7488            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7489            (Imm == DppCtrl::WAVE_SHL1) ||
7490            (Imm == DppCtrl::WAVE_ROL1) ||
7491            (Imm == DppCtrl::WAVE_SHR1) ||
7492            (Imm == DppCtrl::WAVE_ROR1) ||
7493            (Imm == DppCtrl::ROW_MIRROR) ||
7494            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7495            (Imm == DppCtrl::BCAST15) ||
7496            (Imm == DppCtrl::BCAST31) ||
7497            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7498            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7499   }
7500   return false;
7501 }
7502 
7503 //===----------------------------------------------------------------------===//
7504 // mAI
7505 //===----------------------------------------------------------------------===//
7506 
7507 bool AMDGPUOperand::isBLGP() const {
7508   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7509 }
7510 
7511 bool AMDGPUOperand::isCBSZ() const {
7512   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7513 }
7514 
7515 bool AMDGPUOperand::isABID() const {
7516   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7517 }
7518 
7519 bool AMDGPUOperand::isS16Imm() const {
7520   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7521 }
7522 
7523 bool AMDGPUOperand::isU16Imm() const {
7524   return isImm() && isUInt<16>(getImm());
7525 }
7526 
7527 //===----------------------------------------------------------------------===//
7528 // dim
7529 //===----------------------------------------------------------------------===//
7530 
7531 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7532   // We want to allow "dim:1D" etc.,
7533   // but the initial 1 is tokenized as an integer.
7534   std::string Token;
7535   if (isToken(AsmToken::Integer)) {
7536     SMLoc Loc = getToken().getEndLoc();
7537     Token = std::string(getTokenStr());
7538     lex();
7539     if (getLoc() != Loc)
7540       return false;
7541   }
7542 
7543   StringRef Suffix;
7544   if (!parseId(Suffix))
7545     return false;
7546   Token += Suffix;
7547 
7548   StringRef DimId = Token;
7549   if (DimId.startswith("SQ_RSRC_IMG_"))
7550     DimId = DimId.drop_front(12);
7551 
7552   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7553   if (!DimInfo)
7554     return false;
7555 
7556   Encoding = DimInfo->Encoding;
7557   return true;
7558 }
7559 
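     // Parses the MIMG dimension operand, e.g. dim:SQ_RSRC_IMG_2D or its
     // short form dim:2D.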
7560 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7561   if (!isGFX10Plus())
7562     return MatchOperand_NoMatch;
7563 
7564   SMLoc S = getLoc();
7565 
7566   if (!trySkipId("dim", AsmToken::Colon))
7567     return MatchOperand_NoMatch;
7568 
7569   unsigned Encoding;
7570   SMLoc Loc = getLoc();
7571   if (!parseDimId(Encoding)) {
7572     Error(Loc, "invalid dim value");
7573     return MatchOperand_ParseFail;
7574   }
7575 
7576   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7577                                               AMDGPUOperand::ImmTyDim));
7578   return MatchOperand_Success;
7579 }
7580 
7581 //===----------------------------------------------------------------------===//
7582 // dpp
7583 //===----------------------------------------------------------------------===//
7584 
7585 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7586   SMLoc S = getLoc();
7587 
7588   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7589     return MatchOperand_NoMatch;
7590 
7591   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
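       // Each selector is a 3-bit lane id; the eight selectors are packed into
       // a 24-bit value with sel[0] in the least significant bits. E.g.
       // dpp8:[1,0,3,2,5,4,7,6] makes each lane read its neighbor within a pair.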
7592 
7593   int64_t Sels[8];
7594 
7595   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7596     return MatchOperand_ParseFail;
7597 
7598   for (size_t i = 0; i < 8; ++i) {
7599     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7600       return MatchOperand_ParseFail;
7601 
7602     SMLoc Loc = getLoc();
7603     if (getParser().parseAbsoluteExpression(Sels[i]))
7604       return MatchOperand_ParseFail;
7605     if (0 > Sels[i] || 7 < Sels[i]) {
7606       Error(Loc, "expected a 3-bit value");
7607       return MatchOperand_ParseFail;
7608     }
7609   }
7610 
7611   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7612     return MatchOperand_ParseFail;
7613 
7614   unsigned DPP8 = 0;
7615   for (size_t i = 0; i < 8; ++i)
7616     DPP8 |= (Sels[i] << (i * 3));
7617 
7618   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7619   return MatchOperand_Success;
7620 }
7621 
7622 bool
7623 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7624                                     const OperandVector &Operands) {
7625   if (Ctrl == "row_newbcast")
7626       return isGFX90A();
7627 
7628   // DPP64 is supported for row_newbcast only.
7629   const MCRegisterInfo *MRI = getMRI();
7630   if (Operands.size() > 2 && Operands[1]->isReg() &&
7631       MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1))
7632     return false;
7633 
7634   if (Ctrl == "row_share" ||
7635       Ctrl == "row_xmask")
7636     return isGFX10Plus();
7637 
7638   if (Ctrl == "wave_shl" ||
7639       Ctrl == "wave_shr" ||
7640       Ctrl == "wave_rol" ||
7641       Ctrl == "wave_ror" ||
7642       Ctrl == "row_bcast")
7643     return isVI() || isGFX9();
7644 
7645   return Ctrl == "row_mirror" ||
7646          Ctrl == "row_half_mirror" ||
7647          Ctrl == "quad_perm" ||
7648          Ctrl == "row_shl" ||
7649          Ctrl == "row_shr" ||
7650          Ctrl == "row_ror";
7651 }
7652 
7653 int64_t
7654 AMDGPUAsmParser::parseDPPCtrlPerm() {
7655   // quad_perm:[%d,%d,%d,%d]
7656 
7657   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7658     return -1;
7659 
7660   int64_t Val = 0;
7661   for (int i = 0; i < 4; ++i) {
7662     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7663       return -1;
7664 
7665     int64_t Temp;
7666     SMLoc Loc = getLoc();
7667     if (getParser().parseAbsoluteExpression(Temp))
7668       return -1;
7669     if (Temp < 0 || Temp > 3) {
7670       Error(Loc, "expected a 2-bit value");
7671       return -1;
7672     }
7673 
7674     Val += (Temp << i * 2);
7675   }
7676 
7677   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7678     return -1;
7679 
7680   return Val;
7681 }
7682 
7683 int64_t
7684 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7685   using namespace AMDGPU::DPP;
7686 
7687   // sel:%d
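       // E.g. row_shl:1 ... row_shl:15, row_share:0 ... row_share:15,
       // row_bcast:15 or row_bcast:31.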
7688 
7689   int64_t Val;
7690   SMLoc Loc = getLoc();
7691 
7692   if (getParser().parseAbsoluteExpression(Val))
7693     return -1;
7694 
7695   struct DppCtrlCheck {
7696     int64_t Ctrl;
7697     int Lo;
7698     int Hi;
7699   };
7700 
7701   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7702     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7703     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7704     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7705     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7706     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7707     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7708     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7709     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7710     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7711     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7712     .Default({-1, 0, 0});
7713 
7714   bool Valid;
7715   if (Check.Ctrl == -1) {
7716     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7717     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7718   } else {
7719     Valid = Check.Lo <= Val && Val <= Check.Hi;
7720     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7721   }
7722 
7723   if (!Valid) {
7724     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7725     return -1;
7726   }
7727 
7728   return Val;
7729 }
7730 
7731 OperandMatchResultTy
7732 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7733   using namespace AMDGPU::DPP;
7734 
7735   if (!isToken(AsmToken::Identifier) ||
7736       !isSupportedDPPCtrl(getTokenStr(), Operands))
7737     return MatchOperand_NoMatch;
7738 
7739   SMLoc S = getLoc();
7740   int64_t Val = -1;
7741   StringRef Ctrl;
7742 
7743   parseId(Ctrl);
7744 
7745   if (Ctrl == "row_mirror") {
7746     Val = DppCtrl::ROW_MIRROR;
7747   } else if (Ctrl == "row_half_mirror") {
7748     Val = DppCtrl::ROW_HALF_MIRROR;
7749   } else {
7750     if (skipToken(AsmToken::Colon, "expected a colon")) {
7751       if (Ctrl == "quad_perm") {
7752         Val = parseDPPCtrlPerm();
7753       } else {
7754         Val = parseDPPCtrlSel(Ctrl);
7755       }
7756     }
7757   }
7758 
7759   if (Val == -1)
7760     return MatchOperand_ParseFail;
7761 
7762   Operands.push_back(
7763     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7764   return MatchOperand_Success;
7765 }
7766 
7767 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7768   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7769 }
7770 
7771 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7772   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7773 }
7774 
7775 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7776   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7777 }
7778 
7779 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7780   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7781 }
7782 
7783 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7784   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7785 }
7786 
7787 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7788   OptionalImmIndexMap OptionalIdx;
7789 
7790   unsigned I = 1;
7791   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7792   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7793     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7794   }
7795 
7796   int Fi = 0;
7797   for (unsigned E = Operands.size(); I != E; ++I) {
7798     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7799                                             MCOI::TIED_TO);
7800     if (TiedTo != -1) {
7801       assert((unsigned)TiedTo < Inst.getNumOperands());
7802       // Handle the tied 'old' or 'src2' operand for MAC instructions.
7803       Inst.addOperand(Inst.getOperand(TiedTo));
7804     }
7805     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7806     // Add the register arguments
7807     if (Op.isReg() && validateVccOperand(Op.getReg())) {
7808       // VOP2b (v_add_u32, v_sub_u32 ...) DPP uses the "vcc" token.
7809       // Skip it.
7810       continue;
7811     }
7812 
7813     if (IsDPP8) {
7814       if (Op.isDPP8()) {
7815         Op.addImmOperands(Inst, 1);
7816       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7817         Op.addRegWithFPInputModsOperands(Inst, 2);
7818       } else if (Op.isFI()) {
7819         Fi = Op.getImm();
7820       } else if (Op.isReg()) {
7821         Op.addRegOperands(Inst, 1);
7822       } else {
7823         llvm_unreachable("Invalid operand type");
7824       }
7825     } else {
7826       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7827         Op.addRegWithFPInputModsOperands(Inst, 2);
7828       } else if (Op.isDPPCtrl()) {
7829         Op.addImmOperands(Inst, 1);
7830       } else if (Op.isImm()) {
7831         // Handle optional arguments
7832         OptionalIdx[Op.getImmTy()] = I;
7833       } else {
7834         llvm_unreachable("Invalid operand type");
7835       }
7836     }
7837   }
7838 
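  // dpp8 has no optional modifiers collected above; only the fi value captured
  // while walking the operands needs to be emitted. For regular DPP the
  // optional modifiers are appended in their canonical operand order.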
7839   if (IsDPP8) {
7840     using namespace llvm::AMDGPU::DPP;
7841     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7842   } else {
7843     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7844     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7845     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7846     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7847       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7848     }
7849   }
7850 }
7851 
7852 //===----------------------------------------------------------------------===//
7853 // sdwa
7854 //===----------------------------------------------------------------------===//
7855 
7856 OperandMatchResultTy
7857 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7858                               AMDGPUOperand::ImmTy Type) {
7859   using namespace llvm::AMDGPU::SDWA;
7860 
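  // Parses operands of the form <Prefix>:<selector>, e.g. "dst_sel:DWORD" or
  // "src0_sel:BYTE_0" (illustrative); the selector names match the SdwaSel
  // enumerators below.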
7861   SMLoc S = getLoc();
7862   StringRef Value;
7863   OperandMatchResultTy res;
7864 
7865   SMLoc StringLoc;
7866   res = parseStringWithPrefix(Prefix, Value, StringLoc);
7867   if (res != MatchOperand_Success) {
7868     return res;
7869   }
7870 
7871   int64_t Int;
7872   Int = StringSwitch<int64_t>(Value)
7873         .Case("BYTE_0", SdwaSel::BYTE_0)
7874         .Case("BYTE_1", SdwaSel::BYTE_1)
7875         .Case("BYTE_2", SdwaSel::BYTE_2)
7876         .Case("BYTE_3", SdwaSel::BYTE_3)
7877         .Case("WORD_0", SdwaSel::WORD_0)
7878         .Case("WORD_1", SdwaSel::WORD_1)
7879         .Case("DWORD", SdwaSel::DWORD)
7880         .Default(0xffffffff);
7881 
7882   if (Int == 0xffffffff) {
7883     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
7884     return MatchOperand_ParseFail;
7885   }
7886 
7887   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7888   return MatchOperand_Success;
7889 }
7890 
7891 OperandMatchResultTy
7892 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7893   using namespace llvm::AMDGPU::SDWA;
7894 
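  // Parses "dst_unused:<value>", e.g. "dst_unused:UNUSED_PAD" (illustrative).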
7895   SMLoc S = getLoc();
7896   StringRef Value;
7897   OperandMatchResultTy res;
7898 
7899   SMLoc StringLoc;
7900   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
7901   if (res != MatchOperand_Success) {
7902     return res;
7903   }
7904 
7905   int64_t Int;
7906   Int = StringSwitch<int64_t>(Value)
7907         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7908         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7909         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7910         .Default(0xffffffff);
7911 
7912   if (Int == 0xffffffff) {
7913     Error(StringLoc, "invalid dst_unused value");
7914     return MatchOperand_ParseFail;
7915   }
7916 
7917   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7918   return MatchOperand_Success;
7919 }
7920 
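// Thin wrappers that select the encoding family and, for VOP2b/VOP2e/VOPC,
// whether a textual "vcc" dst and/or src operand should be dropped because it
// is implicit in the SDWA encoding (see cvtSDWA below).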
7921 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7922   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7923 }
7924 
7925 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7926   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7927 }
7928 
7929 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7930   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7931 }
7932 
7933 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7934   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7935 }
7936 
7937 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7938   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7939 }
7940 
7941 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7942                               uint64_t BasicInstType,
7943                               bool SkipDstVcc,
7944                               bool SkipSrcVcc) {
7945   using namespace llvm::AMDGPU::SDWA;
7946 
7947   OptionalImmIndexMap OptionalIdx;
7948   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7949   bool SkippedVcc = false;
7950 
7951   unsigned I = 1;
7952   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7953   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7954     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7955   }
7956 
7957   for (unsigned E = Operands.size(); I != E; ++I) {
7958     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7959     if (SkipVcc && !SkippedVcc && Op.isReg() &&
7960         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7961       // VOP2b (v_add_u32, v_sub_u32 ...) SDWA uses the "vcc" token as dst.
7962       // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7963       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7964       // Skip VCC only if we didn't skip it on previous iteration.
7965       // Note that src0 and src1 occupy 2 slots each because of modifiers.
7966       if (BasicInstType == SIInstrFlags::VOP2 &&
7967           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7968            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7969         SkippedVcc = true;
7970         continue;
7971       } else if (BasicInstType == SIInstrFlags::VOPC &&
7972                  Inst.getNumOperands() == 0) {
7973         SkippedVcc = true;
7974         continue;
7975       }
7976     }
7977     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7978       Op.addRegOrImmWithInputModsOperands(Inst, 2);
7979     } else if (Op.isImm()) {
7980       // Handle optional arguments
7981       OptionalIdx[Op.getImmTy()] = I;
7982     } else {
7983       llvm_unreachable("Invalid operand type");
7984     }
7985     SkippedVcc = false;
7986   }
7987 
7988   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7989       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7990       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7991     // V_NOP_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
7992     switch (BasicInstType) {
7993     case SIInstrFlags::VOP1:
7994       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7995       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7996         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7997       }
7998       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7999       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8000       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8001       break;
8002 
8003     case SIInstrFlags::VOP2:
8004       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8005       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8006         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8007       }
8008       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8009       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8010       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8011       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8012       break;
8013 
8014     case SIInstrFlags::VOPC:
8015       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8016         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8017       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8018       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8019       break;
8020 
8021     default:
8022       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8023     }
8024   }
8025 
8026   // Special case v_mac_{f16, f32}:
8027   // it has a src2 register operand that is tied to the dst operand.
8028   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8029       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8030     auto it = Inst.begin();
8031     std::advance(
8032       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8033     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8034   }
8035 }
8036 
8037 //===----------------------------------------------------------------------===//
8038 // mAI
8039 //===----------------------------------------------------------------------===//
8040 
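// Default immediates for the optional mAI (MFMA) modifiers blgp, cbsz and
// abid; each defaults to 0 when not written in the assembly.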
8041 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8042   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8043 }
8044 
8045 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8046   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8047 }
8048 
8049 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8050   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8051 }
8052 
8053 /// Force static initialization.
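/// Registers the asm parser with both AMDGPU targets (r600 and amdgcn).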
8054 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8055   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8056   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8057 }
8058 
8059 #define GET_REGISTER_MATCHER
8060 #define GET_MATCHER_IMPLEMENTATION
8061 #define GET_MNEMONIC_SPELL_CHECKER
8062 #define GET_MNEMONIC_CHECKER
8063 #include "AMDGPUGenAsmMatcher.inc"
8064 
8065 // This function should be defined after the auto-generated include so that we
8066 // have the MatchClassKind enum defined.
8067 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8068                                                      unsigned Kind) {
8069   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8070   // But MatchInstructionImpl() expects to see a token and fails to validate the
8071   // operand. This method checks whether we were given an immediate operand but
8072   // are expected to produce the corresponding token.
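  // For example, in "buffer_load_dword v0, off, s[0:3], 0 glc" the trailing
  // "glc" is parsed as an immediate operand and is matched against the MCK_glc
  // token class below (illustrative syntax).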
8073   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8074   switch (Kind) {
8075   case MCK_addr64:
8076     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8077   case MCK_gds:
8078     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8079   case MCK_lds:
8080     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8081   case MCK_glc:
8082     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
8083   case MCK_idxen:
8084     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8085   case MCK_offen:
8086     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8087   case MCK_SSrcB32:
8088     // When operands have expression values, they will return true for isToken,
8089     // because it is not possible to distinguish between a token and an
8090     // expression at parse time. MatchInstructionImpl() will always try to
8091     // match an operand as a token when isToken returns true, and when the
8092     // name of the expression is not a valid token the match will fail,
8093     // so we need to handle it here.
8094     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8095   case MCK_SSrcF32:
8096     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8097   case MCK_SoppBrTarget:
8098     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8099   case MCK_VReg32OrOff:
8100     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8101   case MCK_InterpSlot:
8102     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8103   case MCK_Attr:
8104     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8105   case MCK_AttrChan:
8106     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8107   case MCK_ImmSMEMOffset:
8108     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8109   case MCK_SReg_64:
8110   case MCK_SReg_64_XEXEC:
8111     // Null is defined as a 32-bit register but
8112     // it should also be enabled with 64-bit operands.
8113     // The following code enables it for SReg_64 operands
8114     // used as source and destination. Remaining source
8115     // operands are handled in isInlinableImm.
8116     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8117   default:
8118     return Match_InvalidOperand;
8119   }
8120 }
8121 
8122 //===----------------------------------------------------------------------===//
8123 // endpgm
8124 //===----------------------------------------------------------------------===//
8125 
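// s_endpgm optionally takes a 16-bit immediate, e.g. "s_endpgm 0"
// (illustrative); when the immediate is omitted it defaults to 0.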
8126 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8127   SMLoc S = getLoc();
8128   int64_t Imm = 0;
8129 
8130   if (!parseExpr(Imm)) {
8131     // The operand is optional; if not present, default to 0.
8132     Imm = 0;
8133   }
8134 
8135   if (!isUInt<16>(Imm)) {
8136     Error(S, "expected a 16-bit value");
8137     return MatchOperand_ParseFail;
8138   }
8139 
8140   Operands.push_back(
8141       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8142   return MatchOperand_Success;
8143 }
8144 
8145 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8146