1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/Support/AMDGPUMetadata.h"
32 #include "llvm/Support/AMDHSAKernelDescriptor.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/MachineValueType.h"
35 #include "llvm/Support/TargetParser.h"
36 #include "llvm/Support/TargetRegistry.h"
37 
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41 
42 namespace {
43 
44 class AMDGPUAsmParser;
45 
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47 
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51 
52 class AMDGPUOperand : public MCParsedAsmOperand {
53   enum KindTy {
54     Token,
55     Immediate,
56     Register,
57     Expression
58   } Kind;
59 
60   SMLoc StartLoc, EndLoc;
61   const AMDGPUAsmParser *AsmParser;
62 
63 public:
64   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : Kind(Kind_), AsmParser(AsmParser_) {}
66 
67   using Ptr = std::unique_ptr<AMDGPUOperand>;
68 
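  // Source operand modifiers: Abs/Neg apply to floating-point sources, Sext to
  // integer sources. FP and integer modifiers are mutually exclusive on an
  // operand (see getModifiersOperand below).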
69   struct Modifiers {
70     bool Abs = false;
71     bool Neg = false;
72     bool Sext = false;
73 
74     bool hasFPModifiers() const { return Abs || Neg; }
75     bool hasIntModifiers() const { return Sext; }
76     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77 
78     int64_t getFPModifiersOperand() const {
79       int64_t Operand = 0;
80       Operand |= Abs ? SISrcMods::ABS : 0u;
81       Operand |= Neg ? SISrcMods::NEG : 0u;
82       return Operand;
83     }
84 
85     int64_t getIntModifiersOperand() const {
86       int64_t Operand = 0;
87       Operand |= Sext ? SISrcMods::SEXT : 0u;
88       return Operand;
89     }
90 
91     int64_t getModifiersOperand() const {
92       assert(!(hasFPModifiers() && hasIntModifiers())
93            && "fp and int modifiers should not be used simultaneously");
94       if (hasFPModifiers()) {
95         return getFPModifiersOperand();
96       } else if (hasIntModifiers()) {
97         return getIntModifiersOperand();
98       } else {
99         return 0;
100       }
101     }
102 
103     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104   };
105 
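  // Identifies which named operand an immediate stands for (offsets, cache
  // policy, DPP/SDWA controls, etc.). Used by the isImmTy-based predicates and
  // the operand converters below.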
106   enum ImmTy {
107     ImmTyNone,
108     ImmTyGDS,
109     ImmTyLDS,
110     ImmTyOffen,
111     ImmTyIdxen,
112     ImmTyAddr64,
113     ImmTyOffset,
114     ImmTyInstOffset,
115     ImmTyOffset0,
116     ImmTyOffset1,
117     ImmTyCPol,
118     ImmTySWZ,
119     ImmTyTFE,
120     ImmTyD16,
121     ImmTyClampSI,
122     ImmTyOModSI,
123     ImmTyDPP8,
124     ImmTyDppCtrl,
125     ImmTyDppRowMask,
126     ImmTyDppBankMask,
127     ImmTyDppBoundCtrl,
128     ImmTyDppFi,
129     ImmTySdwaDstSel,
130     ImmTySdwaSrc0Sel,
131     ImmTySdwaSrc1Sel,
132     ImmTySdwaDstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTySwizzle,
155     ImmTyGprIdxMode,
156     ImmTyHigh,
157     ImmTyBLGP,
158     ImmTyCBSZ,
159     ImmTyABID,
160     ImmTyEndpgm,
161   };
162 
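  // How an immediate ends up being encoded: not yet classified, as a literal
  // constant, or as an inline constant.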
163   enum ImmKindTy {
164     ImmKindTyNone,
165     ImmKindTyLiteral,
166     ImmKindTyConst,
167   };
168 
169 private:
170   struct TokOp {
171     const char *Data;
172     unsigned Length;
173   };
174 
175   struct ImmOp {
176     int64_t Val;
177     ImmTy Type;
178     bool IsFPImm;
179     mutable ImmKindTy Kind;
180     Modifiers Mods;
181   };
182 
183   struct RegOp {
184     unsigned RegNo;
185     Modifiers Mods;
186   };
187 
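  // Operand payload; only the member selected by Kind is valid.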
188   union {
189     TokOp Tok;
190     ImmOp Imm;
191     RegOp Reg;
192     const MCExpr *Expr;
193   };
194 
195 public:
196   bool isToken() const override {
197     if (Kind == Token)
198       return true;
199 
200     // When parsing operands, we can't always tell if something was meant to be
201     // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
204     return isSymbolRefExpr();
205   }
206 
207   bool isSymbolRefExpr() const {
208     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209   }
210 
211   bool isImm() const override {
212     return Kind == Immediate;
213   }
214 
215   void setImmKindNone() const {
216     assert(isImm());
217     Imm.Kind = ImmKindTyNone;
218   }
219 
220   void setImmKindLiteral() const {
221     assert(isImm());
222     Imm.Kind = ImmKindTyLiteral;
223   }
224 
225   void setImmKindConst() const {
226     assert(isImm());
227     Imm.Kind = ImmKindTyConst;
228   }
229 
230   bool IsImmKindLiteral() const {
231     return isImm() && Imm.Kind == ImmKindTyLiteral;
232   }
233 
234   bool isImmKindConst() const {
235     return isImm() && Imm.Kind == ImmKindTyConst;
236   }
237 
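  // Whether this immediate can be encoded as an inline constant of the given
  // type, or as a literal constant of that type, respectively.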
238   bool isInlinableImm(MVT type) const;
239   bool isLiteralImm(MVT type) const;
240 
241   bool isRegKind() const {
242     return Kind == Register;
243   }
244 
245   bool isReg() const override {
246     return isRegKind() && !hasModifiers();
247   }
248 
249   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
250     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
251   }
252 
253   bool isRegOrImmWithInt16InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
255   }
256 
257   bool isRegOrImmWithInt32InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259   }
260 
261   bool isRegOrImmWithInt64InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
263   }
264 
265   bool isRegOrImmWithFP16InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
267   }
268 
269   bool isRegOrImmWithFP32InputMods() const {
270     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
271   }
272 
273   bool isRegOrImmWithFP64InputMods() const {
274     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
275   }
276 
277   bool isVReg() const {
278     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
279            isRegClass(AMDGPU::VReg_64RegClassID) ||
280            isRegClass(AMDGPU::VReg_96RegClassID) ||
281            isRegClass(AMDGPU::VReg_128RegClassID) ||
282            isRegClass(AMDGPU::VReg_160RegClassID) ||
283            isRegClass(AMDGPU::VReg_192RegClassID) ||
284            isRegClass(AMDGPU::VReg_256RegClassID) ||
285            isRegClass(AMDGPU::VReg_512RegClassID) ||
286            isRegClass(AMDGPU::VReg_1024RegClassID);
287   }
288 
289   bool isVReg32() const {
290     return isRegClass(AMDGPU::VGPR_32RegClassID);
291   }
292 
293   bool isVReg32OrOff() const {
294     return isOff() || isVReg32();
295   }
296 
297   bool isNull() const {
298     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
299   }
300 
301   bool isVRegWithInputMods() const;
302 
303   bool isSDWAOperand(MVT type) const;
304   bool isSDWAFP16Operand() const;
305   bool isSDWAFP32Operand() const;
306   bool isSDWAInt16Operand() const;
307   bool isSDWAInt32Operand() const;
308 
309   bool isImmTy(ImmTy ImmT) const {
310     return isImm() && Imm.Type == ImmT;
311   }
312 
313   bool isImmModifier() const {
314     return isImm() && Imm.Type != ImmTyNone;
315   }
316 
317   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
318   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
319   bool isDMask() const { return isImmTy(ImmTyDMask); }
320   bool isDim() const { return isImmTy(ImmTyDim); }
321   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
322   bool isDA() const { return isImmTy(ImmTyDA); }
323   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
324   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
325   bool isLWE() const { return isImmTy(ImmTyLWE); }
326   bool isOff() const { return isImmTy(ImmTyOff); }
327   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
328   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
329   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
330   bool isOffen() const { return isImmTy(ImmTyOffen); }
331   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
332   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
333   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
334   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
335   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
336 
337   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
338   bool isGDS() const { return isImmTy(ImmTyGDS); }
339   bool isLDS() const { return isImmTy(ImmTyLDS); }
340   bool isCPol() const { return isImmTy(ImmTyCPol); }
341   bool isSWZ() const { return isImmTy(ImmTySWZ); }
342   bool isTFE() const { return isImmTy(ImmTyTFE); }
343   bool isD16() const { return isImmTy(ImmTyD16); }
344   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
345   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
346   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
347   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
348   bool isFI() const { return isImmTy(ImmTyDppFi); }
349   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
350   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
351   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
352   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
353   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
354   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
355   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
356   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
357   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
358   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
359   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
360   bool isHigh() const { return isImmTy(ImmTyHigh); }
361 
362   bool isMod() const {
363     return isClampSI() || isOModSI();
364   }
365 
366   bool isRegOrImm() const {
367     return isReg() || isImm();
368   }
369 
370   bool isRegClass(unsigned RCID) const;
371 
372   bool isInlineValue() const;
373 
374   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
375     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
376   }
377 
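  // Naming convention for the source-operand predicates below: 'SC' variants
  // accept an SGPR or an inline constant, 'VC' variants accept a VGPR/SGPR or
  // an inline constant, dropping the 'C' additionally allows a literal (and,
  // for some 32-bit forms, an expression), and 'VI'/'AI' variants restrict the
  // register to the named VGPR/AGPR register class.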
378   bool isSCSrcB16() const {
379     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
380   }
381 
382   bool isSCSrcV2B16() const {
383     return isSCSrcB16();
384   }
385 
386   bool isSCSrcB32() const {
387     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
388   }
389 
390   bool isSCSrcB64() const {
391     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
392   }
393 
394   bool isBoolReg() const;
395 
396   bool isSCSrcF16() const {
397     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
398   }
399 
400   bool isSCSrcV2F16() const {
401     return isSCSrcF16();
402   }
403 
404   bool isSCSrcF32() const {
405     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
406   }
407 
408   bool isSCSrcF64() const {
409     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
410   }
411 
412   bool isSSrcB32() const {
413     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
414   }
415 
416   bool isSSrcB16() const {
417     return isSCSrcB16() || isLiteralImm(MVT::i16);
418   }
419 
420   bool isSSrcV2B16() const {
421     llvm_unreachable("cannot happen");
422     return isSSrcB16();
423   }
424 
425   bool isSSrcB64() const {
426     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
427     // See isVSrc64().
428     return isSCSrcB64() || isLiteralImm(MVT::i64);
429   }
430 
431   bool isSSrcF32() const {
432     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
433   }
434 
435   bool isSSrcF64() const {
436     return isSCSrcB64() || isLiteralImm(MVT::f64);
437   }
438 
439   bool isSSrcF16() const {
440     return isSCSrcB16() || isLiteralImm(MVT::f16);
441   }
442 
443   bool isSSrcV2F16() const {
444     llvm_unreachable("cannot happen");
445     return isSSrcF16();
446   }
447 
448   bool isSSrcV2FP32() const {
449     llvm_unreachable("cannot happen");
450     return isSSrcF32();
451   }
452 
453   bool isSCSrcV2FP32() const {
454     llvm_unreachable("cannot happen");
455     return isSCSrcF32();
456   }
457 
458   bool isSSrcV2INT32() const {
459     llvm_unreachable("cannot happen");
460     return isSSrcB32();
461   }
462 
463   bool isSCSrcV2INT32() const {
464     llvm_unreachable("cannot happen");
465     return isSCSrcB32();
466   }
467 
468   bool isSSrcOrLdsB32() const {
469     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
470            isLiteralImm(MVT::i32) || isExpr();
471   }
472 
473   bool isVCSrcB32() const {
474     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
475   }
476 
477   bool isVCSrcB64() const {
478     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
479   }
480 
481   bool isVCSrcB16() const {
482     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
483   }
484 
485   bool isVCSrcV2B16() const {
486     return isVCSrcB16();
487   }
488 
489   bool isVCSrcF32() const {
490     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
491   }
492 
493   bool isVCSrcF64() const {
494     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
495   }
496 
497   bool isVCSrcF16() const {
498     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
499   }
500 
501   bool isVCSrcV2F16() const {
502     return isVCSrcF16();
503   }
504 
505   bool isVSrcB32() const {
506     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
507   }
508 
509   bool isVSrcB64() const {
510     return isVCSrcF64() || isLiteralImm(MVT::i64);
511   }
512 
513   bool isVSrcB16() const {
514     return isVCSrcB16() || isLiteralImm(MVT::i16);
515   }
516 
517   bool isVSrcV2B16() const {
518     return isVSrcB16() || isLiteralImm(MVT::v2i16);
519   }
520 
521   bool isVCSrcV2FP32() const {
522     return isVCSrcF64();
523   }
524 
525   bool isVSrcV2FP32() const {
526     return isVSrcF64() || isLiteralImm(MVT::v2f32);
527   }
528 
529   bool isVCSrcV2INT32() const {
530     return isVCSrcB64();
531   }
532 
533   bool isVSrcV2INT32() const {
534     return isVSrcB64() || isLiteralImm(MVT::v2i32);
535   }
536 
537   bool isVSrcF32() const {
538     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
539   }
540 
541   bool isVSrcF64() const {
542     return isVCSrcF64() || isLiteralImm(MVT::f64);
543   }
544 
545   bool isVSrcF16() const {
546     return isVCSrcF16() || isLiteralImm(MVT::f16);
547   }
548 
549   bool isVSrcV2F16() const {
550     return isVSrcF16() || isLiteralImm(MVT::v2f16);
551   }
552 
553   bool isVISrcB32() const {
554     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
555   }
556 
557   bool isVISrcB16() const {
558     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
559   }
560 
561   bool isVISrcV2B16() const {
562     return isVISrcB16();
563   }
564 
565   bool isVISrcF32() const {
566     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
567   }
568 
569   bool isVISrcF16() const {
570     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
571   }
572 
573   bool isVISrcV2F16() const {
574     return isVISrcF16() || isVISrcB32();
575   }
576 
577   bool isVISrc_64B64() const {
578     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
579   }
580 
581   bool isVISrc_64F64() const {
582     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
583   }
584 
585   bool isVISrc_64V2FP32() const {
586     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
587   }
588 
589   bool isVISrc_64V2INT32() const {
590     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
591   }
592 
593   bool isVISrc_256B64() const {
594     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
595   }
596 
597   bool isVISrc_256F64() const {
598     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
599   }
600 
601   bool isVISrc_128B16() const {
602     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
603   }
604 
605   bool isVISrc_128V2B16() const {
606     return isVISrc_128B16();
607   }
608 
609   bool isVISrc_128B32() const {
610     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
611   }
612 
613   bool isVISrc_128F32() const {
614     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
615   }
616 
617   bool isVISrc_256V2FP32() const {
618     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
619   }
620 
621   bool isVISrc_256V2INT32() const {
622     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
623   }
624 
625   bool isVISrc_512B32() const {
626     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
627   }
628 
629   bool isVISrc_512B16() const {
630     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
631   }
632 
633   bool isVISrc_512V2B16() const {
634     return isVISrc_512B16();
635   }
636 
637   bool isVISrc_512F32() const {
638     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
639   }
640 
641   bool isVISrc_512F16() const {
642     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
643   }
644 
645   bool isVISrc_512V2F16() const {
646     return isVISrc_512F16() || isVISrc_512B32();
647   }
648 
649   bool isVISrc_1024B32() const {
650     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
651   }
652 
653   bool isVISrc_1024B16() const {
654     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
655   }
656 
657   bool isVISrc_1024V2B16() const {
658     return isVISrc_1024B16();
659   }
660 
661   bool isVISrc_1024F32() const {
662     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
663   }
664 
665   bool isVISrc_1024F16() const {
666     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
667   }
668 
669   bool isVISrc_1024V2F16() const {
670     return isVISrc_1024F16() || isVISrc_1024B32();
671   }
672 
673   bool isAISrcB32() const {
674     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
675   }
676 
677   bool isAISrcB16() const {
678     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
679   }
680 
681   bool isAISrcV2B16() const {
682     return isAISrcB16();
683   }
684 
685   bool isAISrcF32() const {
686     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
687   }
688 
689   bool isAISrcF16() const {
690     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
691   }
692 
693   bool isAISrcV2F16() const {
694     return isAISrcF16() || isAISrcB32();
695   }
696 
697   bool isAISrc_64B64() const {
698     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
699   }
700 
701   bool isAISrc_64F64() const {
702     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
703   }
704 
705   bool isAISrc_128B32() const {
706     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
707   }
708 
709   bool isAISrc_128B16() const {
710     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
711   }
712 
713   bool isAISrc_128V2B16() const {
714     return isAISrc_128B16();
715   }
716 
717   bool isAISrc_128F32() const {
718     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
719   }
720 
721   bool isAISrc_128F16() const {
722     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
723   }
724 
725   bool isAISrc_128V2F16() const {
726     return isAISrc_128F16() || isAISrc_128B32();
727   }
728 
729   bool isVISrc_128F16() const {
730     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
731   }
732 
733   bool isVISrc_128V2F16() const {
734     return isVISrc_128F16() || isVISrc_128B32();
735   }
736 
737   bool isAISrc_256B64() const {
738     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
739   }
740 
741   bool isAISrc_256F64() const {
742     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
743   }
744 
745   bool isAISrc_512B32() const {
746     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
747   }
748 
749   bool isAISrc_512B16() const {
750     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
751   }
752 
753   bool isAISrc_512V2B16() const {
754     return isAISrc_512B16();
755   }
756 
757   bool isAISrc_512F32() const {
758     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
759   }
760 
761   bool isAISrc_512F16() const {
762     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
763   }
764 
765   bool isAISrc_512V2F16() const {
766     return isAISrc_512F16() || isAISrc_512B32();
767   }
768 
769   bool isAISrc_1024B32() const {
770     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
771   }
772 
773   bool isAISrc_1024B16() const {
774     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
775   }
776 
777   bool isAISrc_1024V2B16() const {
778     return isAISrc_1024B16();
779   }
780 
781   bool isAISrc_1024F32() const {
782     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
783   }
784 
785   bool isAISrc_1024F16() const {
786     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
787   }
788 
789   bool isAISrc_1024V2F16() const {
790     return isAISrc_1024F16() || isAISrc_1024B32();
791   }
792 
793   bool isKImmFP32() const {
794     return isLiteralImm(MVT::f32);
795   }
796 
797   bool isKImmFP16() const {
798     return isLiteralImm(MVT::f16);
799   }
800 
801   bool isMem() const override {
802     return false;
803   }
804 
805   bool isExpr() const {
806     return Kind == Expression;
807   }
808 
809   bool isSoppBrTarget() const {
810     return isExpr() || isImm();
811   }
812 
813   bool isSWaitCnt() const;
814   bool isHwreg() const;
815   bool isSendMsg() const;
816   bool isSwizzle() const;
817   bool isSMRDOffset8() const;
818   bool isSMEMOffset() const;
819   bool isSMRDLiteralOffset() const;
820   bool isDPP8() const;
821   bool isDPPCtrl() const;
822   bool isBLGP() const;
823   bool isCBSZ() const;
824   bool isABID() const;
825   bool isGPRIdxMode() const;
826   bool isS16Imm() const;
827   bool isU16Imm() const;
828   bool isEndpgm() const;
829 
830   StringRef getExpressionAsToken() const {
831     assert(isExpr());
832     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
833     return S->getSymbol().getName();
834   }
835 
836   StringRef getToken() const {
837     assert(isToken());
838 
839     if (Kind == Expression)
840       return getExpressionAsToken();
841 
842     return StringRef(Tok.Data, Tok.Length);
843   }
844 
845   int64_t getImm() const {
846     assert(isImm());
847     return Imm.Val;
848   }
849 
850   void setImm(int64_t Val) {
851     assert(isImm());
852     Imm.Val = Val;
853   }
854 
855   ImmTy getImmTy() const {
856     assert(isImm());
857     return Imm.Type;
858   }
859 
860   unsigned getReg() const override {
861     assert(isRegKind());
862     return Reg.RegNo;
863   }
864 
865   SMLoc getStartLoc() const override {
866     return StartLoc;
867   }
868 
869   SMLoc getEndLoc() const override {
870     return EndLoc;
871   }
872 
873   SMRange getLocRange() const {
874     return SMRange(StartLoc, EndLoc);
875   }
876 
877   Modifiers getModifiers() const {
878     assert(isRegKind() || isImmTy(ImmTyNone));
879     return isRegKind() ? Reg.Mods : Imm.Mods;
880   }
881 
882   void setModifiers(Modifiers Mods) {
883     assert(isRegKind() || isImmTy(ImmTyNone));
884     if (isRegKind())
885       Reg.Mods = Mods;
886     else
887       Imm.Mods = Mods;
888   }
889 
890   bool hasModifiers() const {
891     return getModifiers().hasModifiers();
892   }
893 
894   bool hasFPModifiers() const {
895     return getModifiers().hasFPModifiers();
896   }
897 
898   bool hasIntModifiers() const {
899     return getModifiers().hasIntModifiers();
900   }
901 
902   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
903 
904   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
905 
906   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
907 
908   template <unsigned Bitwidth>
909   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
910 
911   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
912     addKImmFPOperands<16>(Inst, N);
913   }
914 
915   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
916     addKImmFPOperands<32>(Inst, N);
917   }
918 
919   void addRegOperands(MCInst &Inst, unsigned N) const;
920 
921   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
922     addRegOperands(Inst, N);
923   }
924 
925   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
926     if (isRegKind())
927       addRegOperands(Inst, N);
928     else if (isExpr())
929       Inst.addOperand(MCOperand::createExpr(Expr));
930     else
931       addImmOperands(Inst, N);
932   }
933 
934   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
935     Modifiers Mods = getModifiers();
936     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
937     if (isRegKind()) {
938       addRegOperands(Inst, N);
939     } else {
940       addImmOperands(Inst, N, false);
941     }
942   }
943 
944   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
945     assert(!hasIntModifiers());
946     addRegOrImmWithInputModsOperands(Inst, N);
947   }
948 
949   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
950     assert(!hasFPModifiers());
951     addRegOrImmWithInputModsOperands(Inst, N);
952   }
953 
954   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
955     Modifiers Mods = getModifiers();
956     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
957     assert(isRegKind());
958     addRegOperands(Inst, N);
959   }
960 
961   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
962     assert(!hasIntModifiers());
963     addRegWithInputModsOperands(Inst, N);
964   }
965 
966   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
967     assert(!hasFPModifiers());
968     addRegWithInputModsOperands(Inst, N);
969   }
970 
971   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
972     if (isImm())
973       addImmOperands(Inst, N);
974     else {
975       assert(isExpr());
976       Inst.addOperand(MCOperand::createExpr(Expr));
977     }
978   }
979 
980   static void printImmTy(raw_ostream& OS, ImmTy Type) {
981     switch (Type) {
982     case ImmTyNone: OS << "None"; break;
983     case ImmTyGDS: OS << "GDS"; break;
984     case ImmTyLDS: OS << "LDS"; break;
985     case ImmTyOffen: OS << "Offen"; break;
986     case ImmTyIdxen: OS << "Idxen"; break;
987     case ImmTyAddr64: OS << "Addr64"; break;
988     case ImmTyOffset: OS << "Offset"; break;
989     case ImmTyInstOffset: OS << "InstOffset"; break;
990     case ImmTyOffset0: OS << "Offset0"; break;
991     case ImmTyOffset1: OS << "Offset1"; break;
992     case ImmTyCPol: OS << "CPol"; break;
993     case ImmTySWZ: OS << "SWZ"; break;
994     case ImmTyTFE: OS << "TFE"; break;
995     case ImmTyD16: OS << "D16"; break;
996     case ImmTyFORMAT: OS << "FORMAT"; break;
997     case ImmTyClampSI: OS << "ClampSI"; break;
998     case ImmTyOModSI: OS << "OModSI"; break;
999     case ImmTyDPP8: OS << "DPP8"; break;
1000     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1001     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1002     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1003     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1004     case ImmTyDppFi: OS << "FI"; break;
1005     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1006     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1007     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1008     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1009     case ImmTyDMask: OS << "DMask"; break;
1010     case ImmTyDim: OS << "Dim"; break;
1011     case ImmTyUNorm: OS << "UNorm"; break;
1012     case ImmTyDA: OS << "DA"; break;
1013     case ImmTyR128A16: OS << "R128A16"; break;
1014     case ImmTyA16: OS << "A16"; break;
1015     case ImmTyLWE: OS << "LWE"; break;
1016     case ImmTyOff: OS << "Off"; break;
1017     case ImmTyExpTgt: OS << "ExpTgt"; break;
1018     case ImmTyExpCompr: OS << "ExpCompr"; break;
1019     case ImmTyExpVM: OS << "ExpVM"; break;
1020     case ImmTyHwreg: OS << "Hwreg"; break;
1021     case ImmTySendMsg: OS << "SendMsg"; break;
1022     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1023     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1024     case ImmTyAttrChan: OS << "AttrChan"; break;
1025     case ImmTyOpSel: OS << "OpSel"; break;
1026     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1027     case ImmTyNegLo: OS << "NegLo"; break;
1028     case ImmTyNegHi: OS << "NegHi"; break;
1029     case ImmTySwizzle: OS << "Swizzle"; break;
1030     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1031     case ImmTyHigh: OS << "High"; break;
1032     case ImmTyBLGP: OS << "BLGP"; break;
1033     case ImmTyCBSZ: OS << "CBSZ"; break;
1034     case ImmTyABID: OS << "ABID"; break;
1035     case ImmTyEndpgm: OS << "Endpgm"; break;
1036     }
1037   }
1038 
1039   void print(raw_ostream &OS) const override {
1040     switch (Kind) {
1041     case Register:
1042       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1043       break;
1044     case Immediate:
1045       OS << '<' << getImm();
1046       if (getImmTy() != ImmTyNone) {
1047         OS << " type: "; printImmTy(OS, getImmTy());
1048       }
1049       OS << " mods: " << Imm.Mods << '>';
1050       break;
1051     case Token:
1052       OS << '\'' << getToken() << '\'';
1053       break;
1054     case Expression:
1055       OS << "<expr " << *Expr << '>';
1056       break;
1057     }
1058   }
1059 
1060   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1061                                       int64_t Val, SMLoc Loc,
1062                                       ImmTy Type = ImmTyNone,
1063                                       bool IsFPImm = false) {
1064     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1065     Op->Imm.Val = Val;
1066     Op->Imm.IsFPImm = IsFPImm;
1067     Op->Imm.Kind = ImmKindTyNone;
1068     Op->Imm.Type = Type;
1069     Op->Imm.Mods = Modifiers();
1070     Op->StartLoc = Loc;
1071     Op->EndLoc = Loc;
1072     return Op;
1073   }
1074 
1075   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1076                                         StringRef Str, SMLoc Loc,
1077                                         bool HasExplicitEncodingSize = true) {
1078     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1079     Res->Tok.Data = Str.data();
1080     Res->Tok.Length = Str.size();
1081     Res->StartLoc = Loc;
1082     Res->EndLoc = Loc;
1083     return Res;
1084   }
1085 
1086   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1087                                       unsigned RegNo, SMLoc S,
1088                                       SMLoc E) {
1089     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1090     Op->Reg.RegNo = RegNo;
1091     Op->Reg.Mods = Modifiers();
1092     Op->StartLoc = S;
1093     Op->EndLoc = E;
1094     return Op;
1095   }
1096 
1097   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1098                                        const class MCExpr *Expr, SMLoc S) {
1099     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1100     Op->Expr = Expr;
1101     Op->StartLoc = S;
1102     Op->EndLoc = S;
1103     return Op;
1104   }
1105 };
1106 
1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1109   return OS;
1110 }
1111 
1112 //===----------------------------------------------------------------------===//
1113 // AsmParser
1114 //===----------------------------------------------------------------------===//
1115 
// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
1119 class KernelScopeInfo {
1120   int SgprIndexUnusedMin = -1;
1121   int VgprIndexUnusedMin = -1;
1122   MCContext *Ctx = nullptr;
1123 
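  // Record that SGPR/VGPR index \p i is used and publish the updated count
  // through the .kernel.sgpr_count / .kernel.vgpr_count symbols.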
1124   void usesSgprAt(int i) {
1125     if (i >= SgprIndexUnusedMin) {
1126       SgprIndexUnusedMin = ++i;
1127       if (Ctx) {
1128         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130       }
1131     }
1132   }
1133 
1134   void usesVgprAt(int i) {
1135     if (i >= VgprIndexUnusedMin) {
1136       VgprIndexUnusedMin = ++i;
1137       if (Ctx) {
1138         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140       }
1141     }
1142   }
1143 
1144 public:
1145   KernelScopeInfo() = default;
1146 
1147   void initialize(MCContext &Context) {
1148     Ctx = &Context;
1149     usesSgprAt(SgprIndexUnusedMin = -1);
1150     usesVgprAt(VgprIndexUnusedMin = -1);
1151   }
1152 
1153   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154     switch (RegKind) {
1155       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156       case IS_AGPR: // fall through
1157       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158       default: break;
1159     }
1160   }
1161 };
1162 
1163 class AMDGPUAsmParser : public MCTargetAsmParser {
1164   MCAsmParser &Parser;
1165 
  // Maximum number of extra operands parsed after the first optional operand.
  // This lookahead may be necessary to skip hardcoded mandatory operands.
1168   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169 
1170   unsigned ForcedEncodingSize = 0;
1171   bool ForcedDPP = false;
1172   bool ForcedSDWA = false;
1173   KernelScopeInfo KernelScope;
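  // Cache policy (cpol) modifier bits seen while parsing the current
  // instruction.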
1174   unsigned CPolSeen;
1175 
1176   /// @name Auto-generated Match Functions
1177   /// {
1178 
1179 #define GET_ASSEMBLER_HEADER
1180 #include "AMDGPUGenAsmMatcher.inc"
1181 
1182   /// }
1183 
1184 private:
1185   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186   bool OutOfRangeError(SMRange Range);
  /// Calculate the VGPR/SGPR blocks required for the given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
1189   ///
1190   /// \param Features [in] Target features, used for bug corrections.
1191   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195   /// descriptor field, if valid.
1196   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200   /// \param VGPRBlocks [out] Result VGPR block count.
1201   /// \param SGPRBlocks [out] Result SGPR block count.
1202   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203                           bool FlatScrUsed, bool XNACKUsed,
1204                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205                           SMRange VGPRRange, unsigned NextFreeSGPR,
1206                           SMRange SGPRRange, unsigned &VGPRBlocks,
1207                           unsigned &SGPRBlocks);
1208   bool ParseDirectiveAMDGCNTarget();
1209   bool ParseDirectiveAMDHSAKernel();
1210   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211   bool ParseDirectiveHSACodeObjectVersion();
1212   bool ParseDirectiveHSACodeObjectISA();
1213   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214   bool ParseDirectiveAMDKernelCodeT();
1215   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1216   bool ParseDirectiveAMDGPUHsaKernel();
1217 
1218   bool ParseDirectiveISAVersion();
1219   bool ParseDirectiveHSAMetadata();
1220   bool ParseDirectivePALMetadataBegin();
1221   bool ParseDirectivePALMetadata();
1222   bool ParseDirectiveAMDGPULDS();
1223 
1224   /// Common code to parse out a block of text (typically YAML) between start and
1225   /// end directives.
1226   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1227                            const char *AssemblerDirectiveEnd,
1228                            std::string &CollectString);
1229 
1230   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1231                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1232   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1233                            unsigned &RegNum, unsigned &RegWidth,
1234                            bool RestoreOnFailure = false);
1235   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1236                            unsigned &RegNum, unsigned &RegWidth,
1237                            SmallVectorImpl<AsmToken> &Tokens);
1238   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1239                            unsigned &RegWidth,
1240                            SmallVectorImpl<AsmToken> &Tokens);
1241   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1242                            unsigned &RegWidth,
1243                            SmallVectorImpl<AsmToken> &Tokens);
1244   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1245                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1246   bool ParseRegRange(unsigned& Num, unsigned& Width);
1247   unsigned getRegularReg(RegisterKind RegKind,
1248                          unsigned RegNum,
1249                          unsigned RegWidth,
1250                          SMLoc Loc);
1251 
1252   bool isRegister();
1253   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1254   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1255   void initializeGprCountSymbol(RegisterKind RegKind);
1256   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1257                              unsigned RegWidth);
1258   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1259                     bool IsAtomic, bool IsLds = false);
1260   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1261                  bool IsGdsHardcoded);
1262 
1263 public:
1264   enum AMDGPUMatchResultTy {
1265     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1266   };
1267   enum OperandMode {
1268     OperandMode_Default,
1269     OperandMode_NSA,
1270   };
1271 
1272   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1273 
1274   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1275                const MCInstrInfo &MII,
1276                const MCTargetOptions &Options)
1277       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1278     MCAsmParserExtension::Initialize(Parser);
1279 
1280     if (getFeatureBits().none()) {
1281       // Set default features.
1282       copySTI().ToggleFeature("southern-islands");
1283     }
1284 
1285     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1286 
1287     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
1292       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1293       MCContext &Ctx = getContext();
1294       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1295         MCSymbol *Sym =
1296             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1297         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1298         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1299         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1300         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1301         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1302       } else {
1303         MCSymbol *Sym =
1304             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1305         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1306         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1307         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1308         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1309         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1310       }
1311       if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
1312         initializeGprCountSymbol(IS_VGPR);
1313         initializeGprCountSymbol(IS_SGPR);
1314       } else
1315         KernelScope.initialize(getContext());
1316     }
1317   }
1318 
1319   bool hasXNACK() const {
1320     return AMDGPU::hasXNACK(getSTI());
1321   }
1322 
1323   bool hasMIMG_R128() const {
1324     return AMDGPU::hasMIMG_R128(getSTI());
1325   }
1326 
1327   bool hasPackedD16() const {
1328     return AMDGPU::hasPackedD16(getSTI());
1329   }
1330 
1331   bool hasGFX10A16() const {
1332     return AMDGPU::hasGFX10A16(getSTI());
1333   }
1334 
1335   bool isSI() const {
1336     return AMDGPU::isSI(getSTI());
1337   }
1338 
1339   bool isCI() const {
1340     return AMDGPU::isCI(getSTI());
1341   }
1342 
1343   bool isVI() const {
1344     return AMDGPU::isVI(getSTI());
1345   }
1346 
1347   bool isGFX9() const {
1348     return AMDGPU::isGFX9(getSTI());
1349   }
1350 
1351   bool isGFX90A() const {
1352     return AMDGPU::isGFX90A(getSTI());
1353   }
1354 
1355   bool isGFX9Plus() const {
1356     return AMDGPU::isGFX9Plus(getSTI());
1357   }
1358 
1359   bool isGFX10() const {
1360     return AMDGPU::isGFX10(getSTI());
1361   }
1362 
1363   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1364 
1365   bool isGFX10_BEncoding() const {
1366     return AMDGPU::isGFX10_BEncoding(getSTI());
1367   }
1368 
1369   bool hasInv2PiInlineImm() const {
1370     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1371   }
1372 
1373   bool hasFlatOffsets() const {
1374     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1375   }
1376 
1377   bool hasSGPR102_SGPR103() const {
1378     return !isVI() && !isGFX9();
1379   }
1380 
1381   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1382 
1383   bool hasIntClamp() const {
1384     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1385   }
1386 
1387   AMDGPUTargetStreamer &getTargetStreamer() {
1388     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1389     return static_cast<AMDGPUTargetStreamer &>(TS);
1390   }
1391 
1392   const MCRegisterInfo *getMRI() const {
1393     // We need this const_cast because for some reason getContext() is not const
1394     // in MCAsmParser.
1395     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1396   }
1397 
1398   const MCInstrInfo *getMII() const {
1399     return &MII;
1400   }
1401 
1402   const FeatureBitset &getFeatureBits() const {
1403     return getSTI().getFeatureBits();
1404   }
1405 
1406   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1407   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1408   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1409 
1410   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1411   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1412   bool isForcedDPP() const { return ForcedDPP; }
1413   bool isForcedSDWA() const { return ForcedSDWA; }
1414   ArrayRef<unsigned> getMatchedVariants() const;
1415   StringRef getMatchedVariantName() const;
1416 
1417   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1418   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1419                      bool RestoreOnFailure);
1420   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1421   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1422                                         SMLoc &EndLoc) override;
1423   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1424   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1425                                       unsigned Kind) override;
1426   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1427                                OperandVector &Operands, MCStreamer &Out,
1428                                uint64_t &ErrorInfo,
1429                                bool MatchingInlineAsm) override;
1430   bool ParseDirective(AsmToken DirectiveID) override;
1431   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1432                                     OperandMode Mode = OperandMode_Default);
1433   StringRef parseMnemonicSuffix(StringRef Name);
1434   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1435                         SMLoc NameLoc, OperandVector &Operands) override;
1436   //bool ProcessInstruction(MCInst &Inst);
1437 
1438   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1439 
1440   OperandMatchResultTy
1441   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1442                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1443                      bool (*ConvertResult)(int64_t &) = nullptr);
1444 
1445   OperandMatchResultTy
1446   parseOperandArrayWithPrefix(const char *Prefix,
1447                               OperandVector &Operands,
1448                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1449                               bool (*ConvertResult)(int64_t&) = nullptr);
1450 
1451   OperandMatchResultTy
1452   parseNamedBit(StringRef Name, OperandVector &Operands,
1453                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1454   OperandMatchResultTy parseCPol(OperandVector &Operands);
1455   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1456                                              StringRef &Value,
1457                                              SMLoc &StringLoc);
1458 
1459   bool isModifier();
1460   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1461   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1462   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1463   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1464   bool parseSP3NegModifier();
1465   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1466   OperandMatchResultTy parseReg(OperandVector &Operands);
1467   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1468   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1469   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1470   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1471   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1472   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1473   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1474   OperandMatchResultTy parseUfmt(int64_t &Format);
1475   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1476   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1477   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1478   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1479   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1480   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1481   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1482 
1483   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1484   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1485   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1486   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1487 
1488   bool parseCnt(int64_t &IntVal);
1489   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1490   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1491 
1492 private:
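  // A parsed field of a composite operand (e.g. a hwreg or sendmsg argument):
  // its value, source location, and whether it was given symbolically. Filled
  // in by the parse*Body helpers and checked by the validate* counterparts.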
1493   struct OperandInfoTy {
1494     SMLoc Loc;
1495     int64_t Id;
1496     bool IsSymbolic = false;
1497     bool IsDefined = false;
1498 
1499     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1500   };
1501 
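  // Parses the body of a sendmsg(...) operand, e.g.
  // sendmsg(MSG_GS, GS_OP_EMIT, 0) (illustrative syntax); the message,
  // operation and stream fields are checked afterwards by validateSendMsg.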
1502   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1503   bool validateSendMsg(const OperandInfoTy &Msg,
1504                        const OperandInfoTy &Op,
1505                        const OperandInfoTy &Stream);
1506 
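  // Parses the body of a hwreg(...) operand, e.g. hwreg(HW_REG_MODE, 0, 32)
  // (illustrative syntax); the register, offset and width fields are checked
  // afterwards by validateHwreg.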
1507   bool parseHwregBody(OperandInfoTy &HwReg,
1508                       OperandInfoTy &Offset,
1509                       OperandInfoTy &Width);
1510   bool validateHwreg(const OperandInfoTy &HwReg,
1511                      const OperandInfoTy &Offset,
1512                      const OperandInfoTy &Width);
1513 
1514   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1515   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1516 
1517   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1518                       const OperandVector &Operands) const;
1519   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1520   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1521   SMLoc getLitLoc(const OperandVector &Operands) const;
1522   SMLoc getConstLoc(const OperandVector &Operands) const;
1523 
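  // Semantic checks applied to a matched MCInst and its parsed operands.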
1524   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1525   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1526   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1527   bool validateSOPLiteral(const MCInst &Inst) const;
1528   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1529   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1530   bool validateIntClampSupported(const MCInst &Inst);
1531   bool validateMIMGAtomicDMask(const MCInst &Inst);
1532   bool validateMIMGGatherDMask(const MCInst &Inst);
1533   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1534   bool validateMIMGDataSize(const MCInst &Inst);
1535   bool validateMIMGAddrSize(const MCInst &Inst);
1536   bool validateMIMGD16(const MCInst &Inst);
1537   bool validateMIMGDim(const MCInst &Inst);
1538   bool validateMIMGMSAA(const MCInst &Inst);
1539   bool validateOpSel(const MCInst &Inst);
1540   bool validateVccOperand(unsigned Reg) const;
1541   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1542   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1543   bool validateAGPRLdSt(const MCInst &Inst) const;
1544   bool validateVGPRAlign(const MCInst &Inst) const;
1545   bool validateDivScale(const MCInst &Inst);
1546   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1547                              const SMLoc &IDLoc);
1548   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1549   unsigned getConstantBusLimit(unsigned Opcode) const;
1550   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1551   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1552   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1553 
1554   bool isSupportedMnemo(StringRef Mnemo,
1555                         const FeatureBitset &FBS);
1556   bool isSupportedMnemo(StringRef Mnemo,
1557                         const FeatureBitset &FBS,
1558                         ArrayRef<unsigned> Variants);
1559   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1560 
1561   bool isId(const StringRef Id) const;
1562   bool isId(const AsmToken &Token, const StringRef Id) const;
1563   bool isToken(const AsmToken::TokenKind Kind) const;
1564   bool trySkipId(const StringRef Id);
1565   bool trySkipId(const StringRef Pref, const StringRef Id);
1566   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1567   bool trySkipToken(const AsmToken::TokenKind Kind);
1568   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1569   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1570   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1571 
1572   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1573   AsmToken::TokenKind getTokenKind() const;
1574   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1575   bool parseExpr(OperandVector &Operands);
1576   StringRef getTokenStr() const;
1577   AsmToken peekToken();
1578   AsmToken getToken() const;
1579   SMLoc getLoc() const;
1580   void lex();
1581 
1582 public:
1583   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1584   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1585 
1586   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1587   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1588   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1589   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1590   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1591   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1592 
1593   bool parseSwizzleOperand(int64_t &Op,
1594                            const unsigned MinVal,
1595                            const unsigned MaxVal,
1596                            const StringRef ErrMsg,
1597                            SMLoc &Loc);
1598   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1599                             const unsigned MinVal,
1600                             const unsigned MaxVal,
1601                             const StringRef ErrMsg);
1602   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1603   bool parseSwizzleOffset(int64_t &Imm);
1604   bool parseSwizzleMacro(int64_t &Imm);
1605   bool parseSwizzleQuadPerm(int64_t &Imm);
1606   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1607   bool parseSwizzleBroadcast(int64_t &Imm);
1608   bool parseSwizzleSwap(int64_t &Imm);
1609   bool parseSwizzleReverse(int64_t &Imm);
1610 
1611   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1612   int64_t parseGPRIdxMacro();
1613 
1614   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1615   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1616   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1617   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1618 
1619   AMDGPUOperand::Ptr defaultCPol() const;
1620 
1621   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1622   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1623   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1624   AMDGPUOperand::Ptr defaultFlatOffset() const;
1625 
1626   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1627 
1628   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1629                OptionalImmIndexMap &OptionalIdx);
1630   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1631   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1632   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1633 
1634   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1635 
1636   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1637                bool IsAtomic = false);
1638   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1639   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1640 
1641   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1642 
1643   bool parseDimId(unsigned &Encoding);
1644   OperandMatchResultTy parseDim(OperandVector &Operands);
1645   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1646   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1647   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1648   int64_t parseDPPCtrlSel(StringRef Ctrl);
1649   int64_t parseDPPCtrlPerm();
1650   AMDGPUOperand::Ptr defaultRowMask() const;
1651   AMDGPUOperand::Ptr defaultBankMask() const;
1652   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1653   AMDGPUOperand::Ptr defaultFI() const;
1654   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1655   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1656 
1657   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1658                                     AMDGPUOperand::ImmTy Type);
1659   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1660   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1661   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1662   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1663   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1664   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1665   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1666                uint64_t BasicInstType,
1667                bool SkipDstVcc = false,
1668                bool SkipSrcVcc = false);
1669 
1670   AMDGPUOperand::Ptr defaultBLGP() const;
1671   AMDGPUOperand::Ptr defaultCBSZ() const;
1672   AMDGPUOperand::Ptr defaultABID() const;
1673 
1674   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1675   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1676 };
1677 
1678 struct OptionalOperand {
1679   const char *Name;
1680   AMDGPUOperand::ImmTy Type;
1681   bool IsBit;
1682   bool (*ConvertResult)(int64_t&);
1683 };
1684 
1685 } // end anonymous namespace
1686 
1687 // May be called with an integer type of equivalent bitwidth.
1688 static const fltSemantics *getFltSemantics(unsigned Size) {
1689   switch (Size) {
1690   case 4:
1691     return &APFloat::IEEEsingle();
1692   case 8:
1693     return &APFloat::IEEEdouble();
1694   case 2:
1695     return &APFloat::IEEEhalf();
1696   default:
1697     llvm_unreachable("unsupported fp type");
1698   }
1699 }
1700 
1701 static const fltSemantics *getFltSemantics(MVT VT) {
1702   return getFltSemantics(VT.getSizeInBits() / 8);
1703 }
1704 
1705 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1706   switch (OperandType) {
1707   case AMDGPU::OPERAND_REG_IMM_INT32:
1708   case AMDGPU::OPERAND_REG_IMM_FP32:
1709   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1710   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1711   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1712   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1713   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1714   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1715   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1716   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1717     return &APFloat::IEEEsingle();
1718   case AMDGPU::OPERAND_REG_IMM_INT64:
1719   case AMDGPU::OPERAND_REG_IMM_FP64:
1720   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1721   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1722   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1723     return &APFloat::IEEEdouble();
1724   case AMDGPU::OPERAND_REG_IMM_INT16:
1725   case AMDGPU::OPERAND_REG_IMM_FP16:
1726   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1727   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1728   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1729   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1730   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1731   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1732   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1733   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1734   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1735   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1736     return &APFloat::IEEEhalf();
1737   default:
1738     llvm_unreachable("unsupported fp type");
1739   }
1740 }
1741 
1742 //===----------------------------------------------------------------------===//
1743 // Operand
1744 //===----------------------------------------------------------------------===//
1745 
1746 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1747   bool Lost;
1748 
1749   // Convert the literal to the floating-point format of VT
1750   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1751                                                APFloat::rmNearestTiesToEven,
1752                                                &Lost);
1753   // We allow precision loss but not overflow or underflow
1754   if (Status != APFloat::opOK &&
1755       Lost &&
1756       ((Status & APFloat::opOverflow)  != 0 ||
1757        (Status & APFloat::opUnderflow) != 0)) {
1758     return false;
1759   }
1760 
1761   return true;
1762 }
1763 
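// Returns true if Val fits in Size bits as either a signed or an unsigned
// value, i.e. truncating it to Size bits loses no information.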
1764 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1765   return isUIntN(Size, Val) || isIntN(Size, Val);
1766 }
1767 
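// Check whether Val is usable as an inline constant for a 16-bit
// (or packed 16-bit) operand of the given type.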
1768 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1769   if (VT.getScalarType() == MVT::i16) {
1770     // FP inline immediates do not work correctly for i16 operands;
1771     // accept only integer inline constants here.
1771     return isInlinableIntLiteral(Val);
1772   }
1773 
1774   // f16/v2f16 operands work correctly for all values.
1775   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1776 }
1777 
1778 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1779 
1780   // This is a hack to enable named inline values like
1781   // shared_base with both 32-bit and 64-bit operands.
1782   // Note that these values are defined as
1783   // 32-bit operands only.
1784   if (isInlineValue()) {
1785     return true;
1786   }
1787 
1788   if (!isImmTy(ImmTyNone)) {
1789     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1790     return false;
1791   }
1792   // TODO: We should avoid using host float here. It would be better to
1793   // check the float bit values which is what a few other places do.
1794   // We've had bot failures before due to weird NaN support on mips hosts.
1795 
1796   APInt Literal(64, Imm.Val);
1797 
1798   if (Imm.IsFPImm) { // We got fp literal token
1799     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1800       return AMDGPU::isInlinableLiteral64(Imm.Val,
1801                                           AsmParser->hasInv2PiInlineImm());
1802     }
1803 
1804     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1805     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1806       return false;
1807 
1808     if (type.getScalarSizeInBits() == 16) {
1809       return isInlineableLiteralOp16(
1810         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1811         type, AsmParser->hasInv2PiInlineImm());
1812     }
1813 
1814     // Check if single precision literal is inlinable
1815     return AMDGPU::isInlinableLiteral32(
1816       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1817       AsmParser->hasInv2PiInlineImm());
1818   }
1819 
1820   // We got int literal token.
1821   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1822     return AMDGPU::isInlinableLiteral64(Imm.Val,
1823                                         AsmParser->hasInv2PiInlineImm());
1824   }
1825 
1826   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1827     return false;
1828   }
1829 
1830   if (type.getScalarSizeInBits() == 16) {
1831     return isInlineableLiteralOp16(
1832       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1833       type, AsmParser->hasInv2PiInlineImm());
1834   }
1835 
1836   return AMDGPU::isInlinableLiteral32(
1837     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1838     AsmParser->hasInv2PiInlineImm());
1839 }
1840 
1841 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1842   // Check that this immediate can be added as literal
1843   if (!isImmTy(ImmTyNone)) {
1844     return false;
1845   }
1846 
1847   if (!Imm.IsFPImm) {
1848     // We got int literal token.
1849 
1850     if (type == MVT::f64 && hasFPModifiers()) {
1851       // FP modifiers cannot be applied to int literals while preserving the same
1852       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1853       // ambiguity, reject these cases.
1854       return false;
1855     }
1856 
1857     unsigned Size = type.getSizeInBits();
1858     if (Size == 64)
1859       Size = 32;
1860 
1861     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1862     // types.
1863     return isSafeTruncation(Imm.Val, Size);
1864   }
1865 
1866   // We got fp literal token
1867   if (type == MVT::f64) { // Expected 64-bit fp operand
1868     // The low 32 bits of the literal will be set to zero, but such literals are accepted
1869     return true;
1870   }
1871 
1872   if (type == MVT::i64) { // Expected 64-bit int operand
1873     // We don't allow fp literals in 64-bit integer instructions. It is
1874     // unclear how we should encode them.
1875     return false;
1876   }
1877 
1878   // We allow fp literals with f16x2 operands assuming that the specified
1879   // literal goes into the lower half and the upper half is zero. We also
1880   // require that the literal can be losslessly converted to f16.
1881   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1882                      (type == MVT::v2i16)? MVT::i16 :
1883                      (type == MVT::v2f32)? MVT::f32 : type;
1884 
1885   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1886   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1887 }
1888 
1889 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1890   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1891 }
1892 
1893 bool AMDGPUOperand::isVRegWithInputMods() const {
1894   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1895          // GFX90A allows DPP on 64-bit operands.
1896          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1897           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1898 }
1899 
1900 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1901   if (AsmParser->isVI())
1902     return isVReg32();
1903   else if (AsmParser->isGFX9Plus())
1904     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1905   else
1906     return false;
1907 }
1908 
1909 bool AMDGPUOperand::isSDWAFP16Operand() const {
1910   return isSDWAOperand(MVT::f16);
1911 }
1912 
1913 bool AMDGPUOperand::isSDWAFP32Operand() const {
1914   return isSDWAOperand(MVT::f32);
1915 }
1916 
1917 bool AMDGPUOperand::isSDWAInt16Operand() const {
1918   return isSDWAOperand(MVT::i16);
1919 }
1920 
1921 bool AMDGPUOperand::isSDWAInt32Operand() const {
1922   return isSDWAOperand(MVT::i32);
1923 }
1924 
1925 bool AMDGPUOperand::isBoolReg() const {
1926   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1927          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1928 }
1929 
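// Apply the parsed abs/neg modifiers to the raw bits of a floating-point
// literal of the given size in bytes.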
1930 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1931 {
1932   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1933   assert(Size == 2 || Size == 4 || Size == 8);
1934 
1935   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1936 
1937   if (Imm.Mods.Abs) {
1938     Val &= ~FpSignMask;
1939   }
1940   if (Imm.Mods.Neg) {
1941     Val ^= FpSignMask;
1942   }
1943 
1944   return Val;
1945 }
1946 
1947 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1948   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1949                              Inst.getNumOperands())) {
1950     addLiteralImmOperand(Inst, Imm.Val,
1951                          ApplyModifiers &
1952                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1953   } else {
1954     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1955     Inst.addOperand(MCOperand::createImm(Imm.Val));
1956     setImmKindNone();
1957   }
1958 }
1959 
1960 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1961   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1962   auto OpNum = Inst.getNumOperands();
1963   // Check that this operand accepts literals
1964   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1965 
1966   if (ApplyModifiers) {
1967     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1968     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1969     Val = applyInputFPModifiers(Val, Size);
1970   }
1971 
1972   APInt Literal(64, Val);
1973   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1974 
1975   if (Imm.IsFPImm) { // We got fp literal token
1976     switch (OpTy) {
1977     case AMDGPU::OPERAND_REG_IMM_INT64:
1978     case AMDGPU::OPERAND_REG_IMM_FP64:
1979     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1980     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1981     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1982       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1983                                        AsmParser->hasInv2PiInlineImm())) {
1984         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1985         setImmKindConst();
1986         return;
1987       }
1988 
1989       // Non-inlineable
1990       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1991         // For fp operands we check if low 32 bits are zeros
1992         if (Literal.getLoBits(32) != 0) {
1993           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1994           "Can't encode literal as exact 64-bit floating-point operand. "
1995           "Low 32-bits will be set to zero");
1996         }
1997 
1998         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1999         setImmKindLiteral();
2000         return;
2001       }
2002 
2003       // We don't allow fp literals in 64-bit integer instructions. It is
2004       // unclear how we should encode them. This case should be checked earlier
2005       // in predicate methods (isLiteralImm())
2006       llvm_unreachable("fp literal in 64-bit integer instruction.");
2007 
2008     case AMDGPU::OPERAND_REG_IMM_INT32:
2009     case AMDGPU::OPERAND_REG_IMM_FP32:
2010     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2011     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2012     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2013     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2014     case AMDGPU::OPERAND_REG_IMM_INT16:
2015     case AMDGPU::OPERAND_REG_IMM_FP16:
2016     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2017     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2018     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2019     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2020     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2021     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2022     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2023     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2024     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2025     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2026     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2027     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2028     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2029     case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2030       bool lost;
2031       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2032       // Convert the literal to the operand's floating-point format
2033       FPLiteral.convert(*getOpFltSemantics(OpTy),
2034                         APFloat::rmNearestTiesToEven, &lost);
2035       // We allow precision loss but not overflow or underflow. This should be
2036       // checked earlier in isLiteralImm()
2037 
2038       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2039       Inst.addOperand(MCOperand::createImm(ImmVal));
2040       setImmKindLiteral();
2041       return;
2042     }
2043     default:
2044       llvm_unreachable("invalid operand size");
2045     }
2046 
2047     return;
2048   }
2049 
2050   // We got int literal token.
2051   // Only sign extend inline immediates.
2052   switch (OpTy) {
2053   case AMDGPU::OPERAND_REG_IMM_INT32:
2054   case AMDGPU::OPERAND_REG_IMM_FP32:
2055   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2056   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2057   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2058   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2059   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2060   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2061   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2062   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2063   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2064   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2065     if (isSafeTruncation(Val, 32) &&
2066         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2067                                      AsmParser->hasInv2PiInlineImm())) {
2068       Inst.addOperand(MCOperand::createImm(Val));
2069       setImmKindConst();
2070       return;
2071     }
2072 
2073     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2074     setImmKindLiteral();
2075     return;
2076 
2077   case AMDGPU::OPERAND_REG_IMM_INT64:
2078   case AMDGPU::OPERAND_REG_IMM_FP64:
2079   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2080   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2081   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2082     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2083       Inst.addOperand(MCOperand::createImm(Val));
2084       setImmKindConst();
2085       return;
2086     }
2087 
2088     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2089     setImmKindLiteral();
2090     return;
2091 
2092   case AMDGPU::OPERAND_REG_IMM_INT16:
2093   case AMDGPU::OPERAND_REG_IMM_FP16:
2094   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2095   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2096   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2097   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2098     if (isSafeTruncation(Val, 16) &&
2099         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2100                                      AsmParser->hasInv2PiInlineImm())) {
2101       Inst.addOperand(MCOperand::createImm(Val));
2102       setImmKindConst();
2103       return;
2104     }
2105 
2106     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2107     setImmKindLiteral();
2108     return;
2109 
2110   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2111   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2112   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2113   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2114     assert(isSafeTruncation(Val, 16));
2115     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2116                                         AsmParser->hasInv2PiInlineImm()));
2117 
2118     Inst.addOperand(MCOperand::createImm(Val));
2119     return;
2120   }
2121   default:
2122     llvm_unreachable("invalid operand size");
2123   }
2124 }
2125 
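// Add a literal operand of Bitwidth bits: integer tokens are truncated to
// Bitwidth bits, FP tokens are converted to the matching IEEE format and
// encoded as their raw bit pattern.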
2126 template <unsigned Bitwidth>
2127 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2128   APInt Literal(64, Imm.Val);
2129   setImmKindNone();
2130 
2131   if (!Imm.IsFPImm) {
2132     // We got int literal token.
2133     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2134     return;
2135   }
2136 
2137   bool Lost;
2138   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2139   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2140                     APFloat::rmNearestTiesToEven, &Lost);
2141   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2142 }
2143 
2144 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2145   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2146 }
2147 
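// Registers that read as named inline constants (shared_base, vccz, null,
// etc.) rather than as ordinary register operands.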
2148 static bool isInlineValue(unsigned Reg) {
2149   switch (Reg) {
2150   case AMDGPU::SRC_SHARED_BASE:
2151   case AMDGPU::SRC_SHARED_LIMIT:
2152   case AMDGPU::SRC_PRIVATE_BASE:
2153   case AMDGPU::SRC_PRIVATE_LIMIT:
2154   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2155     return true;
2156   case AMDGPU::SRC_VCCZ:
2157   case AMDGPU::SRC_EXECZ:
2158   case AMDGPU::SRC_SCC:
2159     return true;
2160   case AMDGPU::SGPR_NULL:
2161     return true;
2162   default:
2163     return false;
2164   }
2165 }
2166 
2167 bool AMDGPUOperand::isInlineValue() const {
2168   return isRegKind() && ::isInlineValue(getReg());
2169 }
2170 
2171 //===----------------------------------------------------------------------===//
2172 // AsmParser
2173 //===----------------------------------------------------------------------===//
2174 
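// Map a register kind and a width in 32-bit registers to the corresponding
// register class ID, or -1 if the combination is unsupported.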
2175 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2176   if (Is == IS_VGPR) {
2177     switch (RegWidth) {
2178       default: return -1;
2179       case 1: return AMDGPU::VGPR_32RegClassID;
2180       case 2: return AMDGPU::VReg_64RegClassID;
2181       case 3: return AMDGPU::VReg_96RegClassID;
2182       case 4: return AMDGPU::VReg_128RegClassID;
2183       case 5: return AMDGPU::VReg_160RegClassID;
2184       case 6: return AMDGPU::VReg_192RegClassID;
2185       case 8: return AMDGPU::VReg_256RegClassID;
2186       case 16: return AMDGPU::VReg_512RegClassID;
2187       case 32: return AMDGPU::VReg_1024RegClassID;
2188     }
2189   } else if (Is == IS_TTMP) {
2190     switch (RegWidth) {
2191       default: return -1;
2192       case 1: return AMDGPU::TTMP_32RegClassID;
2193       case 2: return AMDGPU::TTMP_64RegClassID;
2194       case 4: return AMDGPU::TTMP_128RegClassID;
2195       case 8: return AMDGPU::TTMP_256RegClassID;
2196       case 16: return AMDGPU::TTMP_512RegClassID;
2197     }
2198   } else if (Is == IS_SGPR) {
2199     switch (RegWidth) {
2200       default: return -1;
2201       case 1: return AMDGPU::SGPR_32RegClassID;
2202       case 2: return AMDGPU::SGPR_64RegClassID;
2203       case 3: return AMDGPU::SGPR_96RegClassID;
2204       case 4: return AMDGPU::SGPR_128RegClassID;
2205       case 5: return AMDGPU::SGPR_160RegClassID;
2206       case 6: return AMDGPU::SGPR_192RegClassID;
2207       case 8: return AMDGPU::SGPR_256RegClassID;
2208       case 16: return AMDGPU::SGPR_512RegClassID;
2209     }
2210   } else if (Is == IS_AGPR) {
2211     switch (RegWidth) {
2212       default: return -1;
2213       case 1: return AMDGPU::AGPR_32RegClassID;
2214       case 2: return AMDGPU::AReg_64RegClassID;
2215       case 3: return AMDGPU::AReg_96RegClassID;
2216       case 4: return AMDGPU::AReg_128RegClassID;
2217       case 5: return AMDGPU::AReg_160RegClassID;
2218       case 6: return AMDGPU::AReg_192RegClassID;
2219       case 8: return AMDGPU::AReg_256RegClassID;
2220       case 16: return AMDGPU::AReg_512RegClassID;
2221       case 32: return AMDGPU::AReg_1024RegClassID;
2222     }
2223   }
2224   return -1;
2225 }
2226 
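// Translate a special register name (exec, vcc, m0, ...) to its register
// number; returns AMDGPU::NoRegister for unknown names.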
2227 static unsigned getSpecialRegForName(StringRef RegName) {
2228   return StringSwitch<unsigned>(RegName)
2229     .Case("exec", AMDGPU::EXEC)
2230     .Case("vcc", AMDGPU::VCC)
2231     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2232     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2233     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2234     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2235     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2236     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2237     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2238     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2239     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2240     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2241     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2242     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2243     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2244     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2245     .Case("m0", AMDGPU::M0)
2246     .Case("vccz", AMDGPU::SRC_VCCZ)
2247     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2248     .Case("execz", AMDGPU::SRC_EXECZ)
2249     .Case("src_execz", AMDGPU::SRC_EXECZ)
2250     .Case("scc", AMDGPU::SRC_SCC)
2251     .Case("src_scc", AMDGPU::SRC_SCC)
2252     .Case("tba", AMDGPU::TBA)
2253     .Case("tma", AMDGPU::TMA)
2254     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2255     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2256     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2257     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2258     .Case("vcc_lo", AMDGPU::VCC_LO)
2259     .Case("vcc_hi", AMDGPU::VCC_HI)
2260     .Case("exec_lo", AMDGPU::EXEC_LO)
2261     .Case("exec_hi", AMDGPU::EXEC_HI)
2262     .Case("tma_lo", AMDGPU::TMA_LO)
2263     .Case("tma_hi", AMDGPU::TMA_HI)
2264     .Case("tba_lo", AMDGPU::TBA_LO)
2265     .Case("tba_hi", AMDGPU::TBA_HI)
2266     .Case("pc", AMDGPU::PC_REG)
2267     .Case("null", AMDGPU::SGPR_NULL)
2268     .Default(AMDGPU::NoRegister);
2269 }
2270 
2271 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2272                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2273   auto R = parseRegister();
2274   if (!R) return true;
2275   assert(R->isReg());
2276   RegNo = R->getReg();
2277   StartLoc = R->getStartLoc();
2278   EndLoc = R->getEndLoc();
2279   return false;
2280 }
2281 
2282 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2283                                     SMLoc &EndLoc) {
2284   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2285 }
2286 
2287 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2288                                                        SMLoc &StartLoc,
2289                                                        SMLoc &EndLoc) {
2290   bool Result =
2291       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2292   bool PendingErrors = getParser().hasPendingError();
2293   getParser().clearPendingErrors();
2294   if (PendingErrors)
2295     return MatchOperand_ParseFail;
2296   if (Result)
2297     return MatchOperand_NoMatch;
2298   return MatchOperand_Success;
2299 }
2300 
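// Try to append register Reg1 to the register range currently described by
// Reg and RegWidth. Special registers combine into their known LO/HI pairs;
// regular registers must have consecutive indices.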
2301 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2302                                             RegisterKind RegKind, unsigned Reg1,
2303                                             SMLoc Loc) {
2304   switch (RegKind) {
2305   case IS_SPECIAL:
2306     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2307       Reg = AMDGPU::EXEC;
2308       RegWidth = 2;
2309       return true;
2310     }
2311     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2312       Reg = AMDGPU::FLAT_SCR;
2313       RegWidth = 2;
2314       return true;
2315     }
2316     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2317       Reg = AMDGPU::XNACK_MASK;
2318       RegWidth = 2;
2319       return true;
2320     }
2321     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2322       Reg = AMDGPU::VCC;
2323       RegWidth = 2;
2324       return true;
2325     }
2326     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2327       Reg = AMDGPU::TBA;
2328       RegWidth = 2;
2329       return true;
2330     }
2331     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2332       Reg = AMDGPU::TMA;
2333       RegWidth = 2;
2334       return true;
2335     }
2336     Error(Loc, "register does not fit in the list");
2337     return false;
2338   case IS_VGPR:
2339   case IS_SGPR:
2340   case IS_AGPR:
2341   case IS_TTMP:
2342     if (Reg1 != Reg + RegWidth) {
2343       Error(Loc, "registers in a list must have consecutive indices");
2344       return false;
2345     }
2346     RegWidth++;
2347     return true;
2348   default:
2349     llvm_unreachable("unexpected register kind");
2350   }
2351 }
2352 
2353 struct RegInfo {
2354   StringLiteral Name;
2355   RegisterKind Kind;
2356 };
2357 
2358 static constexpr RegInfo RegularRegisters[] = {
2359   {{"v"},    IS_VGPR},
2360   {{"s"},    IS_SGPR},
2361   {{"ttmp"}, IS_TTMP},
2362   {{"acc"},  IS_AGPR},
2363   {{"a"},    IS_AGPR},
2364 };
2365 
2366 static bool isRegularReg(RegisterKind Kind) {
2367   return Kind == IS_VGPR ||
2368          Kind == IS_SGPR ||
2369          Kind == IS_TTMP ||
2370          Kind == IS_AGPR;
2371 }
2372 
2373 static const RegInfo* getRegularRegInfo(StringRef Str) {
2374   for (const RegInfo &Reg : RegularRegisters)
2375     if (Str.startswith(Reg.Name))
2376       return &Reg;
2377   return nullptr;
2378 }
2379 
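// Parse Str as a decimal register index; returns true and sets Num on success.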
2380 static bool getRegNum(StringRef Str, unsigned& Num) {
2381   return !Str.getAsInteger(10, Num);
2382 }
2383 
2384 bool
2385 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2386                             const AsmToken &NextToken) const {
2387 
2388   // A list of consecutive registers: [s0,s1,s2,s3]
2389   if (Token.is(AsmToken::LBrac))
2390     return true;
2391 
2392   if (!Token.is(AsmToken::Identifier))
2393     return false;
2394 
2395   // A single register like s0 or a range of registers like s[0:1]
2396 
2397   StringRef Str = Token.getString();
2398   const RegInfo *Reg = getRegularRegInfo(Str);
2399   if (Reg) {
2400     StringRef RegName = Reg->Name;
2401     StringRef RegSuffix = Str.substr(RegName.size());
2402     if (!RegSuffix.empty()) {
2403       unsigned Num;
2404       // A single register with an index: rXX
2405       if (getRegNum(RegSuffix, Num))
2406         return true;
2407     } else {
2408       // A range of registers: r[XX:YY].
2409       if (NextToken.is(AsmToken::LBrac))
2410         return true;
2411     }
2412   }
2413 
2414   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2415 }
2416 
2417 bool
2418 AMDGPUAsmParser::isRegister()
2419 {
2420   return isRegister(getToken(), peekToken());
2421 }
2422 
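// Map a parsed register kind, first index and width to an MC register,
// enforcing the alignment rules for SGPR and TTMP tuples.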
2423 unsigned
2424 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2425                                unsigned RegNum,
2426                                unsigned RegWidth,
2427                                SMLoc Loc) {
2428 
2429   assert(isRegularReg(RegKind));
2430 
2431   unsigned AlignSize = 1;
2432   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2433     // SGPR and TTMP registers must be aligned.
2434     // Max required alignment is 4 dwords.
2435     AlignSize = std::min(RegWidth, 4u);
2436   }
2437 
2438   if (RegNum % AlignSize != 0) {
2439     Error(Loc, "invalid register alignment");
2440     return AMDGPU::NoRegister;
2441   }
2442 
2443   unsigned RegIdx = RegNum / AlignSize;
2444   int RCID = getRegClass(RegKind, RegWidth);
2445   if (RCID == -1) {
2446     Error(Loc, "invalid or unsupported register size");
2447     return AMDGPU::NoRegister;
2448   }
2449 
2450   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2451   const MCRegisterClass RC = TRI->getRegClass(RCID);
2452   if (RegIdx >= RC.getNumRegs()) {
2453     Error(Loc, "register index is out of range");
2454     return AMDGPU::NoRegister;
2455   }
2456 
2457   return RC.getRegister(RegIdx);
2458 }
2459 
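// Parse a bracketed register index or range: "[XX]" or "[XX:YY]". On success,
// Num holds the first index and Width the number of registers.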
2460 bool
2461 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2462   int64_t RegLo, RegHi;
2463   if (!skipToken(AsmToken::LBrac, "missing register index"))
2464     return false;
2465 
2466   SMLoc FirstIdxLoc = getLoc();
2467   SMLoc SecondIdxLoc;
2468 
2469   if (!parseExpr(RegLo))
2470     return false;
2471 
2472   if (trySkipToken(AsmToken::Colon)) {
2473     SecondIdxLoc = getLoc();
2474     if (!parseExpr(RegHi))
2475       return false;
2476   } else {
2477     RegHi = RegLo;
2478   }
2479 
2480   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2481     return false;
2482 
2483   if (!isUInt<32>(RegLo)) {
2484     Error(FirstIdxLoc, "invalid register index");
2485     return false;
2486   }
2487 
2488   if (!isUInt<32>(RegHi)) {
2489     Error(SecondIdxLoc, "invalid register index");
2490     return false;
2491   }
2492 
2493   if (RegLo > RegHi) {
2494     Error(FirstIdxLoc, "first register index should not exceed second index");
2495     return false;
2496   }
2497 
2498   Num = static_cast<unsigned>(RegLo);
2499   Width = (RegHi - RegLo) + 1;
2500   return true;
2501 }
2502 
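// Try to interpret the current identifier as a special register name.
// The token is consumed only if the name is recognized.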
2503 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2504                                           unsigned &RegNum, unsigned &RegWidth,
2505                                           SmallVectorImpl<AsmToken> &Tokens) {
2506   assert(isToken(AsmToken::Identifier));
2507   unsigned Reg = getSpecialRegForName(getTokenStr());
2508   if (Reg) {
2509     RegNum = 0;
2510     RegWidth = 1;
2511     RegKind = IS_SPECIAL;
2512     Tokens.push_back(getToken());
2513     lex(); // skip register name
2514   }
2515   return Reg;
2516 }
2517 
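// Parse a regular register reference such as v3, s[0:1] or ttmp[4:7].
// Returns AMDGPU::NoRegister and reports an error on failure.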
2518 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2519                                           unsigned &RegNum, unsigned &RegWidth,
2520                                           SmallVectorImpl<AsmToken> &Tokens) {
2521   assert(isToken(AsmToken::Identifier));
2522   StringRef RegName = getTokenStr();
2523   auto Loc = getLoc();
2524 
2525   const RegInfo *RI = getRegularRegInfo(RegName);
2526   if (!RI) {
2527     Error(Loc, "invalid register name");
2528     return AMDGPU::NoRegister;
2529   }
2530 
2531   Tokens.push_back(getToken());
2532   lex(); // skip register name
2533 
2534   RegKind = RI->Kind;
2535   StringRef RegSuffix = RegName.substr(RI->Name.size());
2536   if (!RegSuffix.empty()) {
2537     // Single 32-bit register: vXX.
2538     if (!getRegNum(RegSuffix, RegNum)) {
2539       Error(Loc, "invalid register index");
2540       return AMDGPU::NoRegister;
2541     }
2542     RegWidth = 1;
2543   } else {
2544     // Range of registers: v[XX:YY]. ":YY" is optional.
2545     if (!ParseRegRange(RegNum, RegWidth))
2546       return AMDGPU::NoRegister;
2547   }
2548 
2549   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2550 }
2551 
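// Parse a list of single 32-bit registers of the same kind, e.g. [s0,s1,s2,s3],
// and combine it into one register of the matching width.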
2552 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2553                                        unsigned &RegWidth,
2554                                        SmallVectorImpl<AsmToken> &Tokens) {
2555   unsigned Reg = AMDGPU::NoRegister;
2556   auto ListLoc = getLoc();
2557 
2558   if (!skipToken(AsmToken::LBrac,
2559                  "expected a register or a list of registers")) {
2560     return AMDGPU::NoRegister;
2561   }
2562 
2563   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2564 
2565   auto Loc = getLoc();
2566   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2567     return AMDGPU::NoRegister;
2568   if (RegWidth != 1) {
2569     Error(Loc, "expected a single 32-bit register");
2570     return AMDGPU::NoRegister;
2571   }
2572 
2573   for (; trySkipToken(AsmToken::Comma); ) {
2574     RegisterKind NextRegKind;
2575     unsigned NextReg, NextRegNum, NextRegWidth;
2576     Loc = getLoc();
2577 
2578     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2579                              NextRegNum, NextRegWidth,
2580                              Tokens)) {
2581       return AMDGPU::NoRegister;
2582     }
2583     if (NextRegWidth != 1) {
2584       Error(Loc, "expected a single 32-bit register");
2585       return AMDGPU::NoRegister;
2586     }
2587     if (NextRegKind != RegKind) {
2588       Error(Loc, "registers in a list must be of the same kind");
2589       return AMDGPU::NoRegister;
2590     }
2591     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2592       return AMDGPU::NoRegister;
2593   }
2594 
2595   if (!skipToken(AsmToken::RBrac,
2596                  "expected a comma or a closing square bracket")) {
2597     return AMDGPU::NoRegister;
2598   }
2599 
2600   if (isRegularReg(RegKind))
2601     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2602 
2603   return Reg;
2604 }
2605 
2606 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2607                                           unsigned &RegNum, unsigned &RegWidth,
2608                                           SmallVectorImpl<AsmToken> &Tokens) {
2609   auto Loc = getLoc();
2610   Reg = AMDGPU::NoRegister;
2611 
2612   if (isToken(AsmToken::Identifier)) {
2613     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2614     if (Reg == AMDGPU::NoRegister)
2615       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2616   } else {
2617     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2618   }
2619 
2620   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2621   if (Reg == AMDGPU::NoRegister) {
2622     assert(Parser.hasPendingError());
2623     return false;
2624   }
2625 
2626   if (!subtargetHasRegister(*TRI, Reg)) {
2627     if (Reg == AMDGPU::SGPR_NULL) {
2628       Error(Loc, "'null' operand is not supported on this GPU");
2629     } else {
2630       Error(Loc, "register not available on this GPU");
2631     }
2632     return false;
2633   }
2634 
2635   return true;
2636 }
2637 
2638 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2639                                           unsigned &RegNum, unsigned &RegWidth,
2640                                           bool RestoreOnFailure /*=false*/) {
2641   Reg = AMDGPU::NoRegister;
2642 
2643   SmallVector<AsmToken, 1> Tokens;
2644   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2645     if (RestoreOnFailure) {
2646       while (!Tokens.empty()) {
2647         getLexer().UnLex(Tokens.pop_back_val());
2648       }
2649     }
2650     return true;
2651   }
2652   return false;
2653 }
2654 
2655 Optional<StringRef>
2656 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2657   switch (RegKind) {
2658   case IS_VGPR:
2659     return StringRef(".amdgcn.next_free_vgpr");
2660   case IS_SGPR:
2661     return StringRef(".amdgcn.next_free_sgpr");
2662   default:
2663     return None;
2664   }
2665 }
2666 
2667 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2668   auto SymbolName = getGprCountSymbolName(RegKind);
2669   assert(SymbolName && "initializing invalid register kind");
2670   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2671   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2672 }
2673 
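// Raise the .amdgcn.next_free_{v,s}gpr symbol if the given register range
// uses a higher register index than previously recorded.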
2674 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2675                                             unsigned DwordRegIndex,
2676                                             unsigned RegWidth) {
2677   // Symbols are only defined for GCN targets
2678   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2679     return true;
2680 
2681   auto SymbolName = getGprCountSymbolName(RegKind);
2682   if (!SymbolName)
2683     return true;
2684   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2685 
2686   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2687   int64_t OldCount;
2688 
2689   if (!Sym->isVariable())
2690     return !Error(getLoc(),
2691                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2692   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2693     return !Error(
2694         getLoc(),
2695         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2696 
2697   if (OldCount <= NewMax)
2698     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2699 
2700   return true;
2701 }
2702 
2703 std::unique_ptr<AMDGPUOperand>
2704 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2705   const auto &Tok = getToken();
2706   SMLoc StartLoc = Tok.getLoc();
2707   SMLoc EndLoc = Tok.getEndLoc();
2708   RegisterKind RegKind;
2709   unsigned Reg, RegNum, RegWidth;
2710 
2711   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2712     return nullptr;
2713   }
2714   if (isHsaAbiVersion3(&getSTI())) {
2715     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2716       return nullptr;
2717   } else
2718     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2719   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2720 }
2721 
2722 OperandMatchResultTy
2723 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2724   // TODO: add syntactic sugar for 1/(2*PI)
2725 
2726   assert(!isRegister());
2727   assert(!isModifier());
2728 
2729   const auto& Tok = getToken();
2730   const auto& NextTok = peekToken();
2731   bool IsReal = Tok.is(AsmToken::Real);
2732   SMLoc S = getLoc();
2733   bool Negate = false;
2734 
2735   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2736     lex();
2737     IsReal = true;
2738     Negate = true;
2739   }
2740 
2741   if (IsReal) {
2742     // Floating-point expressions are not supported.
2743     // Only floating-point literals with an optional
2744     // sign are allowed.
2745 
2746     StringRef Num = getTokenStr();
2747     lex();
2748 
2749     APFloat RealVal(APFloat::IEEEdouble());
2750     auto roundMode = APFloat::rmNearestTiesToEven;
2751     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2752       return MatchOperand_ParseFail;
2753     }
2754     if (Negate)
2755       RealVal.changeSign();
2756 
2757     Operands.push_back(
2758       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2759                                AMDGPUOperand::ImmTyNone, true));
2760 
2761     return MatchOperand_Success;
2762 
2763   } else {
2764     int64_t IntVal;
2765     const MCExpr *Expr;
2766     SMLoc S = getLoc();
2767 
2768     if (HasSP3AbsModifier) {
2769       // This is a workaround for handling expressions
2770       // as arguments of the SP3 'abs' modifier, for example:
2771       //     |1.0|
2772       //     |-1|
2773       //     |1+x|
2774       // This syntax is not compatible with the syntax of standard
2775       // MC expressions (due to the trailing '|').
2776       SMLoc EndLoc;
2777       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2778         return MatchOperand_ParseFail;
2779     } else {
2780       if (Parser.parseExpression(Expr))
2781         return MatchOperand_ParseFail;
2782     }
2783 
2784     if (Expr->evaluateAsAbsolute(IntVal)) {
2785       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2786     } else {
2787       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2788     }
2789 
2790     return MatchOperand_Success;
2791   }
2792 
2793   return MatchOperand_NoMatch;
2794 }
2795 
2796 OperandMatchResultTy
2797 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2798   if (!isRegister())
2799     return MatchOperand_NoMatch;
2800 
2801   if (auto R = parseRegister()) {
2802     assert(R->isReg());
2803     Operands.push_back(std::move(R));
2804     return MatchOperand_Success;
2805   }
2806   return MatchOperand_ParseFail;
2807 }
2808 
2809 OperandMatchResultTy
2810 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2811   auto res = parseReg(Operands);
2812   if (res != MatchOperand_NoMatch) {
2813     return res;
2814   } else if (isModifier()) {
2815     return MatchOperand_NoMatch;
2816   } else {
2817     return parseImm(Operands, HasSP3AbsMod);
2818   }
2819 }
2820 
2821 bool
2822 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2823   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2824     const auto &str = Token.getString();
2825     return str == "abs" || str == "neg" || str == "sext";
2826   }
2827   return false;
2828 }
2829 
2830 bool
2831 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2832   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2833 }
2834 
2835 bool
2836 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2837   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2838 }
2839 
2840 bool
2841 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2842   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2843 }
2844 
2845 // Check if this is an operand modifier or an opcode modifier
2846 // which may look like an expression but is not. We should
2847 // avoid parsing these modifiers as expressions. Currently
2848 // recognized sequences are:
2849 //   |...|
2850 //   abs(...)
2851 //   neg(...)
2852 //   sext(...)
2853 //   -reg
2854 //   -|...|
2855 //   -abs(...)
2856 //   name:...
2857 // Note that simple opcode modifiers like 'gds' may be parsed as
2858 // expressions; this is a special case. See getExpressionAsToken.
2859 //
2860 bool
2861 AMDGPUAsmParser::isModifier() {
2862 
2863   AsmToken Tok = getToken();
2864   AsmToken NextToken[2];
2865   peekTokens(NextToken);
2866 
2867   return isOperandModifier(Tok, NextToken[0]) ||
2868          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2869          isOpcodeModifierWithVal(Tok, NextToken[0]);
2870 }
2871 
2872 // Check if the current token is an SP3 'neg' modifier.
2873 // Currently this modifier is allowed in the following context:
2874 //
2875 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2876 // 2. Before an 'abs' modifier: -abs(...)
2877 // 3. Before an SP3 'abs' modifier: -|...|
2878 //
2879 // In all other cases "-" is handled as a part
2880 // of an expression that follows the sign.
2881 //
2882 // Note: When "-" is followed by an integer literal N,
2883 // it is interpreted as integer negation rather
2884 // than a floating-point NEG modifier applied to N.
2885 // Besides being counter-intuitive, such use of a floating-point
2886 // NEG modifier would have resulted in different meanings
2887 // of integer literals used with VOP1/2/C and VOP3,
2888 // for example:
2889 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2890 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2891 // Negative fp literals with a preceding "-" are
2892 // handled likewise for uniformity.
2893 //
2894 bool
2895 AMDGPUAsmParser::parseSP3NegModifier() {
2896 
2897   AsmToken NextToken[2];
2898   peekTokens(NextToken);
2899 
2900   if (isToken(AsmToken::Minus) &&
2901       (isRegister(NextToken[0], NextToken[1]) ||
2902        NextToken[0].is(AsmToken::Pipe) ||
2903        isId(NextToken[0], "abs"))) {
2904     lex();
2905     return true;
2906   }
2907 
2908   return false;
2909 }
2910 
2911 OperandMatchResultTy
2912 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2913                                               bool AllowImm) {
2914   bool Neg, SP3Neg;
2915   bool Abs, SP3Abs;
2916   SMLoc Loc;
2917 
2918   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2919   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2920     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2921     return MatchOperand_ParseFail;
2922   }
2923 
2924   SP3Neg = parseSP3NegModifier();
2925 
2926   Loc = getLoc();
2927   Neg = trySkipId("neg");
2928   if (Neg && SP3Neg) {
2929     Error(Loc, "expected register or immediate");
2930     return MatchOperand_ParseFail;
2931   }
2932   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2933     return MatchOperand_ParseFail;
2934 
2935   Abs = trySkipId("abs");
2936   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2937     return MatchOperand_ParseFail;
2938 
2939   Loc = getLoc();
2940   SP3Abs = trySkipToken(AsmToken::Pipe);
2941   if (Abs && SP3Abs) {
2942     Error(Loc, "expected register or immediate");
2943     return MatchOperand_ParseFail;
2944   }
2945 
2946   OperandMatchResultTy Res;
2947   if (AllowImm) {
2948     Res = parseRegOrImm(Operands, SP3Abs);
2949   } else {
2950     Res = parseReg(Operands);
2951   }
2952   if (Res != MatchOperand_Success) {
2953     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2954   }
2955 
2956   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2957     return MatchOperand_ParseFail;
2958   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2959     return MatchOperand_ParseFail;
2960   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2961     return MatchOperand_ParseFail;
2962 
2963   AMDGPUOperand::Modifiers Mods;
2964   Mods.Abs = Abs || SP3Abs;
2965   Mods.Neg = Neg || SP3Neg;
2966 
2967   if (Mods.hasFPModifiers()) {
2968     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2969     if (Op.isExpr()) {
2970       Error(Op.getStartLoc(), "expected an absolute expression");
2971       return MatchOperand_ParseFail;
2972     }
2973     Op.setModifiers(Mods);
2974   }
2975   return MatchOperand_Success;
2976 }
2977 
2978 OperandMatchResultTy
2979 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2980                                                bool AllowImm) {
2981   bool Sext = trySkipId("sext");
2982   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2983     return MatchOperand_ParseFail;
2984 
2985   OperandMatchResultTy Res;
2986   if (AllowImm) {
2987     Res = parseRegOrImm(Operands);
2988   } else {
2989     Res = parseReg(Operands);
2990   }
2991   if (Res != MatchOperand_Success) {
2992     return Sext? MatchOperand_ParseFail : Res;
2993   }
2994 
2995   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2996     return MatchOperand_ParseFail;
2997 
2998   AMDGPUOperand::Modifiers Mods;
2999   Mods.Sext = Sext;
3000 
3001   if (Mods.hasIntModifiers()) {
3002     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3003     if (Op.isExpr()) {
3004       Error(Op.getStartLoc(), "expected an absolute expression");
3005       return MatchOperand_ParseFail;
3006     }
3007     Op.setModifiers(Mods);
3008   }
3009 
3010   return MatchOperand_Success;
3011 }
3012 
3013 OperandMatchResultTy
3014 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3015   return parseRegOrImmWithFPInputMods(Operands, false);
3016 }
3017 
3018 OperandMatchResultTy
3019 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3020   return parseRegOrImmWithIntInputMods(Operands, false);
3021 }
3022 
3023 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3024   auto Loc = getLoc();
3025   if (trySkipId("off")) {
3026     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3027                                                 AMDGPUOperand::ImmTyOff, false));
3028     return MatchOperand_Success;
3029   }
3030 
3031   if (!isRegister())
3032     return MatchOperand_NoMatch;
3033 
3034   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3035   if (Reg) {
3036     Operands.push_back(std::move(Reg));
3037     return MatchOperand_Success;
3038   }
3039 
3040   return MatchOperand_ParseFail;
3041 
3042 }
3043 
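// Reject matcher candidates that conflict with a forced encoding
// (e32/e64/sdwa/dpp) and enforce a few opcode-specific operand restrictions.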
3044 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3045   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3046 
3047   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3048       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3049       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3050       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3051     return Match_InvalidOperand;
3052 
3053   if ((TSFlags & SIInstrFlags::VOP3) &&
3054       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3055       getForcedEncodingSize() != 64)
3056     return Match_PreferE32;
3057 
3058   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3059       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3060     // v_mac_f32/16 allow only dst_sel == DWORD;
3061     auto OpNum =
3062         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3063     const auto &Op = Inst.getOperand(OpNum);
3064     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3065       return Match_InvalidOperand;
3066     }
3067   }
3068 
3069   return Match_Success;
3070 }
3071 
3072 static ArrayRef<unsigned> getAllVariants() {
3073   static const unsigned Variants[] = {
3074     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3075     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3076   };
3077 
3078   return makeArrayRef(Variants);
3079 }
3080 
3081 // What asm variants we should check
3082 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3083   if (getForcedEncodingSize() == 32) {
3084     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3085     return makeArrayRef(Variants);
3086   }
3087 
3088   if (isForcedVOP3()) {
3089     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3090     return makeArrayRef(Variants);
3091   }
3092 
3093   if (isForcedSDWA()) {
3094     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3095                                         AMDGPUAsmVariants::SDWA9};
3096     return makeArrayRef(Variants);
3097   }
3098 
3099   if (isForcedDPP()) {
3100     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3101     return makeArrayRef(Variants);
3102   }
3103 
3104   return getAllVariants();
3105 }
3106 
3107 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3108   if (getForcedEncodingSize() == 32)
3109     return "e32";
3110 
3111   if (isForcedVOP3())
3112     return "e64";
3113 
3114   if (isForcedSDWA())
3115     return "sdwa";
3116 
3117   if (isForcedDPP())
3118     return "dpp";
3119 
3120   return "";
3121 }
3122 
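// Return the first implicitly read SGPR (VCC, FLAT_SCR or M0) of the
// instruction, or NoRegister if there is none.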
3123 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3124   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3125   const unsigned Num = Desc.getNumImplicitUses();
3126   for (unsigned i = 0; i < Num; ++i) {
3127     unsigned Reg = Desc.ImplicitUses[i];
3128     switch (Reg) {
3129     case AMDGPU::FLAT_SCR:
3130     case AMDGPU::VCC:
3131     case AMDGPU::VCC_LO:
3132     case AMDGPU::VCC_HI:
3133     case AMDGPU::M0:
3134       return Reg;
3135     default:
3136       break;
3137     }
3138   }
3139   return AMDGPU::NoRegister;
3140 }
3141 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases in which a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
3146 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3147                                        unsigned OpIdx) const {
3148   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3149 
3150   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3151     return false;
3152   }
3153 
3154   const MCOperand &MO = Inst.getOperand(OpIdx);
3155 
3156   int64_t Val = MO.getImm();
3157   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3158 
3159   switch (OpSize) { // expected operand size
3160   case 8:
3161     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3162   case 4:
3163     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3164   case 2: {
3165     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3166     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3167         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3168         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3169       return AMDGPU::isInlinableIntLiteral(Val);
3170 
3171     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3172         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3173         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3174       return AMDGPU::isInlinableIntLiteralV216(Val);
3175 
3176     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3177         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3178         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3179       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3180 
3181     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3182   }
3183   default:
3184     llvm_unreachable("invalid operand size");
3185   }
3186 }
3187 
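// Return the maximum number of scalar values (SGPRs and/or literals) that
// a single instruction may read over the constant bus.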
3188 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3189   if (!isGFX10Plus())
3190     return 1;
3191 
3192   switch (Opcode) {
3193   // 64-bit shift instructions can use only one scalar value input
3194   case AMDGPU::V_LSHLREV_B64_e64:
3195   case AMDGPU::V_LSHLREV_B64_gfx10:
3196   case AMDGPU::V_LSHRREV_B64_e64:
3197   case AMDGPU::V_LSHRREV_B64_gfx10:
3198   case AMDGPU::V_ASHRREV_I64_e64:
3199   case AMDGPU::V_ASHRREV_I64_gfx10:
3200   case AMDGPU::V_LSHL_B64_e64:
3201   case AMDGPU::V_LSHR_B64_e64:
3202   case AMDGPU::V_ASHR_I64_e64:
3203     return 1;
3204   default:
3205     return 2;
3206   }
3207 }
3208 
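// An operand occupies the constant bus if it is an SGPR other than null,
// a literal (non-inline immediate), or an unresolved expression.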
3209 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3210   const MCOperand &MO = Inst.getOperand(OpIdx);
3211   if (MO.isImm()) {
3212     return !isInlineConstant(Inst, OpIdx);
3213   } else if (MO.isReg()) {
3214     auto Reg = MO.getReg();
3215     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3216     auto PReg = mc2PseudoReg(Reg);
3217     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3218   } else {
3219     return true;
3220   }
3221 }
3222 
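// Check that the instruction does not read more scalar values over the
// constant bus than the target allows. Illustrative example: on targets
// with a single constant bus slot, an instruction such as
//   v_add_f32 v0, s0, s1
// is rejected because it reads two different SGPRs.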
3223 bool
3224 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3225                                                 const OperandVector &Operands) {
3226   const unsigned Opcode = Inst.getOpcode();
3227   const MCInstrDesc &Desc = MII.get(Opcode);
3228   unsigned LastSGPR = AMDGPU::NoRegister;
3229   unsigned ConstantBusUseCount = 0;
3230   unsigned NumLiterals = 0;
3231   unsigned LiteralSize;
3232 
3233   if (Desc.TSFlags &
3234       (SIInstrFlags::VOPC |
3235        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3236        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3237        SIInstrFlags::SDWA)) {
3238     // Check special imm operands (used by madmk, etc)
3239     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3240       ++ConstantBusUseCount;
3241     }
3242 
3243     SmallDenseSet<unsigned> SGPRsUsed;
3244     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3245     if (SGPRUsed != AMDGPU::NoRegister) {
3246       SGPRsUsed.insert(SGPRUsed);
3247       ++ConstantBusUseCount;
3248     }
3249 
3250     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3251     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3252     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3253 
3254     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3255 
3256     for (int OpIdx : OpIndices) {
3257       if (OpIdx == -1) break;
3258 
3259       const MCOperand &MO = Inst.getOperand(OpIdx);
3260       if (usesConstantBus(Inst, OpIdx)) {
3261         if (MO.isReg()) {
3262           LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these:
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction.
3269           if (!SGPRsUsed.count(LastSGPR)) {
3270             SGPRsUsed.insert(LastSGPR);
3271             ++ConstantBusUseCount;
3272           }
3273         } else { // Expression or a literal
3274 
3275           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3276             continue; // special operand like VINTERP attr_chan
3277 
3278           // An instruction may use only one literal.
3279           // This has been validated on the previous step.
3280           // See validateVOP3Literal.
3281           // This literal may be used as more than one operand.
3282           // If all these operands are of the same size,
3283           // this literal counts as one scalar value.
3284           // Otherwise it counts as 2 scalar values.
3285           // See "GFX10 Shader Programming", section 3.6.2.3.
3286 
3287           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3288           if (Size < 4) Size = 4;
3289 
3290           if (NumLiterals == 0) {
3291             NumLiterals = 1;
3292             LiteralSize = Size;
3293           } else if (LiteralSize != Size) {
3294             NumLiterals = 2;
3295           }
3296         }
3297       }
3298     }
3299   }
3300   ConstantBusUseCount += NumLiterals;
3301 
3302   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3303     return true;
3304 
3305   SMLoc LitLoc = getLitLoc(Operands);
3306   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3307   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3308   Error(Loc, "invalid operand (violates constant bus restrictions)");
3309   return false;
3310 }
3311 
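// For opcodes whose vdst operand is marked early-clobber, check that the
// destination register does not overlap any of the source registers.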
3312 bool
3313 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3314                                                  const OperandVector &Operands) {
3315   const unsigned Opcode = Inst.getOpcode();
3316   const MCInstrDesc &Desc = MII.get(Opcode);
3317 
3318   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3319   if (DstIdx == -1 ||
3320       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3321     return true;
3322   }
3323 
3324   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3325 
3326   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3327   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3328   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3329 
3330   assert(DstIdx != -1);
3331   const MCOperand &Dst = Inst.getOperand(DstIdx);
3332   assert(Dst.isReg());
3333   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3334 
3335   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3336 
3337   for (int SrcIdx : SrcIndices) {
3338     if (SrcIdx == -1) break;
3339     const MCOperand &Src = Inst.getOperand(SrcIdx);
3340     if (Src.isReg()) {
3341       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3342       if (isRegIntersect(DstReg, SrcReg, TRI)) {
3343         Error(getRegLoc(SrcReg, Operands),
3344           "destination must be different than all sources");
3345         return false;
3346       }
3347     }
3348   }
3349 
3350   return true;
3351 }
3352 
3353 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3354 
3355   const unsigned Opc = Inst.getOpcode();
3356   const MCInstrDesc &Desc = MII.get(Opc);
3357 
3358   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3359     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3360     assert(ClampIdx != -1);
3361     return Inst.getOperand(ClampIdx).getImm() == 0;
3362   }
3363 
3364   return true;
3365 }
3366 
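// Check that the size of the vdata operand matches the number of enabled
// dmask channels, plus one extra register when tfe is set. For example,
// dmask = 0x7 enables three channels and thus requires a 3-register vdata
// (assuming no tfe and no packed d16).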
3367 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3368 
3369   const unsigned Opc = Inst.getOpcode();
3370   const MCInstrDesc &Desc = MII.get(Opc);
3371 
3372   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3373     return true;
3374 
3375   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3376   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3377   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3378 
3379   assert(VDataIdx != -1);
3380 
3381   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3382     return true;
3383 
3384   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3385   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3386   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3387   if (DMask == 0)
3388     DMask = 1;
3389 
3390   unsigned DataSize =
3391     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3392   if (hasPackedD16()) {
3393     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3394     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3395       DataSize = (DataSize + 1) / 2;
3396   }
3397 
3398   return (VDataSize / 4) == DataSize + TFESize;
3399 }
3400 
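// On gfx10+, check that the number of address registers matches what the
// dim, gradient, coordinate and lod/clamp/mip arguments require, taking
// NSA (non-sequential address) encodings into account.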
3401 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3402   const unsigned Opc = Inst.getOpcode();
3403   const MCInstrDesc &Desc = MII.get(Opc);
3404 
3405   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3406     return true;
3407 
3408   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3409 
3410   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3411       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3412   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3413   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3414   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3415 
3416   assert(VAddr0Idx != -1);
3417   assert(SrsrcIdx != -1);
3418   assert(SrsrcIdx > VAddr0Idx);
3419 
3420   if (DimIdx == -1)
3421     return true; // intersect_ray
3422 
3423   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3424   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3425   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3426   unsigned VAddrSize =
3427       IsNSA ? SrsrcIdx - VAddr0Idx
3428             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3429 
3430   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3431                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3432                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3433                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3434   if (!IsNSA) {
3435     if (AddrSize > 8)
3436       AddrSize = 16;
3437     else if (AddrSize > 4)
3438       AddrSize = 8;
3439   }
3440 
3441   return VAddrSize == AddrSize;
3442 }
3443 
3444 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3445 
3446   const unsigned Opc = Inst.getOpcode();
3447   const MCInstrDesc &Desc = MII.get(Opc);
3448 
3449   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3450     return true;
3451   if (!Desc.mayLoad() || !Desc.mayStore())
3452     return true; // Not atomic
3453 
3454   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3455   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3456 
  // This is an incomplete check because image_atomic_cmpswap
  // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However, these limitations are
  // verified when we check that dmask matches dst size.
3461   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3462 }
3463 
3464 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3465 
3466   const unsigned Opc = Inst.getOpcode();
3467   const MCInstrDesc &Desc = MII.get(Opc);
3468 
3469   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3470     return true;
3471 
3472   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3473   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3474 
3475   // GATHER4 instructions use dmask in a different fashion compared to
3476   // other MIMG instructions. The only useful DMASK values are
3477   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3478   // (red,red,red,red) etc.) The ISA document doesn't mention
3479   // this.
3480   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3481 }
3482 
3483 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3484   const unsigned Opc = Inst.getOpcode();
3485   const MCInstrDesc &Desc = MII.get(Opc);
3486 
3487   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3488     return true;
3489 
3490   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3491   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3492       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3493 
3494   if (!BaseOpcode->MSAA)
3495     return true;
3496 
3497   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3498   assert(DimIdx != -1);
3499 
3500   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3501   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3502 
3503   return DimInfo->MSAA;
3504 }
3505 
3506 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3507 {
3508   switch (Opcode) {
3509   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3510   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3511   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3512     return true;
3513   default:
3514     return false;
3515   }
3516 }
3517 
// movrels* opcodes should only allow VGPRs as src0.
// This is specified in the .td description for vop1/vop3,
// but sdwa is handled differently. See isSDWAOperand.
3521 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3522                                       const OperandVector &Operands) {
3523 
3524   const unsigned Opc = Inst.getOpcode();
3525   const MCInstrDesc &Desc = MII.get(Opc);
3526 
3527   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3528     return true;
3529 
3530   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3531   assert(Src0Idx != -1);
3532 
3533   SMLoc ErrLoc;
3534   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3535   if (Src0.isReg()) {
3536     auto Reg = mc2PseudoReg(Src0.getReg());
3537     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3538     if (!isSGPR(Reg, TRI))
3539       return true;
3540     ErrLoc = getRegLoc(Reg, Operands);
3541   } else {
3542     ErrLoc = getConstLoc(Operands);
3543   }
3544 
3545   Error(ErrLoc, "source operand must be a VGPR");
3546   return false;
3547 }
3548 
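// v_accvgpr_write accepts only a VGPR or an inline constant as src0.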
3549 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3550                                           const OperandVector &Operands) {
3551 
3552   const unsigned Opc = Inst.getOpcode();
3553 
3554   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3555     return true;
3556 
3557   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3558   assert(Src0Idx != -1);
3559 
3560   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3561   if (!Src0.isReg())
3562     return true;
3563 
3564   auto Reg = mc2PseudoReg(Src0.getReg());
3565   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3566   if (isSGPR(Reg, TRI)) {
3567     Error(getRegLoc(Reg, Operands),
3568           "source operand must be either a VGPR or an inline constant");
3569     return false;
3570   }
3571 
3572   return true;
3573 }
3574 
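// v_div_scale_{f32,f64} do not accept the abs (|...|) source modifier.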
3575 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3576   switch (Inst.getOpcode()) {
3577   default:
3578     return true;
3579   case V_DIV_SCALE_F32_gfx6_gfx7:
3580   case V_DIV_SCALE_F32_vi:
3581   case V_DIV_SCALE_F32_gfx10:
3582   case V_DIV_SCALE_F64_gfx6_gfx7:
3583   case V_DIV_SCALE_F64_vi:
3584   case V_DIV_SCALE_F64_gfx10:
3585     break;
3586   }
3587 
3588   // TODO: Check that src0 = src1 or src2.
3589 
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3593     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3594             .getImm() &
3595         SISrcMods::ABS) {
3596       return false;
3597     }
3598   }
3599 
3600   return true;
3601 }
3602 
3603 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3604 
3605   const unsigned Opc = Inst.getOpcode();
3606   const MCInstrDesc &Desc = MII.get(Opc);
3607 
3608   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3609     return true;
3610 
3611   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3612   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3613     if (isCI() || isSI())
3614       return false;
3615   }
3616 
3617   return true;
3618 }
3619 
3620 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3621   const unsigned Opc = Inst.getOpcode();
3622   const MCInstrDesc &Desc = MII.get(Opc);
3623 
3624   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3625     return true;
3626 
3627   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3628   if (DimIdx < 0)
3629     return true;
3630 
3631   long Imm = Inst.getOperand(DimIdx).getImm();
3632   if (Imm < 0 || Imm >= 8)
3633     return false;
3634 
3635   return true;
3636 }
3637 
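// Opcodes that take their source operands in reversed order, e.g.
// v_subrev_f32 computes src1 - src0. These cannot read lds_direct
// (see validateLdsDirect below).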
3638 static bool IsRevOpcode(const unsigned Opcode)
3639 {
3640   switch (Opcode) {
3641   case AMDGPU::V_SUBREV_F32_e32:
3642   case AMDGPU::V_SUBREV_F32_e64:
3643   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3644   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3645   case AMDGPU::V_SUBREV_F32_e32_vi:
3646   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3647   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3648   case AMDGPU::V_SUBREV_F32_e64_vi:
3649 
3650   case AMDGPU::V_SUBREV_CO_U32_e32:
3651   case AMDGPU::V_SUBREV_CO_U32_e64:
3652   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3653   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3654 
3655   case AMDGPU::V_SUBBREV_U32_e32:
3656   case AMDGPU::V_SUBBREV_U32_e64:
3657   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3658   case AMDGPU::V_SUBBREV_U32_e32_vi:
3659   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3660   case AMDGPU::V_SUBBREV_U32_e64_vi:
3661 
3662   case AMDGPU::V_SUBREV_U32_e32:
3663   case AMDGPU::V_SUBREV_U32_e64:
3664   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3665   case AMDGPU::V_SUBREV_U32_e32_vi:
3666   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3667   case AMDGPU::V_SUBREV_U32_e64_vi:
3668 
3669   case AMDGPU::V_SUBREV_F16_e32:
3670   case AMDGPU::V_SUBREV_F16_e64:
3671   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3672   case AMDGPU::V_SUBREV_F16_e32_vi:
3673   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3674   case AMDGPU::V_SUBREV_F16_e64_vi:
3675 
3676   case AMDGPU::V_SUBREV_U16_e32:
3677   case AMDGPU::V_SUBREV_U16_e64:
3678   case AMDGPU::V_SUBREV_U16_e32_vi:
3679   case AMDGPU::V_SUBREV_U16_e64_vi:
3680 
3681   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3682   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3683   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3684 
3685   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3686   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3687 
3688   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3689   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3690 
3691   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3692   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3693 
3694   case AMDGPU::V_LSHRREV_B32_e32:
3695   case AMDGPU::V_LSHRREV_B32_e64:
3696   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3697   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3698   case AMDGPU::V_LSHRREV_B32_e32_vi:
3699   case AMDGPU::V_LSHRREV_B32_e64_vi:
3700   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3701   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3702 
3703   case AMDGPU::V_ASHRREV_I32_e32:
3704   case AMDGPU::V_ASHRREV_I32_e64:
3705   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3706   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3707   case AMDGPU::V_ASHRREV_I32_e32_vi:
3708   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3709   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3710   case AMDGPU::V_ASHRREV_I32_e64_vi:
3711 
3712   case AMDGPU::V_LSHLREV_B32_e32:
3713   case AMDGPU::V_LSHLREV_B32_e64:
3714   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3715   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3716   case AMDGPU::V_LSHLREV_B32_e32_vi:
3717   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3718   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3719   case AMDGPU::V_LSHLREV_B32_e64_vi:
3720 
3721   case AMDGPU::V_LSHLREV_B16_e32:
3722   case AMDGPU::V_LSHLREV_B16_e64:
3723   case AMDGPU::V_LSHLREV_B16_e32_vi:
3724   case AMDGPU::V_LSHLREV_B16_e64_vi:
3725   case AMDGPU::V_LSHLREV_B16_gfx10:
3726 
3727   case AMDGPU::V_LSHRREV_B16_e32:
3728   case AMDGPU::V_LSHRREV_B16_e64:
3729   case AMDGPU::V_LSHRREV_B16_e32_vi:
3730   case AMDGPU::V_LSHRREV_B16_e64_vi:
3731   case AMDGPU::V_LSHRREV_B16_gfx10:
3732 
3733   case AMDGPU::V_ASHRREV_I16_e32:
3734   case AMDGPU::V_ASHRREV_I16_e64:
3735   case AMDGPU::V_ASHRREV_I16_e32_vi:
3736   case AMDGPU::V_ASHRREV_I16_e64_vi:
3737   case AMDGPU::V_ASHRREV_I16_gfx10:
3738 
3739   case AMDGPU::V_LSHLREV_B64_e64:
3740   case AMDGPU::V_LSHLREV_B64_gfx10:
3741   case AMDGPU::V_LSHLREV_B64_vi:
3742 
3743   case AMDGPU::V_LSHRREV_B64_e64:
3744   case AMDGPU::V_LSHRREV_B64_gfx10:
3745   case AMDGPU::V_LSHRREV_B64_vi:
3746 
3747   case AMDGPU::V_ASHRREV_I64_e64:
3748   case AMDGPU::V_ASHRREV_I64_gfx10:
3749   case AMDGPU::V_ASHRREV_I64_vi:
3750 
3751   case AMDGPU::V_PK_LSHLREV_B16:
3752   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3753   case AMDGPU::V_PK_LSHLREV_B16_vi:
3754 
3755   case AMDGPU::V_PK_LSHRREV_B16:
3756   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3757   case AMDGPU::V_PK_LSHRREV_B16_vi:
3758   case AMDGPU::V_PK_ASHRREV_I16:
3759   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3760   case AMDGPU::V_PK_ASHRREV_I16_vi:
3761     return true;
3762   default:
3763     return false;
3764   }
3765 }
3766 
3767 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3768 
3769   using namespace SIInstrFlags;
3770   const unsigned Opcode = Inst.getOpcode();
3771   const MCInstrDesc &Desc = MII.get(Opcode);
3772 
  // The lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
3775   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3776   if ((Desc.TSFlags & Enc) == 0)
3777     return None;
3778 
3779   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3780     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3781     if (SrcIdx == -1)
3782       break;
3783     const auto &Src = Inst.getOperand(SrcIdx);
3784     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3785 
3786       if (isGFX90A())
3787         return StringRef("lds_direct is not supported on this GPU");
3788 
3789       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3790         return StringRef("lds_direct cannot be used with this instruction");
3791 
3792       if (SrcName != OpName::src0)
3793         return StringRef("lds_direct may be used as src0 only");
3794     }
3795   }
3796 
3797   return None;
3798 }
3799 
3800 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3801   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3802     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3803     if (Op.isFlatOffset())
3804       return Op.getStartLoc();
3805   }
3806   return getLoc();
3807 }
3808 
3809 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3810                                          const OperandVector &Operands) {
3811   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3812   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3813     return true;
3814 
3815   auto Opcode = Inst.getOpcode();
3816   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3817   assert(OpNum != -1);
3818 
3819   const auto &Op = Inst.getOperand(OpNum);
3820   if (!hasFlatOffsets() && Op.getImm() != 0) {
3821     Error(getFlatOffsetLoc(Operands),
3822           "flat offset modifier is not supported on this GPU");
3823     return false;
3824   }
3825 
  // GLOBAL and SCRATCH instructions use a signed offset.
  // For a plain FLAT segment the offset must be positive;
  // the MSB is ignored and forced to zero.
3828   if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3829     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3830     if (!isIntN(OffsetSize, Op.getImm())) {
3831       Error(getFlatOffsetLoc(Operands),
3832             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3833       return false;
3834     }
3835   } else {
3836     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3837     if (!isUIntN(OffsetSize, Op.getImm())) {
3838       Error(getFlatOffsetLoc(Operands),
3839             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3840       return false;
3841     }
3842   }
3843 
3844   return true;
3845 }
3846 
3847 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3848   // Start with second operand because SMEM Offset cannot be dst or src0.
3849   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3850     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3851     if (Op.isSMEMOffset())
3852       return Op.getStartLoc();
3853   }
3854   return getLoc();
3855 }
3856 
3857 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3858                                          const OperandVector &Operands) {
3859   if (isCI() || isSI())
3860     return true;
3861 
3862   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3863   if ((TSFlags & SIInstrFlags::SMRD) == 0)
3864     return true;
3865 
3866   auto Opcode = Inst.getOpcode();
3867   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3868   if (OpNum == -1)
3869     return true;
3870 
3871   const auto &Op = Inst.getOperand(OpNum);
3872   if (!Op.isImm())
3873     return true;
3874 
3875   uint64_t Offset = Op.getImm();
3876   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3877   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3878       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3879     return true;
3880 
3881   Error(getSMEMOffsetLoc(Operands),
3882         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3883                                "expected a 21-bit signed offset");
3884 
3885   return false;
3886 }
3887 
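// SOP2/SOPC instructions may encode at most one literal constant among
// their source operands.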
3888 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3889   unsigned Opcode = Inst.getOpcode();
3890   const MCInstrDesc &Desc = MII.get(Opcode);
3891   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3892     return true;
3893 
3894   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3895   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3896 
3897   const int OpIndices[] = { Src0Idx, Src1Idx };
3898 
3899   unsigned NumExprs = 0;
3900   unsigned NumLiterals = 0;
3901   uint32_t LiteralValue;
3902 
3903   for (int OpIdx : OpIndices) {
3904     if (OpIdx == -1) break;
3905 
3906     const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (like those used by s_set_gpr_idx_on).
3908     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3909       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3910         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3911         if (NumLiterals == 0 || LiteralValue != Value) {
3912           LiteralValue = Value;
3913           ++NumLiterals;
3914         }
3915       } else if (MO.isExpr()) {
3916         ++NumExprs;
3917       }
3918     }
3919   }
3920 
3921   return NumLiterals + NumExprs <= 1;
3922 }
3923 
3924 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3925   const unsigned Opc = Inst.getOpcode();
3926   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3927       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3928     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3929     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3930 
3931     if (OpSel & ~3)
3932       return false;
3933   }
3934   return true;
3935 }
3936 
3937 // Check if VCC register matches wavefront size
3938 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3939   auto FB = getFeatureBits();
3940   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3941     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3942 }
3943 
// A VOP3 literal is only allowed on GFX10+, and only one can be used.
3945 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3946                                           const OperandVector &Operands) {
3947   unsigned Opcode = Inst.getOpcode();
3948   const MCInstrDesc &Desc = MII.get(Opcode);
3949   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3950     return true;
3951 
3952   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3953   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3954   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3955 
3956   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3957 
3958   unsigned NumExprs = 0;
3959   unsigned NumLiterals = 0;
3960   uint32_t LiteralValue;
3961 
3962   for (int OpIdx : OpIndices) {
3963     if (OpIdx == -1) break;
3964 
3965     const MCOperand &MO = Inst.getOperand(OpIdx);
3966     if (!MO.isImm() && !MO.isExpr())
3967       continue;
3968     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3969       continue;
3970 
3971     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3972         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3973       Error(getConstLoc(Operands),
3974             "inline constants are not allowed for this operand");
3975       return false;
3976     }
3977 
3978     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3979       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3980       if (NumLiterals == 0 || LiteralValue != Value) {
3981         LiteralValue = Value;
3982         ++NumLiterals;
3983       }
3984     } else if (MO.isExpr()) {
3985       ++NumExprs;
3986     }
3987   }
3988   NumLiterals += NumExprs;
3989 
3990   if (!NumLiterals)
3991     return true;
3992 
3993   if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
3994     Error(getLitLoc(Operands), "literal operands are not supported");
3995     return false;
3996   }
3997 
3998   if (NumLiterals > 1) {
3999     Error(getLitLoc(Operands), "only one literal operand is allowed");
4000     return false;
4001   }
4002 
4003   return true;
4004 }
4005 
4006 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4007 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4008                          const MCRegisterInfo *MRI) {
4009   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4010   if (OpIdx < 0)
4011     return -1;
4012 
4013   const MCOperand &Op = Inst.getOperand(OpIdx);
4014   if (!Op.isReg())
4015     return -1;
4016 
4017   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4018   auto Reg = Sub ? Sub : Op.getReg();
4019   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4020   return AGRP32.contains(Reg) ? 1 : 0;
4021 }
4022 
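// For FLAT, buffer, image and DS memory instructions, check that the dst
// and data operands are consistently VGPRs or AGPRs. AGPR operands are
// only accepted with gfx90a, and there dst and data must agree.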
4023 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4024   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4025   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4026                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4027                   SIInstrFlags::DS)) == 0)
4028     return true;
4029 
4030   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4031                                                       : AMDGPU::OpName::vdata;
4032 
4033   const MCRegisterInfo *MRI = getMRI();
4034   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4035   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4036 
4037   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4038     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4039     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4040       return false;
4041   }
4042 
4043   auto FB = getFeatureBits();
4044   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4045     if (DataAreg < 0 || DstAreg < 0)
4046       return true;
4047     return DstAreg == DataAreg;
4048   }
4049 
4050   return DstAreg < 1 && DataAreg < 1;
4051 }
4052 
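// On gfx90a, VGPR and AGPR tuples must start at an even-numbered register.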
4053 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4054   auto FB = getFeatureBits();
4055   if (!FB[AMDGPU::FeatureGFX90AInsts])
4056     return true;
4057 
4058   const MCRegisterInfo *MRI = getMRI();
4059   const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4060   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4061   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4062     const MCOperand &Op = Inst.getOperand(I);
4063     if (!Op.isReg())
4064       continue;
4065 
4066     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4067     if (!Sub)
4068       continue;
4069 
4070     if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4071       return false;
4072     if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4073       return false;
4074   }
4075 
4076   return true;
4077 }
4078 
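// Validate the cache policy (cpol) bits: SMRD accepts only glc and dlc,
// returning atomics (other than image atomics) must set glc, non-returning
// atomics must not, and gfx90a FP atomics must not use scc.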
4079 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4080                                             const OperandVector &Operands,
4081                                             const SMLoc &IDLoc) {
4082   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4083                                            AMDGPU::OpName::cpol);
4084   if (CPolPos == -1)
4085     return true;
4086 
4087   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4088 
4089   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4090   if ((TSFlags & (SIInstrFlags::SMRD)) &&
4091       (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4092     Error(IDLoc, "invalid cache policy for SMRD instruction");
4093     return false;
4094   }
4095 
4096   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4097     return true;
4098 
4099   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4100     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4101       Error(IDLoc, "instruction must use glc");
4102       return false;
4103     }
4104   } else {
4105     if (CPol & CPol::GLC) {
4106       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4107       StringRef CStr(S.getPointer());
4108       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4109       Error(S, "instruction must not use glc");
4110       return false;
4111     }
4112   }
4113 
4114   if (isGFX90A() && (CPol & CPol::SCC) && (TSFlags & SIInstrFlags::FPAtomic)) {
4115     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4116     StringRef CStr(S.getPointer());
4117     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4118     Error(S, "instruction must not use scc");
4119     return false;
4120   }
4121 
4122   return true;
4123 }
4124 
4125 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4126                                           const SMLoc &IDLoc,
4127                                           const OperandVector &Operands) {
4128   if (auto ErrMsg = validateLdsDirect(Inst)) {
4129     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4130     return false;
4131   }
4132   if (!validateSOPLiteral(Inst)) {
4133     Error(getLitLoc(Operands),
4134       "only one literal operand is allowed");
4135     return false;
4136   }
4137   if (!validateVOP3Literal(Inst, Operands)) {
4138     return false;
4139   }
4140   if (!validateConstantBusLimitations(Inst, Operands)) {
4141     return false;
4142   }
4143   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4144     return false;
4145   }
4146   if (!validateIntClampSupported(Inst)) {
4147     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4148       "integer clamping is not supported on this GPU");
4149     return false;
4150   }
4151   if (!validateOpSel(Inst)) {
4152     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4153       "invalid op_sel operand");
4154     return false;
4155   }
  // For MUBUF/MTBUF, d16 is a part of the opcode,
  // so there is nothing to validate.
4157   if (!validateMIMGD16(Inst)) {
4158     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4159       "d16 modifier is not supported on this GPU");
4160     return false;
4161   }
4162   if (!validateMIMGDim(Inst)) {
4163     Error(IDLoc, "dim modifier is required on this GPU");
4164     return false;
4165   }
4166   if (!validateMIMGMSAA(Inst)) {
4167     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4168           "invalid dim; must be MSAA type");
4169     return false;
4170   }
4171   if (!validateMIMGDataSize(Inst)) {
4172     Error(IDLoc,
4173       "image data size does not match dmask and tfe");
4174     return false;
4175   }
4176   if (!validateMIMGAddrSize(Inst)) {
4177     Error(IDLoc,
4178       "image address size does not match dim and a16");
4179     return false;
4180   }
4181   if (!validateMIMGAtomicDMask(Inst)) {
4182     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4183       "invalid atomic image dmask");
4184     return false;
4185   }
4186   if (!validateMIMGGatherDMask(Inst)) {
4187     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4188       "invalid image_gather dmask: only one bit must be set");
4189     return false;
4190   }
4191   if (!validateMovrels(Inst, Operands)) {
4192     return false;
4193   }
4194   if (!validateFlatOffset(Inst, Operands)) {
4195     return false;
4196   }
4197   if (!validateSMEMOffset(Inst, Operands)) {
4198     return false;
4199   }
4200   if (!validateMAIAccWrite(Inst, Operands)) {
4201     return false;
4202   }
4203   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4204     return false;
4205   }
4206 
4207   if (!validateAGPRLdSt(Inst)) {
4208     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4209     ? "invalid register class: data and dst should be all VGPR or AGPR"
4210     : "invalid register class: agpr loads and stores not supported on this GPU"
4211     );
4212     return false;
4213   }
4214   if (!validateVGPRAlign(Inst)) {
4215     Error(IDLoc,
4216       "invalid register class: vgpr tuples must be 64 bit aligned");
4217     return false;
4218   }
4219 
4220   if (!validateDivScale(Inst)) {
4221     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4222     return false;
4223   }
4227 
4228   return true;
4229 }
4230 
4231 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4232                                             const FeatureBitset &FBS,
4233                                             unsigned VariantID = 0);
4234 
4235 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4236                                 const FeatureBitset &AvailableFeatures,
4237                                 unsigned VariantID);
4238 
4239 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4240                                        const FeatureBitset &FBS) {
4241   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4242 }
4243 
4244 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4245                                        const FeatureBitset &FBS,
4246                                        ArrayRef<unsigned> Variants) {
4247   for (auto Variant : Variants) {
4248     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4249       return true;
4250   }
4251 
4252   return false;
4253 }
4254 
4255 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4256                                                   const SMLoc &IDLoc) {
4257   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4258 
4259   // Check if requested instruction variant is supported.
4260   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4261     return false;
4262 
4263   // This instruction is not supported.
4264   // Clear any other pending errors because they are no longer relevant.
4265   getParser().clearPendingErrors();
4266 
4267   // Requested instruction variant is not supported.
4268   // Check if any other variants are supported.
4269   StringRef VariantName = getMatchedVariantName();
4270   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4271     return Error(IDLoc,
4272                  Twine(VariantName,
4273                        " variant of this instruction is not supported"));
4274   }
4275 
4276   // Finally check if this instruction is supported on any other GPU.
4277   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4278     return Error(IDLoc, "instruction not supported on this GPU");
4279   }
4280 
4281   // Instruction not supported on any GPU. Probably a typo.
4282   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4283   return Error(IDLoc, "invalid instruction" + Suggestion);
4284 }
4285 
4286 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4287                                               OperandVector &Operands,
4288                                               MCStreamer &Out,
4289                                               uint64_t &ErrorInfo,
4290                                               bool MatchingInlineAsm) {
4291   MCInst Inst;
4292   unsigned Result = Match_Success;
4293   for (auto Variant : getMatchedVariants()) {
4294     uint64_t EI;
4295     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4296                                   Variant);
    // We order match statuses from least to most specific. We use the most
    // specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4300     if ((R == Match_Success) ||
4301         (R == Match_PreferE32) ||
4302         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4303         (R == Match_InvalidOperand && Result != Match_MissingFeature
4304                                    && Result != Match_PreferE32) ||
4305         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4306                                    && Result != Match_MissingFeature
4307                                    && Result != Match_PreferE32)) {
4308       Result = R;
4309       ErrorInfo = EI;
4310     }
4311     if (R == Match_Success)
4312       break;
4313   }
4314 
4315   if (Result == Match_Success) {
4316     if (!validateInstruction(Inst, IDLoc, Operands)) {
4317       return true;
4318     }
4319     Inst.setLoc(IDLoc);
4320     Out.emitInstruction(Inst, getSTI());
4321     return false;
4322   }
4323 
4324   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4325   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4326     return true;
4327   }
4328 
4329   switch (Result) {
4330   default: break;
4331   case Match_MissingFeature:
4332     // It has been verified that the specified instruction
4333     // mnemonic is valid. A match was found but it requires
4334     // features which are not supported on this GPU.
4335     return Error(IDLoc, "operands are not valid for this GPU or mode");
4336 
4337   case Match_InvalidOperand: {
4338     SMLoc ErrorLoc = IDLoc;
4339     if (ErrorInfo != ~0ULL) {
4340       if (ErrorInfo >= Operands.size()) {
4341         return Error(IDLoc, "too few operands for instruction");
4342       }
4343       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4344       if (ErrorLoc == SMLoc())
4345         ErrorLoc = IDLoc;
4346     }
4347     return Error(ErrorLoc, "invalid operand for instruction");
4348   }
4349 
4350   case Match_PreferE32:
4351     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4352                         "should be encoded as e32");
4353   case Match_MnemonicFail:
4354     llvm_unreachable("Invalid instructions should have been handled already");
4355   }
4356   llvm_unreachable("Implement any new match types added!");
4357 }
4358 
4359 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4360   int64_t Tmp = -1;
4361   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4362     return true;
4363   }
4364   if (getParser().parseAbsoluteExpression(Tmp)) {
4365     return true;
4366   }
4367   Ret = static_cast<uint32_t>(Tmp);
4368   return false;
4369 }
4370 
4371 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4372                                                uint32_t &Minor) {
4373   if (ParseAsAbsoluteExpression(Major))
4374     return TokError("invalid major version");
4375 
4376   if (!trySkipToken(AsmToken::Comma))
4377     return TokError("minor version number required, comma expected");
4378 
4379   if (ParseAsAbsoluteExpression(Minor))
4380     return TokError("invalid minor version");
4381 
4382   return false;
4383 }
4384 
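// Parse the .amdgcn_target directive. The quoted target string must exactly
// match the ISA version string of the current subtarget, e.g. (illustrative)
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx906"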
4385 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4386   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4387     return TokError("directive only supported for amdgcn architecture");
4388 
4389   std::string Target;
4390 
4391   SMLoc TargetStart = getLoc();
4392   if (getParser().parseEscapedString(Target))
4393     return true;
4394   SMRange TargetRange = SMRange(TargetStart, getLoc());
4395 
4396   std::string ExpectedTarget;
4397   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
4398   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
4399 
4400   if (Target != ExpectedTargetOS.str())
4401     return Error(TargetRange.Start, "target must match options", TargetRange);
4402 
4403   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
4404   return false;
4405 }
4406 
4407 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4408   return Error(Range.Start, "value out of range", Range);
4409 }
4410 
4411 bool AMDGPUAsmParser::calculateGPRBlocks(
4412     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4413     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4414     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4415     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4416   // TODO(scott.linder): These calculations are duplicated from
4417   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4418   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4419 
4420   unsigned NumVGPRs = NextFreeVGPR;
4421   unsigned NumSGPRs = NextFreeSGPR;
4422 
4423   if (Version.Major >= 10)
4424     NumSGPRs = 0;
4425   else {
4426     unsigned MaxAddressableNumSGPRs =
4427         IsaInfo::getAddressableNumSGPRs(&getSTI());
4428 
4429     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4430         NumSGPRs > MaxAddressableNumSGPRs)
4431       return OutOfRangeError(SGPRRange);
4432 
4433     NumSGPRs +=
4434         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4435 
4436     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4437         NumSGPRs > MaxAddressableNumSGPRs)
4438       return OutOfRangeError(SGPRRange);
4439 
4440     if (Features.test(FeatureSGPRInitBug))
4441       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4442   }
4443 
4444   VGPRBlocks =
4445       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4446   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4447 
4448   return false;
4449 }
4450 
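// Parse a .amdhsa_kernel directive and emit the corresponding kernel
// descriptor. A minimal illustrative example:
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel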
4451 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4452   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4453     return TokError("directive only supported for amdgcn architecture");
4454 
4455   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4456     return TokError("directive only supported for amdhsa OS");
4457 
4458   StringRef KernelName;
4459   if (getParser().parseIdentifier(KernelName))
4460     return true;
4461 
4462   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4463 
4464   StringSet<> Seen;
4465 
4466   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4467 
4468   SMRange VGPRRange;
4469   uint64_t NextFreeVGPR = 0;
4470   uint64_t AccumOffset = 0;
4471   SMRange SGPRRange;
4472   uint64_t NextFreeSGPR = 0;
4473   unsigned UserSGPRCount = 0;
4474   bool ReserveVCC = true;
4475   bool ReserveFlatScr = true;
4476   bool ReserveXNACK = hasXNACK();
4477   Optional<bool> EnableWavefrontSize32;
4478 
4479   while (true) {
4480     while (trySkipToken(AsmToken::EndOfStatement));
4481 
4482     StringRef ID;
4483     SMRange IDRange = getTok().getLocRange();
4484     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4485       return true;
4486 
4487     if (ID == ".end_amdhsa_kernel")
4488       break;
4489 
4490     if (Seen.find(ID) != Seen.end())
4491       return TokError(".amdhsa_ directives cannot be repeated");
4492     Seen.insert(ID);
4493 
4494     SMLoc ValStart = getLoc();
4495     int64_t IVal;
4496     if (getParser().parseAbsoluteExpression(IVal))
4497       return true;
4498     SMLoc ValEnd = getLoc();
4499     SMRange ValRange = SMRange(ValStart, ValEnd);
4500 
4501     if (IVal < 0)
4502       return OutOfRangeError(ValRange);
4503 
4504     uint64_t Val = IVal;
4505 
4506 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4507   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4508     return OutOfRangeError(RANGE);                                             \
4509   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4510 
4511     if (ID == ".amdhsa_group_segment_fixed_size") {
4512       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4513         return OutOfRangeError(ValRange);
4514       KD.group_segment_fixed_size = Val;
4515     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4516       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4517         return OutOfRangeError(ValRange);
4518       KD.private_segment_fixed_size = Val;
4519     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4520       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4521                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4522                        Val, ValRange);
4523       if (Val)
4524         UserSGPRCount += 4;
4525     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4526       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4527                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4528                        ValRange);
4529       if (Val)
4530         UserSGPRCount += 2;
4531     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4532       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4533                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4534                        ValRange);
4535       if (Val)
4536         UserSGPRCount += 2;
4537     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4538       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4539                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4540                        Val, ValRange);
4541       if (Val)
4542         UserSGPRCount += 2;
4543     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4544       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4545                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4546                        ValRange);
4547       if (Val)
4548         UserSGPRCount += 2;
4549     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4550       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4551                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4552                        ValRange);
4553       if (Val)
4554         UserSGPRCount += 2;
4555     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4556       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4557                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4558                        Val, ValRange);
4559       if (Val)
4560         UserSGPRCount += 1;
4561     } else if (ID == ".amdhsa_wavefront_size32") {
4562       if (IVersion.Major < 10)
4563         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4564       EnableWavefrontSize32 = Val;
4565       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4566                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4567                        Val, ValRange);
4568     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4569       PARSE_BITS_ENTRY(
4570           KD.compute_pgm_rsrc2,
4571           COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
4572           ValRange);
4573     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4574       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4575                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4576                        ValRange);
4577     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4578       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4579                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4580                        ValRange);
4581     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4582       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4583                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4584                        ValRange);
4585     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4586       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4587                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4588                        ValRange);
4589     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4590       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4591                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4592                        ValRange);
4593     } else if (ID == ".amdhsa_next_free_vgpr") {
4594       VGPRRange = ValRange;
4595       NextFreeVGPR = Val;
4596     } else if (ID == ".amdhsa_next_free_sgpr") {
4597       SGPRRange = ValRange;
4598       NextFreeSGPR = Val;
4599     } else if (ID == ".amdhsa_accum_offset") {
4600       if (!isGFX90A())
4601         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4602       AccumOffset = Val;
4603     } else if (ID == ".amdhsa_reserve_vcc") {
4604       if (!isUInt<1>(Val))
4605         return OutOfRangeError(ValRange);
4606       ReserveVCC = Val;
4607     } else if (ID == ".amdhsa_reserve_flat_scratch") {
4608       if (IVersion.Major < 7)
4609         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4610       if (!isUInt<1>(Val))
4611         return OutOfRangeError(ValRange);
4612       ReserveFlatScr = Val;
4613     } else if (ID == ".amdhsa_reserve_xnack_mask") {
4614       if (IVersion.Major < 8)
4615         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4616       if (!isUInt<1>(Val))
4617         return OutOfRangeError(ValRange);
4618       ReserveXNACK = Val;
4619     } else if (ID == ".amdhsa_float_round_mode_32") {
4620       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4621                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4622     } else if (ID == ".amdhsa_float_round_mode_16_64") {
4623       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4624                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4625     } else if (ID == ".amdhsa_float_denorm_mode_32") {
4626       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4627                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4628     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4629       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4630                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4631                        ValRange);
4632     } else if (ID == ".amdhsa_dx10_clamp") {
4633       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4634                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4635     } else if (ID == ".amdhsa_ieee_mode") {
4636       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4637                        Val, ValRange);
4638     } else if (ID == ".amdhsa_fp16_overflow") {
4639       if (IVersion.Major < 9)
4640         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4641       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4642                        ValRange);
4643     } else if (ID == ".amdhsa_tg_split") {
4644       if (!isGFX90A())
4645         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4646       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4647                        ValRange);
4648     } else if (ID == ".amdhsa_workgroup_processor_mode") {
4649       if (IVersion.Major < 10)
4650         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4651       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4652                        ValRange);
4653     } else if (ID == ".amdhsa_memory_ordered") {
4654       if (IVersion.Major < 10)
4655         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4656       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4657                        ValRange);
4658     } else if (ID == ".amdhsa_forward_progress") {
4659       if (IVersion.Major < 10)
4660         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4661       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4662                        ValRange);
4663     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4664       PARSE_BITS_ENTRY(
4665           KD.compute_pgm_rsrc2,
4666           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4667           ValRange);
4668     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4669       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4670                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4671                        Val, ValRange);
4672     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4673       PARSE_BITS_ENTRY(
4674           KD.compute_pgm_rsrc2,
4675           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4676           ValRange);
4677     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4678       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4679                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4680                        Val, ValRange);
4681     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4682       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4683                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4684                        Val, ValRange);
4685     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4686       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4687                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4688                        Val, ValRange);
4689     } else if (ID == ".amdhsa_exception_int_div_zero") {
4690       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4691                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4692                        Val, ValRange);
4693     } else {
4694       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4695     }
4696 
4697 #undef PARSE_BITS_ENTRY
4698   }
4699 
4700   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4701     return TokError(".amdhsa_next_free_vgpr directive is required");
4702 
4703   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4704     return TokError(".amdhsa_next_free_sgpr directive is required");
4705 
4706   unsigned VGPRBlocks;
4707   unsigned SGPRBlocks;
4708   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4709                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4710                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4711                          SGPRBlocks))
4712     return true;
4713 
4714   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4715           VGPRBlocks))
4716     return OutOfRangeError(VGPRRange);
4717   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4718                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4719 
4720   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4721           SGPRBlocks))
4722     return OutOfRangeError(SGPRRange);
4723   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4724                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4725                   SGPRBlocks);
4726 
4727   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4728     return TokError("too many user SGPRs enabled");
4729   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4730                   UserSGPRCount);
4731 
4732   if (isGFX90A()) {
4733     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4734       return TokError(".amdhsa_accum_offset directive is required");
4735     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4736       return TokError("accum_offset should be in range [4..256] in "
4737                       "increments of 4");
4738     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4739       return TokError("accum_offset exceeds total VGPR allocation");
4740     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4741                     (AccumOffset / 4 - 1));
4742   }
4743 
4744   getTargetStreamer().EmitAmdhsaKernelDescriptor(
4745       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4746       ReserveFlatScr, ReserveXNACK);
4747   return false;
4748 }
4749 
4750 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4751   uint32_t Major;
4752   uint32_t Minor;
4753 
4754   if (ParseDirectiveMajorMinor(Major, Minor))
4755     return true;
4756 
4757   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4758   return false;
4759 }
4760 
4761 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4762   uint32_t Major;
4763   uint32_t Minor;
4764   uint32_t Stepping;
4765   StringRef VendorName;
4766   StringRef ArchName;
4767 
4768   // If this directive has no arguments, then use the ISA version for the
4769   // targeted GPU.
4770   if (isToken(AsmToken::EndOfStatement)) {
4771     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4772     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4773                                                       ISA.Stepping,
4774                                                       "AMD", "AMDGPU");
4775     return false;
4776   }
4777 
4778   if (ParseDirectiveMajorMinor(Major, Minor))
4779     return true;
4780 
4781   if (!trySkipToken(AsmToken::Comma))
4782     return TokError("stepping version number required, comma expected");
4783 
4784   if (ParseAsAbsoluteExpression(Stepping))
4785     return TokError("invalid stepping version");
4786 
4787   if (!trySkipToken(AsmToken::Comma))
4788     return TokError("vendor name required, comma expected");
4789 
4790   if (!parseString(VendorName, "invalid vendor name"))
4791     return true;
4792 
4793   if (!trySkipToken(AsmToken::Comma))
4794     return TokError("arch name required, comma expected");
4795 
4796   if (!parseString(ArchName, "invalid arch name"))
4797     return true;
4798 
4799   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4800                                                     VendorName, ArchName);
4801   return false;
4802 }
4803 
4804 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4805                                                amd_kernel_code_t &Header) {
4806   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4807   // assembly for backwards compatibility.
4808   if (ID == "max_scratch_backing_memory_byte_size") {
4809     Parser.eatToEndOfStatement();
4810     return false;
4811   }
4812 
4813   SmallString<40> ErrStr;
4814   raw_svector_ostream Err(ErrStr);
4815   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4816     return TokError(Err.str());
4817   }
4818   Lex();
4819 
4820   if (ID == "enable_wavefront_size32") {
4821     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4822       if (!isGFX10Plus())
4823         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4824       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4825         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4826     } else {
4827       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4828         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4829     }
4830   }
4831 
4832   if (ID == "wavefront_size") {
4833     if (Header.wavefront_size == 5) {
4834       if (!isGFX10Plus())
4835         return TokError("wavefront_size=5 is only allowed on GFX10+");
4836       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4837         return TokError("wavefront_size=5 requires +WavefrontSize32");
4838     } else if (Header.wavefront_size == 6) {
4839       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4840         return TokError("wavefront_size=6 requires +WavefrontSize64");
4841     }
4842   }
4843 
4844   if (ID == "enable_wgp_mode") {
4845     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4846         !isGFX10Plus())
4847       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4848   }
4849 
4850   if (ID == "enable_mem_ordered") {
4851     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4852         !isGFX10Plus())
4853       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4854   }
4855 
4856   if (ID == "enable_fwd_progress") {
4857     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4858         !isGFX10Plus())
4859       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4860   }
4861 
4862   return false;
4863 }
4864 
4865 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4866   amd_kernel_code_t Header;
4867   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4868 
4869   while (true) {
4870     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4871     // will set the current token to EndOfStatement.
4872     while(trySkipToken(AsmToken::EndOfStatement));
4873 
4874     StringRef ID;
4875     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4876       return true;
4877 
4878     if (ID == ".end_amd_kernel_code_t")
4879       break;
4880 
4881     if (ParseAMDKernelCodeTValue(ID, Header))
4882       return true;
4883   }
4884 
4885   getTargetStreamer().EmitAMDKernelCodeT(Header);
4886 
4887   return false;
4888 }
4889 
4890 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4891   StringRef KernelName;
4892   if (!parseId(KernelName, "expected symbol name"))
4893     return true;
4894 
4895   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4896                                            ELF::STT_AMDGPU_HSA_KERNEL);
4897 
4898   KernelScope.initialize(getContext());
4899   return false;
4900 }
4901 
4902 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4903   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4904     return Error(getLoc(),
4905                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4906                  "architectures");
4907   }
4908 
4909   auto ISAVersionStringFromASM = getToken().getStringContents();
4910 
4911   std::string ISAVersionStringFromSTI;
4912   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4913   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4914 
4915   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4916     return Error(getLoc(),
4917                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4918                  "arguments specified through the command line");
4919   }
4920 
4921   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4922   Lex();
4923 
4924   return false;
4925 }
4926 
4927 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4928   const char *AssemblerDirectiveBegin;
4929   const char *AssemblerDirectiveEnd;
4930   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4931       isHsaAbiVersion3(&getSTI())
4932           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4933                             HSAMD::V3::AssemblerDirectiveEnd)
4934           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4935                             HSAMD::AssemblerDirectiveEnd);
4936 
4937   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4938     return Error(getLoc(),
4939                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4940                  "not available on non-amdhsa OSes")).str());
4941   }
4942 
4943   std::string HSAMetadataString;
4944   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4945                           HSAMetadataString))
4946     return true;
4947 
4948   if (isHsaAbiVersion3(&getSTI())) {
4949     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4950       return Error(getLoc(), "invalid HSA metadata");
4951   } else {
4952     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4953       return Error(getLoc(), "invalid HSA metadata");
4954   }
4955 
4956   return false;
4957 }
4958 
4959 /// Common code to parse out a block of text (typically YAML) between start and
4960 /// end directives.
4961 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4962                                           const char *AssemblerDirectiveEnd,
4963                                           std::string &CollectString) {
4964 
4965   raw_string_ostream CollectStream(CollectString);
4966 
4967   getLexer().setSkipSpace(false);
4968 
4969   bool FoundEnd = false;
4970   while (!isToken(AsmToken::Eof)) {
4971     while (isToken(AsmToken::Space)) {
4972       CollectStream << getTokenStr();
4973       Lex();
4974     }
4975 
4976     if (trySkipId(AssemblerDirectiveEnd)) {
4977       FoundEnd = true;
4978       break;
4979     }
4980 
4981     CollectStream << Parser.parseStringToEndOfStatement()
4982                   << getContext().getAsmInfo()->getSeparatorString();
4983 
4984     Parser.eatToEndOfStatement();
4985   }
4986 
4987   getLexer().setSkipSpace(true);
4988 
4989   if (isToken(AsmToken::Eof) && !FoundEnd) {
4990     return TokError(Twine("expected directive ") +
4991                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4992   }
4993 
4994   CollectStream.flush();
4995   return false;
4996 }
4997 
4998 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4999 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5000   std::string String;
5001   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5002                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5003     return true;
5004 
5005   auto PALMetadata = getTargetStreamer().getPALMetadata();
5006   if (!PALMetadata->setFromString(String))
5007     return Error(getLoc(), "invalid PAL metadata");
5008   return false;
5009 }
5010 
5011 /// Parse the assembler directive for old linear-format PAL metadata.
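/// The directive takes a comma-separated list of register/value pairs, each
/// given as an absolute expression, so an even number of values is required.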
5012 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5013   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5014     return Error(getLoc(),
5015                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5016                  "not available on non-amdpal OSes")).str());
5017   }
5018 
5019   auto PALMetadata = getTargetStreamer().getPALMetadata();
5020   PALMetadata->setLegacy();
5021   for (;;) {
5022     uint32_t Key, Value;
5023     if (ParseAsAbsoluteExpression(Key)) {
5024       return TokError(Twine("invalid value in ") +
5025                       Twine(PALMD::AssemblerDirective));
5026     }
5027     if (!trySkipToken(AsmToken::Comma)) {
5028       return TokError(Twine("expected an even number of values in ") +
5029                       Twine(PALMD::AssemblerDirective));
5030     }
5031     if (ParseAsAbsoluteExpression(Value)) {
5032       return TokError(Twine("invalid value in ") +
5033                       Twine(PALMD::AssemblerDirective));
5034     }
5035     PALMetadata->setRegister(Key, Value);
5036     if (!trySkipToken(AsmToken::Comma))
5037       break;
5038   }
5039   return false;
5040 }
5041 
5042 /// ParseDirectiveAMDGPULDS
5043 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
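///  A minimal illustrative use (symbol name and values are arbitrary) is
///  ".amdgpu_lds lds_sym, 1024, 16"; when the alignment is omitted it
///  defaults to 4.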
5044 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5045   if (getParser().checkForValidSection())
5046     return true;
5047 
5048   StringRef Name;
5049   SMLoc NameLoc = getLoc();
5050   if (getParser().parseIdentifier(Name))
5051     return TokError("expected identifier in directive");
5052 
5053   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5054   if (parseToken(AsmToken::Comma, "expected ','"))
5055     return true;
5056 
5057   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5058 
5059   int64_t Size;
5060   SMLoc SizeLoc = getLoc();
5061   if (getParser().parseAbsoluteExpression(Size))
5062     return true;
5063   if (Size < 0)
5064     return Error(SizeLoc, "size must be non-negative");
5065   if (Size > LocalMemorySize)
5066     return Error(SizeLoc, "size is too large");
5067 
5068   int64_t Alignment = 4;
5069   if (trySkipToken(AsmToken::Comma)) {
5070     SMLoc AlignLoc = getLoc();
5071     if (getParser().parseAbsoluteExpression(Alignment))
5072       return true;
5073     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5074       return Error(AlignLoc, "alignment must be a power of two");
5075 
5076     // Alignment larger than the size of LDS is possible in theory, as long
5077     // as the linker manages to place the symbol at address 0, but we do want
5078     // to make sure the alignment fits nicely into a 32-bit integer.
5079     if (Alignment >= 1u << 31)
5080       return Error(AlignLoc, "alignment is too large");
5081   }
5082 
5083   if (parseToken(AsmToken::EndOfStatement,
5084                  "unexpected token in '.amdgpu_lds' directive"))
5085     return true;
5086 
5087   Symbol->redefineIfPossible();
5088   if (!Symbol->isUndefined())
5089     return Error(NameLoc, "invalid symbol redefinition");
5090 
5091   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5092   return false;
5093 }
5094 
5095 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5096   StringRef IDVal = DirectiveID.getString();
5097 
5098   if (isHsaAbiVersion3(&getSTI())) {
5099     if (IDVal == ".amdgcn_target")
5100       return ParseDirectiveAMDGCNTarget();
5101 
5102     if (IDVal == ".amdhsa_kernel")
5103       return ParseDirectiveAMDHSAKernel();
5104 
5105     // TODO: Restructure/combine with PAL metadata directive.
5106     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5107       return ParseDirectiveHSAMetadata();
5108   } else {
5109     if (IDVal == ".hsa_code_object_version")
5110       return ParseDirectiveHSACodeObjectVersion();
5111 
5112     if (IDVal == ".hsa_code_object_isa")
5113       return ParseDirectiveHSACodeObjectISA();
5114 
5115     if (IDVal == ".amd_kernel_code_t")
5116       return ParseDirectiveAMDKernelCodeT();
5117 
5118     if (IDVal == ".amdgpu_hsa_kernel")
5119       return ParseDirectiveAMDGPUHsaKernel();
5120 
5121     if (IDVal == ".amd_amdgpu_isa")
5122       return ParseDirectiveISAVersion();
5123 
5124     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5125       return ParseDirectiveHSAMetadata();
5126   }
5127 
5128   if (IDVal == ".amdgpu_lds")
5129     return ParseDirectiveAMDGPULDS();
5130 
5131   if (IDVal == PALMD::AssemblerDirectiveBegin)
5132     return ParseDirectivePALMetadataBegin();
5133 
5134   if (IDVal == PALMD::AssemblerDirective)
5135     return ParseDirectivePALMetadata();
5136 
5137   return true;
5138 }
5139 
5140 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5141                                            unsigned RegNo) const {
5142 
5143   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5144        R.isValid(); ++R) {
5145     if (*R == RegNo)
5146       return isGFX9Plus();
5147   }
5148 
5149   // GFX10 has 2 more SGPRs, 104 and 105.
5150   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5151        R.isValid(); ++R) {
5152     if (*R == RegNo)
5153       return hasSGPR104_SGPR105();
5154   }
5155 
5156   switch (RegNo) {
5157   case AMDGPU::SRC_SHARED_BASE:
5158   case AMDGPU::SRC_SHARED_LIMIT:
5159   case AMDGPU::SRC_PRIVATE_BASE:
5160   case AMDGPU::SRC_PRIVATE_LIMIT:
5161   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5162     return isGFX9Plus();
5163   case AMDGPU::TBA:
5164   case AMDGPU::TBA_LO:
5165   case AMDGPU::TBA_HI:
5166   case AMDGPU::TMA:
5167   case AMDGPU::TMA_LO:
5168   case AMDGPU::TMA_HI:
5169     return !isGFX9Plus();
5170   case AMDGPU::XNACK_MASK:
5171   case AMDGPU::XNACK_MASK_LO:
5172   case AMDGPU::XNACK_MASK_HI:
5173     return (isVI() || isGFX9()) && hasXNACK();
5174   case AMDGPU::SGPR_NULL:
5175     return isGFX10Plus();
5176   default:
5177     break;
5178   }
5179 
5180   if (isCI())
5181     return true;
5182 
5183   if (isSI() || isGFX10Plus()) {
5184     // No flat_scr on SI.
5185     // On GFX10 flat scratch is not a valid register operand and can only be
5186     // accessed with s_setreg/s_getreg.
5187     switch (RegNo) {
5188     case AMDGPU::FLAT_SCR:
5189     case AMDGPU::FLAT_SCR_LO:
5190     case AMDGPU::FLAT_SCR_HI:
5191       return false;
5192     default:
5193       return true;
5194     }
5195   }
5196 
5197   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5198   // SI/CI have.
5199   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5200        R.isValid(); ++R) {
5201     if (*R == RegNo)
5202       return hasSGPR102_SGPR103();
5203   }
5204 
5205   return true;
5206 }
5207 
5208 OperandMatchResultTy
5209 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5210                               OperandMode Mode) {
5211   // Try to parse with a custom parser
5212   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5213 
5214   // If we successfully parsed the operand or if there was an error parsing,
5215   // we are done.
5216   //
5217   // If we are parsing after we reach EndOfStatement then this means we
5218   // are appending default values to the Operands list.  This is only done
5219   // by a custom parser, so we shouldn't continue on to the generic parsing.
5220   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5221       isToken(AsmToken::EndOfStatement))
5222     return ResTy;
5223 
5224   SMLoc RBraceLoc;
5225   SMLoc LBraceLoc = getLoc();
5226   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5227     unsigned Prefix = Operands.size();
5228 
5229     for (;;) {
5230       auto Loc = getLoc();
5231       ResTy = parseReg(Operands);
5232       if (ResTy == MatchOperand_NoMatch)
5233         Error(Loc, "expected a register");
5234       if (ResTy != MatchOperand_Success)
5235         return MatchOperand_ParseFail;
5236 
5237       RBraceLoc = getLoc();
5238       if (trySkipToken(AsmToken::RBrac))
5239         break;
5240 
5241       if (!skipToken(AsmToken::Comma,
5242                      "expected a comma or a closing square bracket")) {
5243         return MatchOperand_ParseFail;
5244       }
5245     }
5246 
5247     if (Operands.size() - Prefix > 1) {
5248       Operands.insert(Operands.begin() + Prefix,
5249                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5250       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5251     }
5252 
5253     return MatchOperand_Success;
5254   }
5255 
5256   return parseRegOrImm(Operands);
5257 }
5258 
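// Strip a trailing "_e32", "_e64", "_dpp" or "_sdwa" suffix from the mnemonic
// and record it as a forced encoding; for example, "v_add_f32_e64" selects the
// 64-bit encoding of v_add_f32.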
5259 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5260   // Clear any forced encodings from the previous instruction.
5261   setForcedEncodingSize(0);
5262   setForcedDPP(false);
5263   setForcedSDWA(false);
5264 
5265   if (Name.endswith("_e64")) {
5266     setForcedEncodingSize(64);
5267     return Name.substr(0, Name.size() - 4);
5268   } else if (Name.endswith("_e32")) {
5269     setForcedEncodingSize(32);
5270     return Name.substr(0, Name.size() - 4);
5271   } else if (Name.endswith("_dpp")) {
5272     setForcedDPP(true);
5273     return Name.substr(0, Name.size() - 4);
5274   } else if (Name.endswith("_sdwa")) {
5275     setForcedSDWA(true);
5276     return Name.substr(0, Name.size() - 5);
5277   }
5278   return Name;
5279 }
5280 
5281 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5282                                        StringRef Name,
5283                                        SMLoc NameLoc, OperandVector &Operands) {
5284   // Add the instruction mnemonic
5285   Name = parseMnemonicSuffix(Name);
5286   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5287 
5288   bool IsMIMG = Name.startswith("image_");
5289 
5290   while (!trySkipToken(AsmToken::EndOfStatement)) {
5291     OperandMode Mode = OperandMode_Default;
5292     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5293       Mode = OperandMode_NSA;
5294     CPolSeen = 0;
5295     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5296 
5297     if (Res != MatchOperand_Success) {
5298       checkUnsupportedInstruction(Name, NameLoc);
5299       if (!Parser.hasPendingError()) {
5300         // FIXME: use real operand location rather than the current location.
5301         StringRef Msg =
5302           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5303                                             "not a valid operand.";
5304         Error(getLoc(), Msg);
5305       }
5306       while (!trySkipToken(AsmToken::EndOfStatement)) {
5307         lex();
5308       }
5309       return true;
5310     }
5311 
5312     // Eat the comma or space if there is one.
5313     trySkipToken(AsmToken::Comma);
5314   }
5315 
5316   return false;
5317 }
5318 
5319 //===----------------------------------------------------------------------===//
5320 // Utility functions
5321 //===----------------------------------------------------------------------===//
5322 
5323 OperandMatchResultTy
5324 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5325 
5326   if (!trySkipId(Prefix, AsmToken::Colon))
5327     return MatchOperand_NoMatch;
5328 
5329   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5330 }
5331 
5332 OperandMatchResultTy
5333 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5334                                     AMDGPUOperand::ImmTy ImmTy,
5335                                     bool (*ConvertResult)(int64_t&)) {
5336   SMLoc S = getLoc();
5337   int64_t Value = 0;
5338 
5339   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5340   if (Res != MatchOperand_Success)
5341     return Res;
5342 
5343   if (ConvertResult && !ConvertResult(Value)) {
5344     Error(S, "invalid " + StringRef(Prefix) + " value.");
5345   }
5346 
5347   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5348   return MatchOperand_Success;
5349 }
5350 
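// Parse a bracketed list of 0/1 values following "<prefix>:" and pack them
// into a bitmask, element I occupying bit I; for example, "op_sel:[0,1,1,0]"
// (operand name shown for illustration) yields the value 6.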
5351 OperandMatchResultTy
5352 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5353                                              OperandVector &Operands,
5354                                              AMDGPUOperand::ImmTy ImmTy,
5355                                              bool (*ConvertResult)(int64_t&)) {
5356   SMLoc S = getLoc();
5357   if (!trySkipId(Prefix, AsmToken::Colon))
5358     return MatchOperand_NoMatch;
5359 
5360   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5361     return MatchOperand_ParseFail;
5362 
5363   unsigned Val = 0;
5364   const unsigned MaxSize = 4;
5365 
5366   // FIXME: How to verify the number of elements matches the number of src
5367   // operands?
5368   for (int I = 0; ; ++I) {
5369     int64_t Op;
5370     SMLoc Loc = getLoc();
5371     if (!parseExpr(Op))
5372       return MatchOperand_ParseFail;
5373 
5374     if (Op != 0 && Op != 1) {
5375       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5376       return MatchOperand_ParseFail;
5377     }
5378 
5379     Val |= (Op << I);
5380 
5381     if (trySkipToken(AsmToken::RBrac))
5382       break;
5383 
5384     if (I + 1 == MaxSize) {
5385       Error(getLoc(), "expected a closing square bracket");
5386       return MatchOperand_ParseFail;
5387     }
5388 
5389     if (!skipToken(AsmToken::Comma, "expected a comma"))
5390       return MatchOperand_ParseFail;
5391   }
5392 
5393   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5394   return MatchOperand_Success;
5395 }
5396 
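// Parse a named single-bit modifier: the bare name sets the bit and the
// "no"-prefixed spelling clears it (for instance "gds" vs "nogds").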
5397 OperandMatchResultTy
5398 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5399                                AMDGPUOperand::ImmTy ImmTy) {
5400   int64_t Bit;
5401   SMLoc S = getLoc();
5402 
5403   if (trySkipId(Name)) {
5404     Bit = 1;
5405   } else if (trySkipId("no", Name)) {
5406     Bit = 0;
5407   } else {
5408     return MatchOperand_NoMatch;
5409   }
5410 
5411   if (Name == "r128" && !hasMIMG_R128()) {
5412     Error(S, "r128 modifier is not supported on this GPU");
5413     return MatchOperand_ParseFail;
5414   }
5415   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5416     Error(S, "a16 modifier is not supported on this GPU");
5417     return MatchOperand_ParseFail;
5418   }
5419 
5420   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5421     ImmTy = AMDGPUOperand::ImmTyR128A16;
5422 
5423   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5424   return MatchOperand_Success;
5425 }
5426 
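// Parse one cache policy modifier (glc, slc, dlc or scc) or its negated "no"
// form and fold it into a single CPol immediate; an illustrative use is
// "global_load_dword v0, v[1:2], off glc dlc". Note that dlc requires GFX10+
// and scc requires GFX90A.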
5427 OperandMatchResultTy
5428 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5429   unsigned CPolOn = 0;
5430   unsigned CPolOff = 0;
5431   SMLoc S = getLoc();
5432 
5433   if (trySkipId("glc"))
5434     CPolOn = AMDGPU::CPol::GLC;
5435   else if (trySkipId("noglc"))
5436     CPolOff = AMDGPU::CPol::GLC;
5437   else if (trySkipId("slc"))
5438     CPolOn = AMDGPU::CPol::SLC;
5439   else if (trySkipId("noslc"))
5440     CPolOff = AMDGPU::CPol::SLC;
5441   else if (trySkipId("dlc"))
5442     CPolOn = AMDGPU::CPol::DLC;
5443   else if (trySkipId("nodlc"))
5444     CPolOff = AMDGPU::CPol::DLC;
5445   else if (trySkipId("scc"))
5446     CPolOn = AMDGPU::CPol::SCC;
5447   else if (trySkipId("noscc"))
5448     CPolOff = AMDGPU::CPol::SCC;
5449   else
5450     return MatchOperand_NoMatch;
5451 
5452   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5453     Error(S, "dlc modifier is not supported on this GPU");
5454     return MatchOperand_ParseFail;
5455   }
5456 
5457   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5458     Error(S, "scc modifier is not supported on this GPU");
5459     return MatchOperand_ParseFail;
5460   }
5461 
5462   if (CPolSeen & (CPolOn | CPolOff)) {
5463     Error(S, "duplicate cache policy modifier");
5464     return MatchOperand_ParseFail;
5465   }
5466 
5467   CPolSeen |= (CPolOn | CPolOff);
5468 
5469   for (unsigned I = 1; I != Operands.size(); ++I) {
5470     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5471     if (Op.isCPol()) {
5472       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5473       return MatchOperand_Success;
5474     }
5475   }
5476 
5477   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5478                                               AMDGPUOperand::ImmTyCPol));
5479 
5480   return MatchOperand_Success;
5481 }
5482 
5483 static void addOptionalImmOperand(
5484   MCInst& Inst, const OperandVector& Operands,
5485   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5486   AMDGPUOperand::ImmTy ImmT,
5487   int64_t Default = 0) {
5488   auto i = OptionalIdx.find(ImmT);
5489   if (i != OptionalIdx.end()) {
5490     unsigned Idx = i->second;
5491     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5492   } else {
5493     Inst.addOperand(MCOperand::createImm(Default));
5494   }
5495 }
5496 
5497 OperandMatchResultTy
5498 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5499                                        StringRef &Value,
5500                                        SMLoc &StringLoc) {
5501   if (!trySkipId(Prefix, AsmToken::Colon))
5502     return MatchOperand_NoMatch;
5503 
5504   StringLoc = getLoc();
5505   return parseId(Value, "expected an identifier") ? MatchOperand_Success
5506                                                   : MatchOperand_ParseFail;
5507 }
5508 
5509 //===----------------------------------------------------------------------===//
5510 // MTBUF format
5511 //===----------------------------------------------------------------------===//
5512 
5513 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5514                                   int64_t MaxVal,
5515                                   int64_t &Fmt) {
5516   int64_t Val;
5517   SMLoc Loc = getLoc();
5518 
5519   auto Res = parseIntWithPrefix(Pref, Val);
5520   if (Res == MatchOperand_ParseFail)
5521     return false;
5522   if (Res == MatchOperand_NoMatch)
5523     return true;
5524 
5525   if (Val < 0 || Val > MaxVal) {
5526     Error(Loc, Twine("out of range ", StringRef(Pref)));
5527     return false;
5528   }
5529 
5530   Fmt = Val;
5531   return true;
5532 }
5533 
5534 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5535 // values to live in a joint format operand in the MCInst encoding.
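// For example, "dfmt:1, nfmt:7" (numeric values are illustrative) is folded
// into one immediate via encodeDfmtNfmt; either half may be omitted, in which
// case the corresponding default is substituted below.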
5536 OperandMatchResultTy
5537 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5538   using namespace llvm::AMDGPU::MTBUFFormat;
5539 
5540   int64_t Dfmt = DFMT_UNDEF;
5541   int64_t Nfmt = NFMT_UNDEF;
5542 
5543   // dfmt and nfmt can appear in either order, and each is optional.
5544   for (int I = 0; I < 2; ++I) {
5545     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5546       return MatchOperand_ParseFail;
5547 
5548     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5549       return MatchOperand_ParseFail;
5550     }
5551     // Skip optional comma between dfmt/nfmt
5552     // but guard against 2 commas following each other.
5553     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5554         !peekToken().is(AsmToken::Comma)) {
5555       trySkipToken(AsmToken::Comma);
5556     }
5557   }
5558 
5559   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5560     return MatchOperand_NoMatch;
5561 
5562   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5563   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5564 
5565   Format = encodeDfmtNfmt(Dfmt, Nfmt);
5566   return MatchOperand_Success;
5567 }
5568 
5569 OperandMatchResultTy
5570 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5571   using namespace llvm::AMDGPU::MTBUFFormat;
5572 
5573   int64_t Fmt = UFMT_UNDEF;
5574 
5575   if (!tryParseFmt("format", UFMT_MAX, Fmt))
5576     return MatchOperand_ParseFail;
5577 
5578   if (Fmt == UFMT_UNDEF)
5579     return MatchOperand_NoMatch;
5580 
5581   Format = Fmt;
5582   return MatchOperand_Success;
5583 }
5584 
5585 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5586                                     int64_t &Nfmt,
5587                                     StringRef FormatStr,
5588                                     SMLoc Loc) {
5589   using namespace llvm::AMDGPU::MTBUFFormat;
5590   int64_t Format;
5591 
5592   Format = getDfmt(FormatStr);
5593   if (Format != DFMT_UNDEF) {
5594     Dfmt = Format;
5595     return true;
5596   }
5597 
5598   Format = getNfmt(FormatStr, getSTI());
5599   if (Format != NFMT_UNDEF) {
5600     Nfmt = Format;
5601     return true;
5602   }
5603 
5604   Error(Loc, "unsupported format");
5605   return false;
5606 }
5607 
5608 OperandMatchResultTy
5609 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5610                                           SMLoc FormatLoc,
5611                                           int64_t &Format) {
5612   using namespace llvm::AMDGPU::MTBUFFormat;
5613 
5614   int64_t Dfmt = DFMT_UNDEF;
5615   int64_t Nfmt = NFMT_UNDEF;
5616   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5617     return MatchOperand_ParseFail;
5618 
5619   if (trySkipToken(AsmToken::Comma)) {
5620     StringRef Str;
5621     SMLoc Loc = getLoc();
5622     if (!parseId(Str, "expected a format string") ||
5623         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5624       return MatchOperand_ParseFail;
5625     }
5626     if (Dfmt == DFMT_UNDEF) {
5627       Error(Loc, "duplicate numeric format");
5628       return MatchOperand_ParseFail;
5629     } else if (Nfmt == NFMT_UNDEF) {
5630       Error(Loc, "duplicate data format");
5631       return MatchOperand_ParseFail;
5632     }
5633   }
5634 
5635   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5636   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5637 
5638   if (isGFX10Plus()) {
5639     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5640     if (Ufmt == UFMT_UNDEF) {
5641       Error(FormatLoc, "unsupported format");
5642       return MatchOperand_ParseFail;
5643     }
5644     Format = Ufmt;
5645   } else {
5646     Format = encodeDfmtNfmt(Dfmt, Nfmt);
5647   }
5648 
5649   return MatchOperand_Success;
5650 }
5651 
5652 OperandMatchResultTy
5653 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5654                                             SMLoc Loc,
5655                                             int64_t &Format) {
5656   using namespace llvm::AMDGPU::MTBUFFormat;
5657 
5658   auto Id = getUnifiedFormat(FormatStr);
5659   if (Id == UFMT_UNDEF)
5660     return MatchOperand_NoMatch;
5661 
5662   if (!isGFX10Plus()) {
5663     Error(Loc, "unified format is not supported on this GPU");
5664     return MatchOperand_ParseFail;
5665   }
5666 
5667   Format = Id;
5668   return MatchOperand_Success;
5669 }
5670 
5671 OperandMatchResultTy
5672 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5673   using namespace llvm::AMDGPU::MTBUFFormat;
5674   SMLoc Loc = getLoc();
5675 
5676   if (!parseExpr(Format))
5677     return MatchOperand_ParseFail;
5678   if (!isValidFormatEncoding(Format, getSTI())) {
5679     Error(Loc, "out of range format");
5680     return MatchOperand_ParseFail;
5681   }
5682 
5683   return MatchOperand_Success;
5684 }
5685 
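// A format operand is written either numerically as "format:<n>" or
// symbolically as "format:[<name>[, <name>]]"; symbolic names are resolved by
// the unified (GFX10+) and split (dfmt/nfmt) helpers above.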
5686 OperandMatchResultTy
5687 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5688   using namespace llvm::AMDGPU::MTBUFFormat;
5689 
5690   if (!trySkipId("format", AsmToken::Colon))
5691     return MatchOperand_NoMatch;
5692 
5693   if (trySkipToken(AsmToken::LBrac)) {
5694     StringRef FormatStr;
5695     SMLoc Loc = getLoc();
5696     if (!parseId(FormatStr, "expected a format string"))
5697       return MatchOperand_ParseFail;
5698 
5699     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5700     if (Res == MatchOperand_NoMatch)
5701       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5702     if (Res != MatchOperand_Success)
5703       return Res;
5704 
5705     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5706       return MatchOperand_ParseFail;
5707 
5708     return MatchOperand_Success;
5709   }
5710 
5711   return parseNumericFormat(Format);
5712 }
5713 
5714 OperandMatchResultTy
5715 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5716   using namespace llvm::AMDGPU::MTBUFFormat;
5717 
5718   int64_t Format = getDefaultFormatEncoding(getSTI());
5719   OperandMatchResultTy Res;
5720   SMLoc Loc = getLoc();
5721 
5722   // Parse legacy format syntax.
5723   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5724   if (Res == MatchOperand_ParseFail)
5725     return Res;
5726 
5727   bool FormatFound = (Res == MatchOperand_Success);
5728 
5729   Operands.push_back(
5730     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5731 
5732   if (FormatFound)
5733     trySkipToken(AsmToken::Comma);
5734 
5735   if (isToken(AsmToken::EndOfStatement)) {
5736     // We are expecting an soffset operand,
5737     // but let the matcher handle the error.
5738     return MatchOperand_Success;
5739   }
5740 
5741   // Parse soffset.
5742   Res = parseRegOrImm(Operands);
5743   if (Res != MatchOperand_Success)
5744     return Res;
5745 
5746   trySkipToken(AsmToken::Comma);
5747 
5748   if (!FormatFound) {
5749     Res = parseSymbolicOrNumericFormat(Format);
5750     if (Res == MatchOperand_ParseFail)
5751       return Res;
5752     if (Res == MatchOperand_Success) {
5753       auto Size = Operands.size();
5754       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5755       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5756       Op.setImm(Format);
5757     }
5758     return MatchOperand_Success;
5759   }
5760 
5761   if (isId("format") && peekToken().is(AsmToken::Colon)) {
5762     Error(getLoc(), "duplicate format");
5763     return MatchOperand_ParseFail;
5764   }
5765   return MatchOperand_Success;
5766 }
5767 
5768 //===----------------------------------------------------------------------===//
5769 // ds
5770 //===----------------------------------------------------------------------===//
5771 
5772 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5773                                     const OperandVector &Operands) {
5774   OptionalImmIndexMap OptionalIdx;
5775 
5776   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5777     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5778 
5779     // Add the register arguments
5780     if (Op.isReg()) {
5781       Op.addRegOperands(Inst, 1);
5782       continue;
5783     }
5784 
5785     // Handle optional arguments
5786     OptionalIdx[Op.getImmTy()] = i;
5787   }
5788 
5789   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5790   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5791   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5792 
5793   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5794 }
5795 
5796 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5797                                 bool IsGdsHardcoded) {
5798   OptionalImmIndexMap OptionalIdx;
5799 
5800   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5801     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5802 
5803     // Add the register arguments
5804     if (Op.isReg()) {
5805       Op.addRegOperands(Inst, 1);
5806       continue;
5807     }
5808 
5809     if (Op.isToken() && Op.getToken() == "gds") {
5810       IsGdsHardcoded = true;
5811       continue;
5812     }
5813 
5814     // Handle optional arguments
5815     OptionalIdx[Op.getImmTy()] = i;
5816   }
5817 
5818   AMDGPUOperand::ImmTy OffsetType =
5819     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5820      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5821      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5822                                                       AMDGPUOperand::ImmTyOffset;
5823 
5824   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5825 
5826   if (!IsGdsHardcoded) {
5827     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5828   }
5829   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5830 }
5831 
5832 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5833   OptionalImmIndexMap OptionalIdx;
5834 
5835   unsigned OperandIdx[4];
5836   unsigned EnMask = 0;
5837   int SrcIdx = 0;
5838 
5839   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5840     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5841 
5842     // Add the register arguments
5843     if (Op.isReg()) {
5844       assert(SrcIdx < 4);
5845       OperandIdx[SrcIdx] = Inst.size();
5846       Op.addRegOperands(Inst, 1);
5847       ++SrcIdx;
5848       continue;
5849     }
5850 
5851     if (Op.isOff()) {
5852       assert(SrcIdx < 4);
5853       OperandIdx[SrcIdx] = Inst.size();
5854       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5855       ++SrcIdx;
5856       continue;
5857     }
5858 
5859     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5860       Op.addImmOperands(Inst, 1);
5861       continue;
5862     }
5863 
5864     if (Op.isToken() && Op.getToken() == "done")
5865       continue;
5866 
5867     // Handle optional arguments
5868     OptionalIdx[Op.getImmTy()] = i;
5869   }
5870 
5871   assert(SrcIdx == 4);
5872 
5873   bool Compr = false;
5874   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5875     Compr = true;
5876     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5877     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5878     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5879   }
5880 
5881   for (auto i = 0; i < SrcIdx; ++i) {
5882     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5883       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5884     }
5885   }
5886 
5887   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5888   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5889 
5890   Inst.addOperand(MCOperand::createImm(EnMask));
5891 }
5892 
5893 //===----------------------------------------------------------------------===//
5894 // s_waitcnt
5895 //===----------------------------------------------------------------------===//
5896 
5897 static bool
5898 encodeCnt(
5899   const AMDGPU::IsaVersion ISA,
5900   int64_t &IntVal,
5901   int64_t CntVal,
5902   bool Saturate,
5903   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5904   unsigned (*decode)(const IsaVersion &Version, unsigned))
5905 {
5906   bool Failed = false;
5907 
5908   IntVal = encode(ISA, IntVal, CntVal);
5909   if (CntVal != decode(ISA, IntVal)) {
5910     if (Saturate) {
5911       IntVal = encode(ISA, IntVal, -1);
5912     } else {
5913       Failed = true;
5914     }
5915   }
5916   return Failed;
5917 }
5918 
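// Parse a single counter specification of the form <name>(<count>), where
// <name> is vmcnt, expcnt or lgkmcnt, optionally suffixed with "_sat" to
// saturate an out-of-range count instead of reporting an error. Several
// counters may be joined with '&' or ',', e.g. "s_waitcnt vmcnt(0) & lgkmcnt(0)".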
5919 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5920 
5921   SMLoc CntLoc = getLoc();
5922   StringRef CntName = getTokenStr();
5923 
5924   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5925       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5926     return false;
5927 
5928   int64_t CntVal;
5929   SMLoc ValLoc = getLoc();
5930   if (!parseExpr(CntVal))
5931     return false;
5932 
5933   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5934 
5935   bool Failed = true;
5936   bool Sat = CntName.endswith("_sat");
5937 
5938   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5939     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5940   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5941     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5942   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5943     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5944   } else {
5945     Error(CntLoc, "invalid counter name " + CntName);
5946     return false;
5947   }
5948 
5949   if (Failed) {
5950     Error(ValLoc, "too large value for " + CntName);
5951     return false;
5952   }
5953 
5954   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5955     return false;
5956 
5957   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5958     if (isToken(AsmToken::EndOfStatement)) {
5959       Error(getLoc(), "expected a counter name");
5960       return false;
5961     }
5962   }
5963 
5964   return true;
5965 }
5966 
5967 OperandMatchResultTy
5968 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5969   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5970   int64_t Waitcnt = getWaitcntBitMask(ISA);
5971   SMLoc S = getLoc();
5972 
5973   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5974     while (!isToken(AsmToken::EndOfStatement)) {
5975       if (!parseCnt(Waitcnt))
5976         return MatchOperand_ParseFail;
5977     }
5978   } else {
5979     if (!parseExpr(Waitcnt))
5980       return MatchOperand_ParseFail;
5981   }
5982 
5983   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5984   return MatchOperand_Success;
5985 }
5986 
5987 bool
5988 AMDGPUOperand::isSWaitCnt() const {
5989   return isImm();
5990 }
5991 
5992 //===----------------------------------------------------------------------===//
5993 // hwreg
5994 //===----------------------------------------------------------------------===//
5995 
5996 bool
5997 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5998                                 OperandInfoTy &Offset,
5999                                 OperandInfoTy &Width) {
6000   using namespace llvm::AMDGPU::Hwreg;
6001 
6002   // The register may be specified by name or using a numeric code
6003   HwReg.Loc = getLoc();
6004   if (isToken(AsmToken::Identifier) &&
6005       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6006     HwReg.IsSymbolic = true;
6007     lex(); // skip register name
6008   } else if (!parseExpr(HwReg.Id, "a register name")) {
6009     return false;
6010   }
6011 
6012   if (trySkipToken(AsmToken::RParen))
6013     return true;
6014 
6015   // Parse the optional offset and width parameters.
6016   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6017     return false;
6018 
6019   Offset.Loc = getLoc();
6020   if (!parseExpr(Offset.Id))
6021     return false;
6022 
6023   if (!skipToken(AsmToken::Comma, "expected a comma"))
6024     return false;
6025 
6026   Width.Loc = getLoc();
6027   return parseExpr(Width.Id) &&
6028          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6029 }
6030 
6031 bool
6032 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6033                                const OperandInfoTy &Offset,
6034                                const OperandInfoTy &Width) {
6035 
6036   using namespace llvm::AMDGPU::Hwreg;
6037 
6038   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6039     Error(HwReg.Loc,
6040           "specified hardware register is not supported on this GPU");
6041     return false;
6042   }
6043   if (!isValidHwreg(HwReg.Id)) {
6044     Error(HwReg.Loc,
6045           "invalid code of hardware register: only 6-bit values are legal");
6046     return false;
6047   }
6048   if (!isValidHwregOffset(Offset.Id)) {
6049     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6050     return false;
6051   }
6052   if (!isValidHwregWidth(Width.Id)) {
6053     Error(Width.Loc,
6054           "invalid bitfield width: only values from 1 to 32 are legal");
6055     return false;
6056   }
6057   return true;
6058 }
6059 
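// Parse a hwreg operand, written either as the macro
// "hwreg(<name or code>[, <offset>, <width>])" or as a raw 16-bit immediate.
// An illustrative use is "s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 0, 16)";
// the symbolic register names are resolved via getHwregId.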
6060 OperandMatchResultTy
6061 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6062   using namespace llvm::AMDGPU::Hwreg;
6063 
6064   int64_t ImmVal = 0;
6065   SMLoc Loc = getLoc();
6066 
6067   if (trySkipId("hwreg", AsmToken::LParen)) {
6068     OperandInfoTy HwReg(ID_UNKNOWN_);
6069     OperandInfoTy Offset(OFFSET_DEFAULT_);
6070     OperandInfoTy Width(WIDTH_DEFAULT_);
6071     if (parseHwregBody(HwReg, Offset, Width) &&
6072         validateHwreg(HwReg, Offset, Width)) {
6073       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6074     } else {
6075       return MatchOperand_ParseFail;
6076     }
6077   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6078     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6079       Error(Loc, "invalid immediate: only 16-bit values are legal");
6080       return MatchOperand_ParseFail;
6081     }
6082   } else {
6083     return MatchOperand_ParseFail;
6084   }
6085 
6086   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6087   return MatchOperand_Success;
6088 }
6089 
6090 bool AMDGPUOperand::isHwreg() const {
6091   return isImmTy(ImmTyHwreg);
6092 }
6093 
6094 //===----------------------------------------------------------------------===//
6095 // sendmsg
6096 //===----------------------------------------------------------------------===//
6097 
6098 bool
6099 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6100                                   OperandInfoTy &Op,
6101                                   OperandInfoTy &Stream) {
6102   using namespace llvm::AMDGPU::SendMsg;
6103 
6104   Msg.Loc = getLoc();
6105   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6106     Msg.IsSymbolic = true;
6107     lex(); // skip message name
6108   } else if (!parseExpr(Msg.Id, "a message name")) {
6109     return false;
6110   }
6111 
6112   if (trySkipToken(AsmToken::Comma)) {
6113     Op.IsDefined = true;
6114     Op.Loc = getLoc();
6115     if (isToken(AsmToken::Identifier) &&
6116         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6117       lex(); // skip operation name
6118     } else if (!parseExpr(Op.Id, "an operation name")) {
6119       return false;
6120     }
6121 
6122     if (trySkipToken(AsmToken::Comma)) {
6123       Stream.IsDefined = true;
6124       Stream.Loc = getLoc();
6125       if (!parseExpr(Stream.Id))
6126         return false;
6127     }
6128   }
6129 
6130   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6131 }
6132 
6133 bool
6134 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6135                                  const OperandInfoTy &Op,
6136                                  const OperandInfoTy &Stream) {
6137   using namespace llvm::AMDGPU::SendMsg;
6138 
6139   // Validation strictness depends on whether the message is specified
6140   // in a symbolic or in a numeric form. In the latter case
6141   // only the encoding possibility is checked.
6142   bool Strict = Msg.IsSymbolic;
6143 
6144   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6145     Error(Msg.Loc, "invalid message id");
6146     return false;
6147   }
6148   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6149     if (Op.IsDefined) {
6150       Error(Op.Loc, "message does not support operations");
6151     } else {
6152       Error(Msg.Loc, "missing message operation");
6153     }
6154     return false;
6155   }
6156   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6157     Error(Op.Loc, "invalid operation id");
6158     return false;
6159   }
6160   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6161     Error(Stream.Loc, "message operation does not support streams");
6162     return false;
6163   }
6164   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6165     Error(Stream.Loc, "invalid message stream id");
6166     return false;
6167   }
6168   return true;
6169 }
6170 
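// Parse a sendmsg operand, written either as the macro
// "sendmsg(<msg>[, <op>[, <stream>]])" or as a raw 16-bit immediate, e.g.
// "s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)"; symbolic message and operation
// names are resolved via getMsgId and getMsgOpId.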
6171 OperandMatchResultTy
6172 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6173   using namespace llvm::AMDGPU::SendMsg;
6174 
6175   int64_t ImmVal = 0;
6176   SMLoc Loc = getLoc();
6177 
6178   if (trySkipId("sendmsg", AsmToken::LParen)) {
6179     OperandInfoTy Msg(ID_UNKNOWN_);
6180     OperandInfoTy Op(OP_NONE_);
6181     OperandInfoTy Stream(STREAM_ID_NONE_);
6182     if (parseSendMsgBody(Msg, Op, Stream) &&
6183         validateSendMsg(Msg, Op, Stream)) {
6184       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6185     } else {
6186       return MatchOperand_ParseFail;
6187     }
6188   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6189     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6190       Error(Loc, "invalid immediate: only 16-bit values are legal");
6191       return MatchOperand_ParseFail;
6192     }
6193   } else {
6194     return MatchOperand_ParseFail;
6195   }
6196 
6197   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6198   return MatchOperand_Success;
6199 }
6200 
6201 bool AMDGPUOperand::isSendMsg() const {
6202   return isImmTy(ImmTySendMsg);
6203 }
6204 
6205 //===----------------------------------------------------------------------===//
6206 // v_interp
6207 //===----------------------------------------------------------------------===//
6208 
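// Interpolation slot and attribute operands, e.g. (illustrative):
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_mov_f32 v2, p10, attr1.y
// The slot is one of p10, p20 or p0; the attribute has the form
// attr<N>.<chan> with N in [0,63] and <chan> one of x, y, z or w.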
6209 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6210   StringRef Str;
6211   SMLoc S = getLoc();
6212 
6213   if (!parseId(Str))
6214     return MatchOperand_NoMatch;
6215 
6216   int Slot = StringSwitch<int>(Str)
6217     .Case("p10", 0)
6218     .Case("p20", 1)
6219     .Case("p0", 2)
6220     .Default(-1);
6221 
6222   if (Slot == -1) {
6223     Error(S, "invalid interpolation slot");
6224     return MatchOperand_ParseFail;
6225   }
6226 
6227   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6228                                               AMDGPUOperand::ImmTyInterpSlot));
6229   return MatchOperand_Success;
6230 }
6231 
6232 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6233   StringRef Str;
6234   SMLoc S = getLoc();
6235 
6236   if (!parseId(Str))
6237     return MatchOperand_NoMatch;
6238 
6239   if (!Str.startswith("attr")) {
6240     Error(S, "invalid interpolation attribute");
6241     return MatchOperand_ParseFail;
6242   }
6243 
6244   StringRef Chan = Str.take_back(2);
6245   int AttrChan = StringSwitch<int>(Chan)
6246     .Case(".x", 0)
6247     .Case(".y", 1)
6248     .Case(".z", 2)
6249     .Case(".w", 3)
6250     .Default(-1);
6251   if (AttrChan == -1) {
6252     Error(S, "invalid or missing interpolation attribute channel");
6253     return MatchOperand_ParseFail;
6254   }
6255 
6256   Str = Str.drop_back(2).drop_front(4);
6257 
6258   uint8_t Attr;
6259   if (Str.getAsInteger(10, Attr)) {
6260     Error(S, "invalid or missing interpolation attribute number");
6261     return MatchOperand_ParseFail;
6262   }
6263 
6264   if (Attr > 63) {
6265     Error(S, "out of bounds interpolation attribute number");
6266     return MatchOperand_ParseFail;
6267   }
6268 
6269   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6270 
6271   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6272                                               AMDGPUOperand::ImmTyInterpAttr));
6273   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6274                                               AMDGPUOperand::ImmTyAttrChan));
6275   return MatchOperand_Success;
6276 }
6277 
6278 //===----------------------------------------------------------------------===//
6279 // exp
6280 //===----------------------------------------------------------------------===//
6281 
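// Parse an export target, e.g. mrt0, mrtz, null, pos0 or param0
// (illustrative; the set of valid targets depends on the subtarget).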
6282 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6283   using namespace llvm::AMDGPU::Exp;
6284 
6285   StringRef Str;
6286   SMLoc S = getLoc();
6287 
6288   if (!parseId(Str))
6289     return MatchOperand_NoMatch;
6290 
6291   unsigned Id = getTgtId(Str);
6292   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6293     Error(S, (Id == ET_INVALID) ?
6294                 "invalid exp target" :
6295                 "exp target is not supported on this GPU");
6296     return MatchOperand_ParseFail;
6297   }
6298 
6299   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6300                                               AMDGPUOperand::ImmTyExpTgt));
6301   return MatchOperand_Success;
6302 }
6303 
6304 //===----------------------------------------------------------------------===//
6305 // parser helpers
6306 //===----------------------------------------------------------------------===//
6307 
6308 bool
6309 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6310   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6311 }
6312 
6313 bool
6314 AMDGPUAsmParser::isId(const StringRef Id) const {
6315   return isId(getToken(), Id);
6316 }
6317 
6318 bool
6319 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6320   return getTokenKind() == Kind;
6321 }
6322 
6323 bool
6324 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6325   if (isId(Id)) {
6326     lex();
6327     return true;
6328   }
6329   return false;
6330 }
6331 
6332 bool
6333 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6334   if (isToken(AsmToken::Identifier)) {
6335     StringRef Tok = getTokenStr();
6336     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6337       lex();
6338       return true;
6339     }
6340   }
6341   return false;
6342 }
6343 
6344 bool
6345 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6346   if (isId(Id) && peekToken().is(Kind)) {
6347     lex();
6348     lex();
6349     return true;
6350   }
6351   return false;
6352 }
6353 
6354 bool
6355 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6356   if (isToken(Kind)) {
6357     lex();
6358     return true;
6359   }
6360   return false;
6361 }
6362 
6363 bool
6364 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6365                            const StringRef ErrMsg) {
6366   if (!trySkipToken(Kind)) {
6367     Error(getLoc(), ErrMsg);
6368     return false;
6369   }
6370   return true;
6371 }
6372 
6373 bool
6374 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6375   SMLoc S = getLoc();
6376 
6377   const MCExpr *Expr;
6378   if (Parser.parseExpression(Expr))
6379     return false;
6380 
6381   if (Expr->evaluateAsAbsolute(Imm))
6382     return true;
6383 
6384   if (Expected.empty()) {
6385     Error(S, "expected absolute expression");
6386   } else {
6387     Error(S, Twine("expected ", Expected) +
6388              Twine(" or an absolute expression"));
6389   }
6390   return false;
6391 }
6392 
6393 bool
6394 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6395   SMLoc S = getLoc();
6396 
6397   const MCExpr *Expr;
6398   if (Parser.parseExpression(Expr))
6399     return false;
6400 
6401   int64_t IntVal;
6402   if (Expr->evaluateAsAbsolute(IntVal)) {
6403     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6404   } else {
6405     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6406   }
6407   return true;
6408 }
6409 
6410 bool
6411 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6412   if (isToken(AsmToken::String)) {
6413     Val = getToken().getStringContents();
6414     lex();
6415     return true;
6416   } else {
6417     Error(getLoc(), ErrMsg);
6418     return false;
6419   }
6420 }
6421 
6422 bool
6423 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6424   if (isToken(AsmToken::Identifier)) {
6425     Val = getTokenStr();
6426     lex();
6427     return true;
6428   } else {
6429     if (!ErrMsg.empty())
6430       Error(getLoc(), ErrMsg);
6431     return false;
6432   }
6433 }
6434 
6435 AsmToken
6436 AMDGPUAsmParser::getToken() const {
6437   return Parser.getTok();
6438 }
6439 
6440 AsmToken
6441 AMDGPUAsmParser::peekToken() {
6442   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6443 }
6444 
6445 void
6446 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6447   auto TokCount = getLexer().peekTokens(Tokens);
6448 
6449   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6450     Tokens[Idx] = AsmToken(AsmToken::Error, "");
6451 }
6452 
6453 AsmToken::TokenKind
6454 AMDGPUAsmParser::getTokenKind() const {
6455   return getLexer().getKind();
6456 }
6457 
6458 SMLoc
6459 AMDGPUAsmParser::getLoc() const {
6460   return getToken().getLoc();
6461 }
6462 
6463 StringRef
6464 AMDGPUAsmParser::getTokenStr() const {
6465   return getToken().getString();
6466 }
6467 
6468 void
6469 AMDGPUAsmParser::lex() {
6470   Parser.Lex();
6471 }
6472 
6473 SMLoc
6474 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6475                                const OperandVector &Operands) const {
6476   for (unsigned i = Operands.size() - 1; i > 0; --i) {
6477     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6478     if (Test(Op))
6479       return Op.getStartLoc();
6480   }
6481   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6482 }
6483 
6484 SMLoc
6485 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6486                            const OperandVector &Operands) const {
6487   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6488   return getOperandLoc(Test, Operands);
6489 }
6490 
6491 SMLoc
6492 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6493                            const OperandVector &Operands) const {
6494   auto Test = [=](const AMDGPUOperand& Op) {
6495     return Op.isRegKind() && Op.getReg() == Reg;
6496   };
6497   return getOperandLoc(Test, Operands);
6498 }
6499 
6500 SMLoc
6501 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6502   auto Test = [](const AMDGPUOperand& Op) {
6503     return Op.IsImmKindLiteral() || Op.isExpr();
6504   };
6505   return getOperandLoc(Test, Operands);
6506 }
6507 
6508 SMLoc
6509 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6510   auto Test = [](const AMDGPUOperand& Op) {
6511     return Op.isImmKindConst();
6512   };
6513   return getOperandLoc(Test, Operands);
6514 }
6515 
6516 //===----------------------------------------------------------------------===//
6517 // swizzle
6518 //===----------------------------------------------------------------------===//
6519 
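// In BITMASK_PERM mode the lane a thread reads from is computed as
//   src_lane = ((lane & AndMask) | OrMask) ^ XorMask
// (per the ds_swizzle_b32 description); the helpers below build the
// corresponding swizzle offset encoding.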
6520 LLVM_READNONE
6521 static unsigned
6522 encodeBitmaskPerm(const unsigned AndMask,
6523                   const unsigned OrMask,
6524                   const unsigned XorMask) {
6525   using namespace llvm::AMDGPU::Swizzle;
6526 
6527   return BITMASK_PERM_ENC |
6528          (AndMask << BITMASK_AND_SHIFT) |
6529          (OrMask  << BITMASK_OR_SHIFT)  |
6530          (XorMask << BITMASK_XOR_SHIFT);
6531 }
6532 
6533 bool
6534 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6535                                      const unsigned MinVal,
6536                                      const unsigned MaxVal,
6537                                      const StringRef ErrMsg,
6538                                      SMLoc &Loc) {
6539   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6540     return false;
6541   }
6542   Loc = getLoc();
6543   if (!parseExpr(Op)) {
6544     return false;
6545   }
6546   if (Op < MinVal || Op > MaxVal) {
6547     Error(Loc, ErrMsg);
6548     return false;
6549   }
6550 
6551   return true;
6552 }
6553 
6554 bool
6555 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6556                                       const unsigned MinVal,
6557                                       const unsigned MaxVal,
6558                                       const StringRef ErrMsg) {
6559   SMLoc Loc;
6560   for (unsigned i = 0; i < OpNum; ++i) {
6561     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6562       return false;
6563   }
6564 
6565   return true;
6566 }
6567 
6568 bool
6569 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6570   using namespace llvm::AMDGPU::Swizzle;
6571 
6572   int64_t Lane[LANE_NUM];
6573   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6574                            "expected a 2-bit lane id")) {
6575     Imm = QUAD_PERM_ENC;
6576     for (unsigned I = 0; I < LANE_NUM; ++I) {
6577       Imm |= Lane[I] << (LANE_SHIFT * I);
6578     }
6579     return true;
6580   }
6581   return false;
6582 }
6583 
6584 bool
6585 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6586   using namespace llvm::AMDGPU::Swizzle;
6587 
6588   SMLoc Loc;
6589   int64_t GroupSize;
6590   int64_t LaneIdx;
6591 
6592   if (!parseSwizzleOperand(GroupSize,
6593                            2, 32,
6594                            "group size must be in the interval [2,32]",
6595                            Loc)) {
6596     return false;
6597   }
6598   if (!isPowerOf2_64(GroupSize)) {
6599     Error(Loc, "group size must be a power of two");
6600     return false;
6601   }
6602   if (parseSwizzleOperand(LaneIdx,
6603                           0, GroupSize - 1,
6604                           "lane id must be in the interval [0,group size - 1]",
6605                           Loc)) {
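    // Broadcast is a special case of BITMASK_PERM: the and-mask clears the
    // lane bits within each group and the or-mask selects LaneIdx in it.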
6606     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6607     return true;
6608   }
6609   return false;
6610 }
6611 
6612 bool
6613 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6614   using namespace llvm::AMDGPU::Swizzle;
6615 
6616   SMLoc Loc;
6617   int64_t GroupSize;
6618 
6619   if (!parseSwizzleOperand(GroupSize,
6620                            2, 32,
6621                            "group size must be in the interval [2,32]",
6622                            Loc)) {
6623     return false;
6624   }
6625   if (!isPowerOf2_64(GroupSize)) {
6626     Error(Loc, "group size must be a power of two");
6627     return false;
6628   }
6629 
6630   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6631   return true;
6632 }
6633 
6634 bool
6635 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6636   using namespace llvm::AMDGPU::Swizzle;
6637 
6638   SMLoc Loc;
6639   int64_t GroupSize;
6640 
6641   if (!parseSwizzleOperand(GroupSize,
6642                            1, 16,
6643                            "group size must be in the interval [1,16]",
6644                            Loc)) {
6645     return false;
6646   }
6647   if (!isPowerOf2_64(GroupSize)) {
6648     Error(Loc, "group size must be a power of two");
6649     return false;
6650   }
6651 
6652   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6653   return true;
6654 }
6655 
6656 bool
6657 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6658   using namespace llvm::AMDGPU::Swizzle;
6659 
6660   if (!skipToken(AsmToken::Comma, "expected a comma")) {
6661     return false;
6662   }
6663 
6664   StringRef Ctl;
6665   SMLoc StrLoc = getLoc();
6666   if (!parseString(Ctl)) {
6667     return false;
6668   }
6669   if (Ctl.size() != BITMASK_WIDTH) {
6670     Error(StrLoc, "expected a 5-character mask");
6671     return false;
6672   }
6673 
6674   unsigned AndMask = 0;
6675   unsigned OrMask = 0;
6676   unsigned XorMask = 0;
6677 
6678   for (size_t i = 0; i < Ctl.size(); ++i) {
6679     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6680     switch(Ctl[i]) {
6681     default:
6682       Error(StrLoc, "invalid mask");
6683       return false;
6684     case '0':
6685       break;
6686     case '1':
6687       OrMask |= Mask;
6688       break;
6689     case 'p':
6690       AndMask |= Mask;
6691       break;
6692     case 'i':
6693       AndMask |= Mask;
6694       XorMask |= Mask;
6695       break;
6696     }
6697   }
6698 
6699   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6700   return true;
6701 }
6702 
6703 bool
6704 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6705 
6706   SMLoc OffsetLoc = getLoc();
6707 
6708   if (!parseExpr(Imm, "a swizzle macro")) {
6709     return false;
6710   }
6711   if (!isUInt<16>(Imm)) {
6712     Error(OffsetLoc, "expected a 16-bit offset");
6713     return false;
6714   }
6715   return true;
6716 }
6717 
6718 bool
6719 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6720   using namespace llvm::AMDGPU::Swizzle;
6721 
  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6723 
6724     SMLoc ModeLoc = getLoc();
6725     bool Ok = false;
6726 
6727     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6728       Ok = parseSwizzleQuadPerm(Imm);
6729     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6730       Ok = parseSwizzleBitmaskPerm(Imm);
6731     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6732       Ok = parseSwizzleBroadcast(Imm);
6733     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6734       Ok = parseSwizzleSwap(Imm);
6735     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6736       Ok = parseSwizzleReverse(Imm);
6737     } else {
6738       Error(ModeLoc, "expected a swizzle mode");
6739     }
6740 
    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6742   }
6743 
6744   return false;
6745 }
6746 
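// Accepted forms of the swizzle offset, e.g. (illustrative):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pip")
//   ds_swizzle_b32 v0, v1 offset:0xffff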
6747 OperandMatchResultTy
6748 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6749   SMLoc S = getLoc();
6750   int64_t Imm = 0;
6751 
6752   if (trySkipId("offset")) {
6753 
6754     bool Ok = false;
6755     if (skipToken(AsmToken::Colon, "expected a colon")) {
6756       if (trySkipId("swizzle")) {
6757         Ok = parseSwizzleMacro(Imm);
6758       } else {
6759         Ok = parseSwizzleOffset(Imm);
6760       }
6761     }
6762 
6763     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6764 
    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
6766   } else {
6767     // Swizzle "offset" operand is optional.
6768     // If it is omitted, try parsing other optional operands.
6769     return parseOptionalOpr(Operands);
6770   }
6771 }
6772 
6773 bool
6774 AMDGPUOperand::isSwizzle() const {
6775   return isImmTy(ImmTySwizzle);
6776 }
6777 
6778 //===----------------------------------------------------------------------===//
6779 // VGPR Index Mode
6780 //===----------------------------------------------------------------------===//
6781 
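// Parse the body of a gpr_idx(...) macro, e.g. (illustrative):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// Each mode name may appear at most once; an empty list encodes OFF.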
6782 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6783 
6784   using namespace llvm::AMDGPU::VGPRIndexMode;
6785 
6786   if (trySkipToken(AsmToken::RParen)) {
6787     return OFF;
6788   }
6789 
6790   int64_t Imm = 0;
6791 
6792   while (true) {
6793     unsigned Mode = 0;
6794     SMLoc S = getLoc();
6795 
6796     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6797       if (trySkipId(IdSymbolic[ModeId])) {
6798         Mode = 1 << ModeId;
6799         break;
6800       }
6801     }
6802 
6803     if (Mode == 0) {
6804       Error(S, (Imm == 0)?
6805                "expected a VGPR index mode or a closing parenthesis" :
6806                "expected a VGPR index mode");
6807       return UNDEF;
6808     }
6809 
6810     if (Imm & Mode) {
6811       Error(S, "duplicate VGPR index mode");
6812       return UNDEF;
6813     }
6814     Imm |= Mode;
6815 
6816     if (trySkipToken(AsmToken::RParen))
6817       break;
6818     if (!skipToken(AsmToken::Comma,
6819                    "expected a comma or a closing parenthesis"))
6820       return UNDEF;
6821   }
6822 
6823   return Imm;
6824 }
6825 
6826 OperandMatchResultTy
6827 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6828 
6829   using namespace llvm::AMDGPU::VGPRIndexMode;
6830 
6831   int64_t Imm = 0;
6832   SMLoc S = getLoc();
6833 
6834   if (trySkipId("gpr_idx", AsmToken::LParen)) {
6835     Imm = parseGPRIdxMacro();
6836     if (Imm == UNDEF)
6837       return MatchOperand_ParseFail;
6838   } else {
6839     if (getParser().parseAbsoluteExpression(Imm))
6840       return MatchOperand_ParseFail;
6841     if (Imm < 0 || !isUInt<4>(Imm)) {
6842       Error(S, "invalid immediate: only 4-bit values are legal");
6843       return MatchOperand_ParseFail;
6844     }
6845   }
6846 
6847   Operands.push_back(
6848       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6849   return MatchOperand_Success;
6850 }
6851 
6852 bool AMDGPUOperand::isGPRIdxMode() const {
6853   return isImmTy(ImmTyGprIdxMode);
6854 }
6855 
6856 //===----------------------------------------------------------------------===//
6857 // sopp branch targets
6858 //===----------------------------------------------------------------------===//
6859 
6860 OperandMatchResultTy
6861 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6862 
6863   // Make sure we are not parsing something
6864   // that looks like a label or an expression but is not.
6865   // This will improve error messages.
6866   if (isRegister() || isModifier())
6867     return MatchOperand_NoMatch;
6868 
6869   if (!parseExpr(Operands))
6870     return MatchOperand_ParseFail;
6871 
6872   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6873   assert(Opr.isImm() || Opr.isExpr());
6874   SMLoc Loc = Opr.getStartLoc();
6875 
6876   // Currently we do not support arbitrary expressions as branch targets.
6877   // Only labels and absolute expressions are accepted.
6878   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6879     Error(Loc, "expected an absolute expression or a label");
6880   } else if (Opr.isImm() && !Opr.isS16Imm()) {
6881     Error(Loc, "expected a 16-bit signed jump offset");
6882   }
6883 
6884   return MatchOperand_Success;
6885 }
6886 
6887 //===----------------------------------------------------------------------===//
6888 // Boolean holding registers
6889 //===----------------------------------------------------------------------===//
6890 
6891 OperandMatchResultTy
6892 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6893   return parseReg(Operands);
6894 }
6895 
6896 //===----------------------------------------------------------------------===//
6897 // mubuf
6898 //===----------------------------------------------------------------------===//
6899 
6900 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
6901   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
6902 }
6903 
6904 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6905                                    const OperandVector &Operands,
6906                                    bool IsAtomic,
6907                                    bool IsLds) {
6908   bool IsLdsOpcode = IsLds;
6909   bool HasLdsModifier = false;
6910   OptionalImmIndexMap OptionalIdx;
6911   unsigned FirstOperandIdx = 1;
6912   bool IsAtomicReturn = false;
6913 
6914   if (IsAtomic) {
6915     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6916       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6917       if (!Op.isCPol())
6918         continue;
6919       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
6920       break;
6921     }
6922 
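    // Without a glc modifier this is a no-return atomic; switch to the
    // no-return opcode variant if one exists.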
6923     if (!IsAtomicReturn) {
6924       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
6925       if (NewOpc != -1)
6926         Inst.setOpcode(NewOpc);
6927     }
6928 
6929     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
6930                       SIInstrFlags::IsAtomicRet;
6931   }
6932 
6933   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6934     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6935 
6936     // Add the register arguments
6937     if (Op.isReg()) {
6938       Op.addRegOperands(Inst, 1);
      // Insert a tied src for the atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on the correct number of MC operands.
6942       if (IsAtomicReturn && i == FirstOperandIdx)
6943         Op.addRegOperands(Inst, 1);
6944       continue;
6945     }
6946 
6947     // Handle the case where soffset is an immediate
6948     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6949       Op.addImmOperands(Inst, 1);
6950       continue;
6951     }
6952 
6953     HasLdsModifier |= Op.isLDS();
6954 
6955     // Handle tokens like 'offen' which are sometimes hard-coded into the
6956     // asm string.  There are no MCInst operands for these.
6957     if (Op.isToken()) {
6958       continue;
6959     }
6960     assert(Op.isImm());
6961 
6962     // Handle optional arguments
6963     OptionalIdx[Op.getImmTy()] = i;
6964   }
6965 
  // This is a workaround for an LLVM quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // optional modifiers and the LLVM asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of an opcode may be selected even if it has no 'lds' modifier.
6973   if (IsLdsOpcode && !HasLdsModifier) {
6974     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6975     if (NoLdsOpcode != -1) { // Got lds version - correct it.
6976       Inst.setOpcode(NoLdsOpcode);
6977       IsLdsOpcode = false;
6978     }
6979   }
6980 
6981   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6982   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
6983 
6984   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6985     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6986   }
6987   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
6988 }
6989 
6990 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6991   OptionalImmIndexMap OptionalIdx;
6992 
6993   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6994     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6995 
6996     // Add the register arguments
6997     if (Op.isReg()) {
6998       Op.addRegOperands(Inst, 1);
6999       continue;
7000     }
7001 
7002     // Handle the case where soffset is an immediate
7003     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7004       Op.addImmOperands(Inst, 1);
7005       continue;
7006     }
7007 
7008     // Handle tokens like 'offen' which are sometimes hard-coded into the
7009     // asm string.  There are no MCInst operands for these.
7010     if (Op.isToken()) {
7011       continue;
7012     }
7013     assert(Op.isImm());
7014 
7015     // Handle optional arguments
7016     OptionalIdx[Op.getImmTy()] = i;
7017   }
7018 
7019   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7020                         AMDGPUOperand::ImmTyOffset);
7021   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7022   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7023   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7024   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7025 }
7026 
7027 //===----------------------------------------------------------------------===//
7028 // mimg
7029 //===----------------------------------------------------------------------===//
7030 
7031 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7032                               bool IsAtomic) {
7033   unsigned I = 1;
7034   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7035   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7036     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7037   }
7038 
7039   if (IsAtomic) {
7040     // Add src, same as dst
7041     assert(Desc.getNumDefs() == 1);
7042     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7043   }
7044 
7045   OptionalImmIndexMap OptionalIdx;
7046 
7047   for (unsigned E = Operands.size(); I != E; ++I) {
7048     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7049 
7050     // Add the register arguments
7051     if (Op.isReg()) {
7052       Op.addRegOperands(Inst, 1);
7053     } else if (Op.isImmModifier()) {
7054       OptionalIdx[Op.getImmTy()] = I;
7055     } else if (!Op.isToken()) {
7056       llvm_unreachable("unexpected operand type");
7057     }
7058   }
7059 
7060   bool IsGFX10Plus = isGFX10Plus();
7061 
7062   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7063   if (IsGFX10Plus)
7064     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7065   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7066   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7067   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7068   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7069     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7070   if (IsGFX10Plus)
7071     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7072   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7073   if (!IsGFX10Plus)
7074     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7075   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7076 }
7077 
7078 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7079   cvtMIMG(Inst, Operands, true);
7080 }
7081 
7082 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7083   OptionalImmIndexMap OptionalIdx;
7084   bool IsAtomicReturn = false;
7085 
7086   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7087     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7088     if (!Op.isCPol())
7089       continue;
7090     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7091     break;
7092   }
7093 
7094   if (!IsAtomicReturn) {
7095     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7096     if (NewOpc != -1)
7097       Inst.setOpcode(NewOpc);
7098   }
7099 
7100   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7101                     SIInstrFlags::IsAtomicRet;
7102 
7103   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7104     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7105 
7106     // Add the register arguments
7107     if (Op.isReg()) {
7108       Op.addRegOperands(Inst, 1);
7109       if (IsAtomicReturn && i == 1)
7110         Op.addRegOperands(Inst, 1);
7111       continue;
7112     }
7113 
7114     // Handle the case where soffset is an immediate
7115     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7116       Op.addImmOperands(Inst, 1);
7117       continue;
7118     }
7119 
7120     // Handle tokens like 'offen' which are sometimes hard-coded into the
7121     // asm string.  There are no MCInst operands for these.
7122     if (Op.isToken()) {
7123       continue;
7124     }
7125     assert(Op.isImm());
7126 
7127     // Handle optional arguments
7128     OptionalIdx[Op.getImmTy()] = i;
7129   }
7130 
7131   if ((int)Inst.getNumOperands() <=
7132       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7133     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7134   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7135 }
7136 
7137 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7138                                       const OperandVector &Operands) {
7139   for (unsigned I = 1; I < Operands.size(); ++I) {
7140     auto &Operand = (AMDGPUOperand &)*Operands[I];
7141     if (Operand.isReg())
7142       Operand.addRegOperands(Inst, 1);
7143   }
7144 
7145   Inst.addOperand(MCOperand::createImm(1)); // a16
7146 }
7147 
7148 //===----------------------------------------------------------------------===//
7149 // smrd
7150 //===----------------------------------------------------------------------===//
7151 
7152 bool AMDGPUOperand::isSMRDOffset8() const {
7153   return isImm() && isUInt<8>(getImm());
7154 }
7155 
7156 bool AMDGPUOperand::isSMEMOffset() const {
7157   return isImm(); // Offset range is checked later by validator.
7158 }
7159 
7160 bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI, and we only want to use them
  // when the offset is wider than 8 bits.
7163   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7164 }
7165 
7166 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7167   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7168 }
7169 
7170 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7171   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7172 }
7173 
7174 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7175   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7176 }
7177 
7178 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7179   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7180 }
7181 
7182 //===----------------------------------------------------------------------===//
7183 // vop3
7184 //===----------------------------------------------------------------------===//
7185 
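// The hardware omod field encodes the output modifier as 0 = none, 1 = *2,
// 2 = *4 and 3 = /2. The helpers below map 'mul:1|2|4' and 'div:1|2' onto
// these encodings.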
7186 static bool ConvertOmodMul(int64_t &Mul) {
7187   if (Mul != 1 && Mul != 2 && Mul != 4)
7188     return false;
7189 
7190   Mul >>= 1;
7191   return true;
7192 }
7193 
7194 static bool ConvertOmodDiv(int64_t &Div) {
7195   if (Div == 1) {
7196     Div = 0;
7197     return true;
7198   }
7199 
7200   if (Div == 2) {
7201     Div = 3;
7202     return true;
7203   }
7204 
7205   return false;
7206 }
7207 
7208 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7209 // This is intentional and ensures compatibility with sp3.
7210 // See bug 35397 for details.
7211 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7212   if (BoundCtrl == 0 || BoundCtrl == 1) {
7213     BoundCtrl = 1;
7214     return true;
7215   }
7216   return false;
7217 }
7218 
7219 // Note: the order in this table matches the order of operands in AsmString.
7220 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7221   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7222   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7223   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7224   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7225   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7226   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7227   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7228   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7229   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7230   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7231   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7232   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7233   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7234   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7235   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7236   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7237   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7238   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7239   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7240   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7241   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7242   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7243   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7244   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7245   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7246   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7247   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7248   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7249   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7250   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7251   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7252   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7253   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7254   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7255   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7256   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7257   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7258   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7259   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7260   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7261   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7262 };
7263 
7264 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7265 
7266   OperandMatchResultTy res = parseOptionalOpr(Operands);
7267 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which
  // have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
7278 
7279   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7280     if (res != MatchOperand_Success ||
7281         isToken(AsmToken::EndOfStatement))
7282       break;
7283 
7284     trySkipToken(AsmToken::Comma);
7285     res = parseOptionalOpr(Operands);
7286   }
7287 
7288   return res;
7289 }
7290 
7291 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7292   OperandMatchResultTy res;
7293   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7294     // try to parse any optional operand here
7295     if (Op.IsBit) {
7296       res = parseNamedBit(Op.Name, Operands, Op.Type);
7297     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7298       res = parseOModOperand(Operands);
7299     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7300                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7301                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7302       res = parseSDWASel(Operands, Op.Name, Op.Type);
7303     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7304       res = parseSDWADstUnused(Operands);
7305     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7306                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7307                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7308                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7309       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7310                                         Op.ConvertResult);
7311     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7312       res = parseDim(Operands);
7313     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7314       res = parseCPol(Operands);
7315     } else {
7316       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7317     }
7318     if (res != MatchOperand_NoMatch) {
7319       return res;
7320     }
7321   }
7322   return MatchOperand_NoMatch;
7323 }
7324 
7325 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7326   StringRef Name = getTokenStr();
7327   if (Name == "mul") {
7328     return parseIntWithPrefix("mul", Operands,
7329                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7330   }
7331 
7332   if (Name == "div") {
7333     return parseIntWithPrefix("div", Operands,
7334                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7335   }
7336 
7337   return MatchOperand_NoMatch;
7338 }
7339 
7340 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7341   cvtVOP3P(Inst, Operands);
7342 
7343   int Opc = Inst.getOpcode();
7344 
7345   int SrcNum;
7346   const int Ops[] = { AMDGPU::OpName::src0,
7347                       AMDGPU::OpName::src1,
7348                       AMDGPU::OpName::src2 };
7349   for (SrcNum = 0;
7350        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7351        ++SrcNum);
7352   assert(SrcNum > 0);
7353 
7354   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7355   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7356 
7357   if ((OpSel & (1 << SrcNum)) != 0) {
7358     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7359     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7360     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7361   }
7362 }
7363 
7364 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input modifiers operand
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand is a register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7373 }
7374 
7375 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7376 {
7377   OptionalImmIndexMap OptionalIdx;
7378   unsigned Opc = Inst.getOpcode();
7379 
7380   unsigned I = 1;
7381   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7382   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7383     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7384   }
7385 
7386   for (unsigned E = Operands.size(); I != E; ++I) {
7387     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7388     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7389       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7390     } else if (Op.isInterpSlot() ||
7391                Op.isInterpAttr() ||
7392                Op.isAttrChan()) {
7393       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7394     } else if (Op.isImmModifier()) {
7395       OptionalIdx[Op.getImmTy()] = I;
7396     } else {
7397       llvm_unreachable("unhandled operand type");
7398     }
7399   }
7400 
7401   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7402     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7403   }
7404 
7405   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7406     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7407   }
7408 
7409   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7410     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7411   }
7412 }
7413 
7414 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7415                               OptionalImmIndexMap &OptionalIdx) {
7416   unsigned Opc = Inst.getOpcode();
7417 
7418   unsigned I = 1;
7419   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7420   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7421     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7422   }
7423 
7424   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7425     // This instruction has src modifiers
7426     for (unsigned E = Operands.size(); I != E; ++I) {
7427       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7428       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7429         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7430       } else if (Op.isImmModifier()) {
7431         OptionalIdx[Op.getImmTy()] = I;
7432       } else if (Op.isRegOrImm()) {
7433         Op.addRegOrImmOperands(Inst, 1);
7434       } else {
7435         llvm_unreachable("unhandled operand type");
7436       }
7437     }
7438   } else {
7439     // No src modifiers
7440     for (unsigned E = Operands.size(); I != E; ++I) {
7441       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7442       if (Op.isMod()) {
7443         OptionalIdx[Op.getImmTy()] = I;
7444       } else {
7445         Op.addRegOrImmOperands(Inst, 1);
7446       }
7447     }
7448   }
7449 
7450   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7451     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7452   }
7453 
7454   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7455     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7456   }
7457 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
7462   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7463       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7464       Opc == AMDGPU::V_MAC_F32_e64_vi ||
7465       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7466       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7467       Opc == AMDGPU::V_MAC_F16_e64_vi ||
7468       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7469       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7470       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7471       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7472       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7473     auto it = Inst.begin();
7474     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7475     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7476     ++it;
7477     // Copy the operand to ensure it's not invalidated when Inst grows.
7478     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7479   }
7480 }
7481 
7482 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7483   OptionalImmIndexMap OptionalIdx;
7484   cvtVOP3(Inst, Operands, OptionalIdx);
7485 }
7486 
7487 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
7488                                const OperandVector &Operands) {
7489   OptionalImmIndexMap OptIdx;
7490   const int Opc = Inst.getOpcode();
7491   const MCInstrDesc &Desc = MII.get(Opc);
7492 
7493   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7494 
7495   cvtVOP3(Inst, Operands, OptIdx);
7496 
7497   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7498     assert(!IsPacked);
7499     Inst.addOperand(Inst.getOperand(0));
7500   }
7501 
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
7504 
7505   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7506 
7507   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7508   if (OpSelHiIdx != -1) {
7509     int DefaultVal = IsPacked ? -1 : 0;
7510     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7511                           DefaultVal);
7512   }
7513 
7514   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7515   if (NegLoIdx != -1) {
7516     assert(IsPacked);
7517     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7518     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7519   }
7520 
7521   const int Ops[] = { AMDGPU::OpName::src0,
7522                       AMDGPU::OpName::src1,
7523                       AMDGPU::OpName::src2 };
7524   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7525                          AMDGPU::OpName::src1_modifiers,
7526                          AMDGPU::OpName::src2_modifiers };
7527 
7528   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7529 
7530   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7531   unsigned OpSelHi = 0;
7532   unsigned NegLo = 0;
7533   unsigned NegHi = 0;
7534 
7535   if (OpSelHiIdx != -1) {
7536     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7537   }
7538 
7539   if (NegLoIdx != -1) {
7540     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7541     NegLo = Inst.getOperand(NegLoIdx).getImm();
7542     NegHi = Inst.getOperand(NegHiIdx).getImm();
7543   }
7544 
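  // Fold the parsed op_sel/op_sel_hi/neg_lo/neg_hi bits into the per-source
  // src*_modifiers operands.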
7545   for (int J = 0; J < 3; ++J) {
7546     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7547     if (OpIdx == -1)
7548       break;
7549 
7550     uint32_t ModVal = 0;
7551 
7552     if ((OpSel & (1 << J)) != 0)
7553       ModVal |= SISrcMods::OP_SEL_0;
7554 
7555     if ((OpSelHi & (1 << J)) != 0)
7556       ModVal |= SISrcMods::OP_SEL_1;
7557 
7558     if ((NegLo & (1 << J)) != 0)
7559       ModVal |= SISrcMods::NEG;
7560 
7561     if ((NegHi & (1 << J)) != 0)
7562       ModVal |= SISrcMods::NEG_HI;
7563 
7564     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7565 
7566     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7567   }
7568 }
7569 
7570 //===----------------------------------------------------------------------===//
7571 // dpp
7572 //===----------------------------------------------------------------------===//
7573 
7574 bool AMDGPUOperand::isDPP8() const {
7575   return isImmTy(ImmTyDPP8);
7576 }
7577 
7578 bool AMDGPUOperand::isDPPCtrl() const {
7579   using namespace AMDGPU::DPP;
7580 
7581   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7582   if (result) {
7583     int64_t Imm = getImm();
7584     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7585            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7586            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7587            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7588            (Imm == DppCtrl::WAVE_SHL1) ||
7589            (Imm == DppCtrl::WAVE_ROL1) ||
7590            (Imm == DppCtrl::WAVE_SHR1) ||
7591            (Imm == DppCtrl::WAVE_ROR1) ||
7592            (Imm == DppCtrl::ROW_MIRROR) ||
7593            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7594            (Imm == DppCtrl::BCAST15) ||
7595            (Imm == DppCtrl::BCAST31) ||
7596            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7597            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7598   }
7599   return false;
7600 }
7601 
7602 //===----------------------------------------------------------------------===//
7603 // mAI
7604 //===----------------------------------------------------------------------===//
7605 
7606 bool AMDGPUOperand::isBLGP() const {
7607   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7608 }
7609 
7610 bool AMDGPUOperand::isCBSZ() const {
7611   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7612 }
7613 
7614 bool AMDGPUOperand::isABID() const {
7615   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7616 }
7617 
7618 bool AMDGPUOperand::isS16Imm() const {
7619   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7620 }
7621 
7622 bool AMDGPUOperand::isU16Imm() const {
7623   return isImm() && isUInt<16>(getImm());
7624 }
7625 
7626 //===----------------------------------------------------------------------===//
7627 // dim
7628 //===----------------------------------------------------------------------===//
7629 
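// Parse a dim specifier, e.g. dim:1D or dim:SQ_RSRC_IMG_2D_ARRAY
// (illustrative).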
7630 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7631   // We want to allow "dim:1D" etc.,
7632   // but the initial 1 is tokenized as an integer.
7633   std::string Token;
7634   if (isToken(AsmToken::Integer)) {
7635     SMLoc Loc = getToken().getEndLoc();
7636     Token = std::string(getTokenStr());
7637     lex();
7638     if (getLoc() != Loc)
7639       return false;
7640   }
7641 
7642   StringRef Suffix;
7643   if (!parseId(Suffix))
7644     return false;
7645   Token += Suffix;
7646 
7647   StringRef DimId = Token;
7648   if (DimId.startswith("SQ_RSRC_IMG_"))
7649     DimId = DimId.drop_front(12);
7650 
7651   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7652   if (!DimInfo)
7653     return false;
7654 
7655   Encoding = DimInfo->Encoding;
7656   return true;
7657 }
7658 
7659 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7660   if (!isGFX10Plus())
7661     return MatchOperand_NoMatch;
7662 
7663   SMLoc S = getLoc();
7664 
7665   if (!trySkipId("dim", AsmToken::Colon))
7666     return MatchOperand_NoMatch;
7667 
7668   unsigned Encoding;
7669   SMLoc Loc = getLoc();
7670   if (!parseDimId(Encoding)) {
7671     Error(Loc, "invalid dim value");
7672     return MatchOperand_ParseFail;
7673   }
7674 
7675   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7676                                               AMDGPUOperand::ImmTyDim));
7677   return MatchOperand_Success;
7678 }
7679 
7680 //===----------------------------------------------------------------------===//
7681 // dpp
7682 //===----------------------------------------------------------------------===//
7683 
7684 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7685   SMLoc S = getLoc();
7686 
7687   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7688     return MatchOperand_NoMatch;
7689 
7690   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
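  // Each of the eight lane selectors is a value in [0,7]; they are packed
  // below into a single immediate, 3 bits per lane.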
7691 
7692   int64_t Sels[8];
7693 
7694   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7695     return MatchOperand_ParseFail;
7696 
7697   for (size_t i = 0; i < 8; ++i) {
7698     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7699       return MatchOperand_ParseFail;
7700 
7701     SMLoc Loc = getLoc();
7702     if (getParser().parseAbsoluteExpression(Sels[i]))
7703       return MatchOperand_ParseFail;
7704     if (0 > Sels[i] || 7 < Sels[i]) {
7705       Error(Loc, "expected a 3-bit value");
7706       return MatchOperand_ParseFail;
7707     }
7708   }
7709 
7710   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7711     return MatchOperand_ParseFail;
7712 
7713   unsigned DPP8 = 0;
7714   for (size_t i = 0; i < 8; ++i)
7715     DPP8 |= (Sels[i] << (i * 3));
7716 
7717   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7718   return MatchOperand_Success;
7719 }
7720 
7721 bool
7722 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7723                                     const OperandVector &Operands) {
7724   if (Ctrl == "row_newbcast")
7725       return isGFX90A();
7726 
7727   // DPP64 is supported for row_newbcast only.
7728   const MCRegisterInfo *MRI = getMRI();
7729   if (Operands.size() > 2 && Operands[1]->isReg() &&
7730       MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1))
7731     return false;
7732 
7733   if (Ctrl == "row_share" ||
7734       Ctrl == "row_xmask")
7735     return isGFX10Plus();
7736 
7737   if (Ctrl == "wave_shl" ||
7738       Ctrl == "wave_shr" ||
7739       Ctrl == "wave_rol" ||
7740       Ctrl == "wave_ror" ||
7741       Ctrl == "row_bcast")
7742     return isVI() || isGFX9();
7743 
7744   return Ctrl == "row_mirror" ||
7745          Ctrl == "row_half_mirror" ||
7746          Ctrl == "quad_perm" ||
7747          Ctrl == "row_shl" ||
7748          Ctrl == "row_shr" ||
7749          Ctrl == "row_ror";
7750 }
7751 
7752 int64_t
7753 AMDGPUAsmParser::parseDPPCtrlPerm() {
7754   // quad_perm:[%d,%d,%d,%d]
7755 
7756   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7757     return -1;
7758 
7759   int64_t Val = 0;
7760   for (int i = 0; i < 4; ++i) {
7761     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7762       return -1;
7763 
7764     int64_t Temp;
7765     SMLoc Loc = getLoc();
7766     if (getParser().parseAbsoluteExpression(Temp))
7767       return -1;
7768     if (Temp < 0 || Temp > 3) {
7769       Error(Loc, "expected a 2-bit value");
7770       return -1;
7771     }
7772 
7773     Val += (Temp << i * 2);
7774   }
7775 
7776   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7777     return -1;
7778 
7779   return Val;
7780 }
7781 
7782 int64_t
7783 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7784   using namespace AMDGPU::DPP;
7785 
7786   // sel:%d
7787 
7788   int64_t Val;
7789   SMLoc Loc = getLoc();
7790 
7791   if (getParser().parseAbsoluteExpression(Val))
7792     return -1;
7793 
7794   struct DppCtrlCheck {
7795     int64_t Ctrl;
7796     int Lo;
7797     int Hi;
7798   };
7799 
7800   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7801     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7802     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7803     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7804     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7805     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7806     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7807     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7808     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7809     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7810     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7811     .Default({-1, 0, 0});
7812 
7813   bool Valid;
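  // Controls not in the table (row_bcast) accept only 15 or 31. For the rest,
  // the value is range-checked; wave_* controls (Lo == Hi) map to a fixed
  // encoding, while the others OR the value into the base encoding.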
7814   if (Check.Ctrl == -1) {
7815     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7817   } else {
7818     Valid = Check.Lo <= Val && Val <= Check.Hi;
7819     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7820   }
7821 
7822   if (!Valid) {
7823     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7824     return -1;
7825   }
7826 
7827   return Val;
7828 }
7829 
7830 OperandMatchResultTy
7831 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7832   using namespace AMDGPU::DPP;
7833 
7834   if (!isToken(AsmToken::Identifier) ||
7835       !isSupportedDPPCtrl(getTokenStr(), Operands))
7836     return MatchOperand_NoMatch;
7837 
7838   SMLoc S = getLoc();
7839   int64_t Val = -1;
7840   StringRef Ctrl;
7841 
7842   parseId(Ctrl);
7843 
7844   if (Ctrl == "row_mirror") {
7845     Val = DppCtrl::ROW_MIRROR;
7846   } else if (Ctrl == "row_half_mirror") {
7847     Val = DppCtrl::ROW_HALF_MIRROR;
7848   } else {
7849     if (skipToken(AsmToken::Colon, "expected a colon")) {
7850       if (Ctrl == "quad_perm") {
7851         Val = parseDPPCtrlPerm();
7852       } else {
7853         Val = parseDPPCtrlSel(Ctrl);
7854       }
7855     }
7856   }
7857 
7858   if (Val == -1)
7859     return MatchOperand_ParseFail;
7860 
7861   Operands.push_back(
7862     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7863   return MatchOperand_Success;
7864 }
7865 
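// A row_mask/bank_mask of 0xf (the default) enables all DPP rows and banks.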
7866 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7867   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7868 }
7869 
7870 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7871   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7872 }
7873 
7874 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7875   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7876 }
7877 
7878 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7879   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7880 }
7881 
7882 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7883   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7884 }
7885 
7886 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7887   OptionalImmIndexMap OptionalIdx;
7888 
7889   unsigned I = 1;
7890   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7891   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7892     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7893   }
7894 
7895   int Fi = 0;
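  // Value of the optional fi modifier; converted to DPP8_FI_0/DPP8_FI_1 below.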
7896   for (unsigned E = Operands.size(); I != E; ++I) {
7897     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7898                                             MCOI::TIED_TO);
7899     if (TiedTo != -1) {
7900       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or src2 operand for MAC instructions.
7902       Inst.addOperand(Inst.getOperand(TiedTo));
7903     }
7904     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7905     // Add the register arguments
7906     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
      // Skip it.
7909       continue;
7910     }
7911 
7912     if (IsDPP8) {
7913       if (Op.isDPP8()) {
7914         Op.addImmOperands(Inst, 1);
7915       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7916         Op.addRegWithFPInputModsOperands(Inst, 2);
7917       } else if (Op.isFI()) {
7918         Fi = Op.getImm();
7919       } else if (Op.isReg()) {
7920         Op.addRegOperands(Inst, 1);
7921       } else {
7922         llvm_unreachable("Invalid operand type");
7923       }
7924     } else {
7925       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7926         Op.addRegWithFPInputModsOperands(Inst, 2);
7927       } else if (Op.isDPPCtrl()) {
7928         Op.addImmOperands(Inst, 1);
7929       } else if (Op.isImm()) {
7930         // Handle optional arguments
7931         OptionalIdx[Op.getImmTy()] = I;
7932       } else {
7933         llvm_unreachable("Invalid operand type");
7934       }
7935     }
7936   }
7937 
7938   if (IsDPP8) {
7939     using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
7941   } else {
7942     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7943     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7944     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7945     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7946       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7947     }
7948   }
7949 }
7950 
7951 //===----------------------------------------------------------------------===//
7952 // sdwa
7953 //===----------------------------------------------------------------------===//
7954 
7955 OperandMatchResultTy
7956 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7957                               AMDGPUOperand::ImmTy Type) {
7958   using namespace llvm::AMDGPU::SDWA;
7959 
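  // Parses <Prefix>:<sel> where <sel> is one of BYTE_0..BYTE_3, WORD_0,
  // WORD_1 or DWORD, e.g. dst_sel:WORD_1.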
7960   SMLoc S = getLoc();
7961   StringRef Value;
7962   OperandMatchResultTy res;
7963 
7964   SMLoc StringLoc;
7965   res = parseStringWithPrefix(Prefix, Value, StringLoc);
7966   if (res != MatchOperand_Success) {
7967     return res;
7968   }
7969 
7970   int64_t Int;
7971   Int = StringSwitch<int64_t>(Value)
7972         .Case("BYTE_0", SdwaSel::BYTE_0)
7973         .Case("BYTE_1", SdwaSel::BYTE_1)
7974         .Case("BYTE_2", SdwaSel::BYTE_2)
7975         .Case("BYTE_3", SdwaSel::BYTE_3)
7976         .Case("WORD_0", SdwaSel::WORD_0)
7977         .Case("WORD_1", SdwaSel::WORD_1)
7978         .Case("DWORD", SdwaSel::DWORD)
7979         .Default(0xffffffff);
7980 
7981   if (Int == 0xffffffff) {
7982     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
7983     return MatchOperand_ParseFail;
7984   }
7985 
7986   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7987   return MatchOperand_Success;
7988 }
7989 
7990 OperandMatchResultTy
7991 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7992   using namespace llvm::AMDGPU::SDWA;
7993 
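  // Parses dst_unused:<UNUSED_PAD|UNUSED_SEXT|UNUSED_PRESERVE>.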
7994   SMLoc S = getLoc();
7995   StringRef Value;
7996   OperandMatchResultTy res;
7997 
7998   SMLoc StringLoc;
7999   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8000   if (res != MatchOperand_Success) {
8001     return res;
8002   }
8003 
8004   int64_t Int;
8005   Int = StringSwitch<int64_t>(Value)
8006         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8007         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8008         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8009         .Default(0xffffffff);
8010 
8011   if (Int == 0xffffffff) {
8012     Error(StringLoc, "invalid dst_unused value");
8013     return MatchOperand_ParseFail;
8014   }
8015 
8016   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8017   return MatchOperand_Success;
8018 }
8019 
8020 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8021   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8022 }
8023 
8024 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8025   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8026 }
8027 
8028 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8029   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8030 }
8031 
8032 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8033   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8034 }
8035 
8036 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8037   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8038 }
8039 
8040 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8041                               uint64_t BasicInstType,
8042                               bool SkipDstVcc,
8043                               bool SkipSrcVcc) {
8044   using namespace llvm::AMDGPU::SDWA;
8045 
8046   OptionalImmIndexMap OptionalIdx;
8047   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8048   bool SkippedVcc = false;
8049 
8050   unsigned I = 1;
8051   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8052   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8053     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8054   }
8055 
8056   for (unsigned E = Operands.size(); I != E; ++I) {
8057     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8058     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8059         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
        // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
        // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
        // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
        // Skip VCC only if we did not skip it on the previous iteration.
        // Note that src0 and src1 occupy two slots each because of modifiers.
8065       if (BasicInstType == SIInstrFlags::VOP2 &&
8066           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8067            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8068         SkippedVcc = true;
8069         continue;
8070       } else if (BasicInstType == SIInstrFlags::VOPC &&
8071                  Inst.getNumOperands() == 0) {
8072         SkippedVcc = true;
8073         continue;
8074       }
8075     }
8076     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8077       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8078     } else if (Op.isImm()) {
8079       // Handle optional arguments
8080       OptionalIdx[Op.getImmTy()] = I;
8081     } else {
8082       llvm_unreachable("Invalid operand type");
8083     }
8084     SkippedVcc = false;
8085   }
8086 
8087   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8088       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8089       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi/gfx9/gfx10 have no optional SDWA arguments.
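    // Append defaults for any optional SDWA operands the user omitted, in the
    // order the encoding expects them.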
8091     switch (BasicInstType) {
8092     case SIInstrFlags::VOP1:
8093       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8094       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8095         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8096       }
8097       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8098       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8099       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8100       break;
8101 
8102     case SIInstrFlags::VOP2:
8103       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8104       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8105         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8106       }
8107       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8108       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8109       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8110       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8111       break;
8112 
8113     case SIInstrFlags::VOPC:
8114       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8115         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8116       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8117       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8118       break;
8119 
8120     default:
8121       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8122     }
8123   }
8124 
  // Special case for v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
8127   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8128       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8129     auto it = Inst.begin();
8130     std::advance(
8131       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8132     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8133   }
8134 }
8135 
8136 //===----------------------------------------------------------------------===//
8137 // mAI
8138 //===----------------------------------------------------------------------===//
8139 
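// MFMA modifiers (blgp, cbsz and abid) default to 0 when omitted.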
8140 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8141   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8142 }
8143 
8144 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8145   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8146 }
8147 
8148 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8149   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8150 }
8151 
8152 /// Force static initialization.
8153 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8154   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8155   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8156 }
8157 
8158 #define GET_REGISTER_MATCHER
8159 #define GET_MATCHER_IMPLEMENTATION
8160 #define GET_MNEMONIC_SPELL_CHECKER
8161 #define GET_MNEMONIC_CHECKER
8162 #include "AMDGPUGenAsmMatcher.inc"
8163 
// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is defined.
8166 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8167                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether an operand that was parsed as an
  // immediate actually satisfies the token class the matcher expects.
8172   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8173   switch (Kind) {
8174   case MCK_addr64:
8175     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8176   case MCK_gds:
8177     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8178   case MCK_lds:
8179     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8180   case MCK_idxen:
8181     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8182   case MCK_offen:
8183     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8184   case MCK_SSrcB32:
    // When operands have expression values, isToken() returns true because a
    // token and an expression cannot be distinguished at parse time.
    // MatchInstructionImpl() then always tries to match the operand as a
    // token, and if the expression's name is not a valid token the match
    // fails, so we need to handle it here.
8191     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8192   case MCK_SSrcF32:
8193     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8194   case MCK_SoppBrTarget:
8195     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8196   case MCK_VReg32OrOff:
8197     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8198   case MCK_InterpSlot:
8199     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8200   case MCK_Attr:
8201     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8202   case MCK_AttrChan:
8203     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8204   case MCK_ImmSMEMOffset:
8205     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8206   case MCK_SReg_64:
8207   case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be accepted where 64-bit operands are expected.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
8213     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8214   default:
8215     return Match_InvalidOperand;
8216   }
8217 }
8218 
8219 //===----------------------------------------------------------------------===//
8220 // endpgm
8221 //===----------------------------------------------------------------------===//
8222 
8223 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8224   SMLoc S = getLoc();
8225   int64_t Imm = 0;
8226 
8227   if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
8229     Imm = 0;
8230   }
8231 
8232   if (!isUInt<16>(Imm)) {
8233     Error(S, "expected a 16-bit value");
8234     return MatchOperand_ParseFail;
8235   }
8236 
8237   Operands.push_back(
8238       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8239   return MatchOperand_Success;
8240 }
8241 
8242 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8243