1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
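  // Source operand modifiers as written in assembly: 'abs' (|x| or abs(x)) and
  // 'neg' (-x or neg(x)) for floating-point sources, and 'sext(x)' for integer
  // sources.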
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTyTFE,
147     ImmTyD16,
148     ImmTyClampSI,
149     ImmTyOModSI,
150     ImmTyDPP8,
151     ImmTyDppCtrl,
152     ImmTyDppRowMask,
153     ImmTyDppBankMask,
154     ImmTyDppBoundCtrl,
155     ImmTyDppFi,
156     ImmTySdwaDstSel,
157     ImmTySdwaSrc0Sel,
158     ImmTySdwaSrc1Sel,
159     ImmTySdwaDstUnused,
160     ImmTyDMask,
161     ImmTyDim,
162     ImmTyUNorm,
163     ImmTyDA,
164     ImmTyR128A16,
165     ImmTyLWE,
166     ImmTyExpTgt,
167     ImmTyExpCompr,
168     ImmTyExpVM,
169     ImmTyFORMAT,
170     ImmTyHwreg,
171     ImmTyOff,
172     ImmTySendMsg,
173     ImmTyInterpSlot,
174     ImmTyInterpAttr,
175     ImmTyAttrChan,
176     ImmTyOpSel,
177     ImmTyOpSelHi,
178     ImmTyNegLo,
179     ImmTyNegHi,
180     ImmTySwizzle,
181     ImmTyGprIdxMode,
182     ImmTyEndpgm,
183     ImmTyHigh
184   };
185 
186 private:
187   struct TokOp {
188     const char *Data;
189     unsigned Length;
190   };
191 
192   struct ImmOp {
193     int64_t Val;
194     ImmTy Type;
195     bool IsFPImm;
196     Modifiers Mods;
197   };
198 
199   struct RegOp {
200     unsigned RegNo;
201     Modifiers Mods;
202   };
203 
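  // Operand payload; the active member is selected by Kind.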
204   union {
205     TokOp Tok;
206     ImmOp Imm;
207     RegOp Reg;
208     const MCExpr *Expr;
209   };
210 
211 public:
212   bool isToken() const override {
213     if (Kind == Token)
214       return true;
215 
216     if (Kind != Expression || !Expr)
217       return false;
218 
219     // When parsing operands, we can't always tell if something was meant to be
220     // a token, like 'gds', or an expression that references a global variable.
221     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
223     return isa<MCSymbolRefExpr>(Expr);
224   }
225 
226   bool isImm() const override {
227     return Kind == Immediate;
228   }
229 
230   bool isInlinableImm(MVT type) const;
231   bool isLiteralImm(MVT type) const;
232 
233   bool isRegKind() const {
234     return Kind == Register;
235   }
236 
237   bool isReg() const override {
238     return isRegKind() && !hasModifiers();
239   }
240 
241   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
242     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
243   }
244 
245   bool isRegOrImmWithInt16InputMods() const {
246     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
247   }
248 
249   bool isRegOrImmWithInt32InputMods() const {
250     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
251   }
252 
253   bool isRegOrImmWithInt64InputMods() const {
254     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
255   }
256 
257   bool isRegOrImmWithFP16InputMods() const {
258     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
259   }
260 
261   bool isRegOrImmWithFP32InputMods() const {
262     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
263   }
264 
265   bool isRegOrImmWithFP64InputMods() const {
266     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
267   }
268 
269   bool isVReg() const {
270     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
271            isRegClass(AMDGPU::VReg_64RegClassID) ||
272            isRegClass(AMDGPU::VReg_96RegClassID) ||
273            isRegClass(AMDGPU::VReg_128RegClassID) ||
274            isRegClass(AMDGPU::VReg_256RegClassID) ||
275            isRegClass(AMDGPU::VReg_512RegClassID);
276   }
277 
278   bool isVReg32() const {
279     return isRegClass(AMDGPU::VGPR_32RegClassID);
280   }
281 
282   bool isVReg32OrOff() const {
283     return isOff() || isVReg32();
284   }
285 
286   bool isSDWAOperand(MVT type) const;
287   bool isSDWAFP16Operand() const;
288   bool isSDWAFP32Operand() const;
289   bool isSDWAInt16Operand() const;
290   bool isSDWAInt32Operand() const;
291 
292   bool isImmTy(ImmTy ImmT) const {
293     return isImm() && Imm.Type == ImmT;
294   }
295 
296   bool isImmModifier() const {
297     return isImm() && Imm.Type != ImmTyNone;
298   }
299 
300   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
301   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
302   bool isDMask() const { return isImmTy(ImmTyDMask); }
303   bool isDim() const { return isImmTy(ImmTyDim); }
304   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
305   bool isDA() const { return isImmTy(ImmTyDA); }
306   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
307   bool isLWE() const { return isImmTy(ImmTyLWE); }
308   bool isOff() const { return isImmTy(ImmTyOff); }
309   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
310   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
311   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
312   bool isOffen() const { return isImmTy(ImmTyOffen); }
313   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
314   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
315   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
316   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
317   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
318 
319   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
320   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
321   bool isGDS() const { return isImmTy(ImmTyGDS); }
322   bool isLDS() const { return isImmTy(ImmTyLDS); }
323   bool isDLC() const { return isImmTy(ImmTyDLC); }
324   bool isGLC() const { return isImmTy(ImmTyGLC); }
325   bool isSLC() const { return isImmTy(ImmTySLC); }
326   bool isTFE() const { return isImmTy(ImmTyTFE); }
327   bool isD16() const { return isImmTy(ImmTyD16); }
328   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
329   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
330   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
331   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
332   bool isFI() const { return isImmTy(ImmTyDppFi); }
333   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
334   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
335   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
336   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
337   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
338   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
339   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
340   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
341   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
342   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
343   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
344   bool isHigh() const { return isImmTy(ImmTyHigh); }
345 
346   bool isMod() const {
347     return isClampSI() || isOModSI();
348   }
349 
350   bool isRegOrImm() const {
351     return isReg() || isImm();
352   }
353 
354   bool isRegClass(unsigned RCID) const;
355 
356   bool isInlineValue() const;
357 
358   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
359     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
360   }
361 
362   bool isSCSrcB16() const {
363     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
364   }
365 
366   bool isSCSrcV2B16() const {
367     return isSCSrcB16();
368   }
369 
370   bool isSCSrcB32() const {
371     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
372   }
373 
374   bool isSCSrcB64() const {
375     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
376   }
377 
378   bool isBoolReg() const;
379 
380   bool isSCSrcF16() const {
381     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
382   }
383 
384   bool isSCSrcV2F16() const {
385     return isSCSrcF16();
386   }
387 
388   bool isSCSrcF32() const {
389     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
390   }
391 
392   bool isSCSrcF64() const {
393     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
394   }
395 
396   bool isSSrcB32() const {
397     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
398   }
399 
400   bool isSSrcB16() const {
401     return isSCSrcB16() || isLiteralImm(MVT::i16);
402   }
403 
404   bool isSSrcV2B16() const {
405     llvm_unreachable("cannot happen");
406     return isSSrcB16();
407   }
408 
409   bool isSSrcB64() const {
410     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
411     // See isVSrc64().
412     return isSCSrcB64() || isLiteralImm(MVT::i64);
413   }
414 
415   bool isSSrcF32() const {
416     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
417   }
418 
419   bool isSSrcF64() const {
420     return isSCSrcB64() || isLiteralImm(MVT::f64);
421   }
422 
423   bool isSSrcF16() const {
424     return isSCSrcB16() || isLiteralImm(MVT::f16);
425   }
426 
427   bool isSSrcV2F16() const {
428     llvm_unreachable("cannot happen");
429     return isSSrcF16();
430   }
431 
432   bool isSSrcOrLdsB32() const {
433     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
434            isLiteralImm(MVT::i32) || isExpr();
435   }
436 
437   bool isVCSrcB32() const {
438     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
439   }
440 
441   bool isVCSrcB64() const {
442     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
443   }
444 
445   bool isVCSrcB16() const {
446     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
447   }
448 
449   bool isVCSrcV2B16() const {
450     return isVCSrcB16();
451   }
452 
453   bool isVCSrcF32() const {
454     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
455   }
456 
457   bool isVCSrcF64() const {
458     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
459   }
460 
461   bool isVCSrcF16() const {
462     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
463   }
464 
465   bool isVCSrcV2F16() const {
466     return isVCSrcF16();
467   }
468 
469   bool isVSrcB32() const {
470     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
471   }
472 
473   bool isVSrcB64() const {
474     return isVCSrcF64() || isLiteralImm(MVT::i64);
475   }
476 
477   bool isVSrcB16() const {
478     return isVCSrcF16() || isLiteralImm(MVT::i16);
479   }
480 
481   bool isVSrcV2B16() const {
482     return isVSrcB16() || isLiteralImm(MVT::v2i16);
483   }
484 
485   bool isVSrcF32() const {
486     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
487   }
488 
489   bool isVSrcF64() const {
490     return isVCSrcF64() || isLiteralImm(MVT::f64);
491   }
492 
493   bool isVSrcF16() const {
494     return isVCSrcF16() || isLiteralImm(MVT::f16);
495   }
496 
497   bool isVSrcV2F16() const {
498     return isVSrcF16() || isLiteralImm(MVT::v2f16);
499   }
500 
501   bool isKImmFP32() const {
502     return isLiteralImm(MVT::f32);
503   }
504 
505   bool isKImmFP16() const {
506     return isLiteralImm(MVT::f16);
507   }
508 
509   bool isMem() const override {
510     return false;
511   }
512 
513   bool isExpr() const {
514     return Kind == Expression;
515   }
516 
517   bool isSoppBrTarget() const {
518     return isExpr() || isImm();
519   }
520 
521   bool isSWaitCnt() const;
522   bool isHwreg() const;
523   bool isSendMsg() const;
524   bool isSwizzle() const;
525   bool isSMRDOffset8() const;
526   bool isSMRDOffset20() const;
527   bool isSMRDLiteralOffset() const;
528   bool isDPP8() const;
529   bool isDPPCtrl() const;
530   bool isGPRIdxMode() const;
531   bool isS16Imm() const;
532   bool isU16Imm() const;
533   bool isEndpgm() const;
534 
535   StringRef getExpressionAsToken() const {
536     assert(isExpr());
537     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
538     return S->getSymbol().getName();
539   }
540 
541   StringRef getToken() const {
542     assert(isToken());
543 
544     if (Kind == Expression)
545       return getExpressionAsToken();
546 
547     return StringRef(Tok.Data, Tok.Length);
548   }
549 
550   int64_t getImm() const {
551     assert(isImm());
552     return Imm.Val;
553   }
554 
555   ImmTy getImmTy() const {
556     assert(isImm());
557     return Imm.Type;
558   }
559 
560   unsigned getReg() const override {
561     assert(isRegKind());
562     return Reg.RegNo;
563   }
564 
565   SMLoc getStartLoc() const override {
566     return StartLoc;
567   }
568 
569   SMLoc getEndLoc() const override {
570     return EndLoc;
571   }
572 
573   SMRange getLocRange() const {
574     return SMRange(StartLoc, EndLoc);
575   }
576 
577   Modifiers getModifiers() const {
578     assert(isRegKind() || isImmTy(ImmTyNone));
579     return isRegKind() ? Reg.Mods : Imm.Mods;
580   }
581 
582   void setModifiers(Modifiers Mods) {
583     assert(isRegKind() || isImmTy(ImmTyNone));
584     if (isRegKind())
585       Reg.Mods = Mods;
586     else
587       Imm.Mods = Mods;
588   }
589 
590   bool hasModifiers() const {
591     return getModifiers().hasModifiers();
592   }
593 
594   bool hasFPModifiers() const {
595     return getModifiers().hasFPModifiers();
596   }
597 
598   bool hasIntModifiers() const {
599     return getModifiers().hasIntModifiers();
600   }
601 
602   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
603 
604   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
605 
606   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
607 
608   template <unsigned Bitwidth>
609   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
610 
611   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
612     addKImmFPOperands<16>(Inst, N);
613   }
614 
615   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
616     addKImmFPOperands<32>(Inst, N);
617   }
618 
619   void addRegOperands(MCInst &Inst, unsigned N) const;
620 
621   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
622     addRegOperands(Inst, N);
623   }
624 
625   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
626     if (isRegKind())
627       addRegOperands(Inst, N);
628     else if (isExpr())
629       Inst.addOperand(MCOperand::createExpr(Expr));
630     else
631       addImmOperands(Inst, N);
632   }
633 
634   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
635     Modifiers Mods = getModifiers();
636     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
637     if (isRegKind()) {
638       addRegOperands(Inst, N);
639     } else {
640       addImmOperands(Inst, N, false);
641     }
642   }
643 
644   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
645     assert(!hasIntModifiers());
646     addRegOrImmWithInputModsOperands(Inst, N);
647   }
648 
649   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
650     assert(!hasFPModifiers());
651     addRegOrImmWithInputModsOperands(Inst, N);
652   }
653 
654   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
655     Modifiers Mods = getModifiers();
656     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
657     assert(isRegKind());
658     addRegOperands(Inst, N);
659   }
660 
661   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
662     assert(!hasIntModifiers());
663     addRegWithInputModsOperands(Inst, N);
664   }
665 
666   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
667     assert(!hasFPModifiers());
668     addRegWithInputModsOperands(Inst, N);
669   }
670 
671   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
672     if (isImm())
673       addImmOperands(Inst, N);
674     else {
675       assert(isExpr());
676       Inst.addOperand(MCOperand::createExpr(Expr));
677     }
678   }
679 
680   static void printImmTy(raw_ostream& OS, ImmTy Type) {
681     switch (Type) {
682     case ImmTyNone: OS << "None"; break;
683     case ImmTyGDS: OS << "GDS"; break;
684     case ImmTyLDS: OS << "LDS"; break;
685     case ImmTyOffen: OS << "Offen"; break;
686     case ImmTyIdxen: OS << "Idxen"; break;
687     case ImmTyAddr64: OS << "Addr64"; break;
688     case ImmTyOffset: OS << "Offset"; break;
689     case ImmTyInstOffset: OS << "InstOffset"; break;
690     case ImmTyOffset0: OS << "Offset0"; break;
691     case ImmTyOffset1: OS << "Offset1"; break;
692     case ImmTyDLC: OS << "DLC"; break;
693     case ImmTyGLC: OS << "GLC"; break;
694     case ImmTySLC: OS << "SLC"; break;
695     case ImmTyTFE: OS << "TFE"; break;
696     case ImmTyD16: OS << "D16"; break;
697     case ImmTyFORMAT: OS << "FORMAT"; break;
698     case ImmTyClampSI: OS << "ClampSI"; break;
699     case ImmTyOModSI: OS << "OModSI"; break;
700     case ImmTyDPP8: OS << "DPP8"; break;
701     case ImmTyDppCtrl: OS << "DppCtrl"; break;
702     case ImmTyDppRowMask: OS << "DppRowMask"; break;
703     case ImmTyDppBankMask: OS << "DppBankMask"; break;
704     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
705     case ImmTyDppFi: OS << "FI"; break;
706     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
707     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
708     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
709     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
710     case ImmTyDMask: OS << "DMask"; break;
711     case ImmTyDim: OS << "Dim"; break;
712     case ImmTyUNorm: OS << "UNorm"; break;
713     case ImmTyDA: OS << "DA"; break;
714     case ImmTyR128A16: OS << "R128A16"; break;
715     case ImmTyLWE: OS << "LWE"; break;
716     case ImmTyOff: OS << "Off"; break;
717     case ImmTyExpTgt: OS << "ExpTgt"; break;
718     case ImmTyExpCompr: OS << "ExpCompr"; break;
719     case ImmTyExpVM: OS << "ExpVM"; break;
720     case ImmTyHwreg: OS << "Hwreg"; break;
721     case ImmTySendMsg: OS << "SendMsg"; break;
722     case ImmTyInterpSlot: OS << "InterpSlot"; break;
723     case ImmTyInterpAttr: OS << "InterpAttr"; break;
724     case ImmTyAttrChan: OS << "AttrChan"; break;
725     case ImmTyOpSel: OS << "OpSel"; break;
726     case ImmTyOpSelHi: OS << "OpSelHi"; break;
727     case ImmTyNegLo: OS << "NegLo"; break;
728     case ImmTyNegHi: OS << "NegHi"; break;
729     case ImmTySwizzle: OS << "Swizzle"; break;
730     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
731     case ImmTyHigh: OS << "High"; break;
732     case ImmTyEndpgm:
733       OS << "Endpgm";
734       break;
735     }
736   }
737 
738   void print(raw_ostream &OS) const override {
739     switch (Kind) {
740     case Register:
741       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
742       break;
743     case Immediate:
744       OS << '<' << getImm();
745       if (getImmTy() != ImmTyNone) {
746         OS << " type: "; printImmTy(OS, getImmTy());
747       }
748       OS << " mods: " << Imm.Mods << '>';
749       break;
750     case Token:
751       OS << '\'' << getToken() << '\'';
752       break;
753     case Expression:
754       OS << "<expr " << *Expr << '>';
755       break;
756     }
757   }
758 
759   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
760                                       int64_t Val, SMLoc Loc,
761                                       ImmTy Type = ImmTyNone,
762                                       bool IsFPImm = false) {
763     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
764     Op->Imm.Val = Val;
765     Op->Imm.IsFPImm = IsFPImm;
766     Op->Imm.Type = Type;
767     Op->Imm.Mods = Modifiers();
768     Op->StartLoc = Loc;
769     Op->EndLoc = Loc;
770     return Op;
771   }
772 
773   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
774                                         StringRef Str, SMLoc Loc,
775                                         bool HasExplicitEncodingSize = true) {
776     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
777     Res->Tok.Data = Str.data();
778     Res->Tok.Length = Str.size();
779     Res->StartLoc = Loc;
780     Res->EndLoc = Loc;
781     return Res;
782   }
783 
784   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
785                                       unsigned RegNo, SMLoc S,
786                                       SMLoc E) {
787     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
788     Op->Reg.RegNo = RegNo;
789     Op->Reg.Mods = Modifiers();
790     Op->StartLoc = S;
791     Op->EndLoc = E;
792     return Op;
793   }
794 
795   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
796                                        const class MCExpr *Expr, SMLoc S) {
797     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
798     Op->Expr = Expr;
799     Op->StartLoc = S;
800     Op->EndLoc = S;
801     return Op;
802   }
803 };
804 
805 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
806   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
807   return OS;
808 }
809 
810 //===----------------------------------------------------------------------===//
811 // AsmParser
812 //===----------------------------------------------------------------------===//
813 
// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
817 class KernelScopeInfo {
818   int SgprIndexUnusedMin = -1;
819   int VgprIndexUnusedMin = -1;
820   MCContext *Ctx = nullptr;
821 
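  // Record that SGPR index i is in use and update the .kernel.sgpr_count
  // symbol to one past the highest index seen so far.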
822   void usesSgprAt(int i) {
823     if (i >= SgprIndexUnusedMin) {
824       SgprIndexUnusedMin = ++i;
825       if (Ctx) {
826         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
827         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
828       }
829     }
830   }
831 
832   void usesVgprAt(int i) {
833     if (i >= VgprIndexUnusedMin) {
834       VgprIndexUnusedMin = ++i;
835       if (Ctx) {
836         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
837         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
838       }
839     }
840   }
841 
842 public:
843   KernelScopeInfo() = default;
844 
845   void initialize(MCContext &Context) {
846     Ctx = &Context;
847     usesSgprAt(SgprIndexUnusedMin = -1);
848     usesVgprAt(VgprIndexUnusedMin = -1);
849   }
850 
851   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
852     switch (RegKind) {
853       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
854       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
855       default: break;
856     }
857   }
858 };
859 
860 class AMDGPUAsmParser : public MCTargetAsmParser {
861   MCAsmParser &Parser;
862 
863   // Number of extra operands parsed after the first optional operand.
864   // This may be necessary to skip hardcoded mandatory operands.
865   static const unsigned MAX_OPR_LOOKAHEAD = 8;
866 
867   unsigned ForcedEncodingSize = 0;
868   bool ForcedDPP = false;
869   bool ForcedSDWA = false;
870   KernelScopeInfo KernelScope;
871 
872   /// @name Auto-generated Match Functions
873   /// {
874 
875 #define GET_ASSEMBLER_HEADER
876 #include "AMDGPUGenAsmMatcher.inc"
877 
878   /// }
879 
880 private:
881   bool ParseAsAbsoluteExpression(uint32_t &Ret);
882   bool OutOfRangeError(SMRange Range);
883   /// Calculate VGPR/SGPR blocks required for given target, reserved
884   /// registers, and user-specified NextFreeXGPR values.
885   ///
886   /// \param Features [in] Target features, used for bug corrections.
887   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
888   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
889   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
890   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
891   /// descriptor field, if valid.
892   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
893   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
894   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
895   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
896   /// \param VGPRBlocks [out] Result VGPR block count.
897   /// \param SGPRBlocks [out] Result SGPR block count.
898   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
899                           bool FlatScrUsed, bool XNACKUsed,
900                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
901                           SMRange VGPRRange, unsigned NextFreeSGPR,
902                           SMRange SGPRRange, unsigned &VGPRBlocks,
903                           unsigned &SGPRBlocks);
904   bool ParseDirectiveAMDGCNTarget();
905   bool ParseDirectiveAMDHSAKernel();
906   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
907   bool ParseDirectiveHSACodeObjectVersion();
908   bool ParseDirectiveHSACodeObjectISA();
909   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
910   bool ParseDirectiveAMDKernelCodeT();
911   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
912   bool ParseDirectiveAMDGPUHsaKernel();
913 
914   bool ParseDirectiveISAVersion();
915   bool ParseDirectiveHSAMetadata();
916   bool ParseDirectivePALMetadataBegin();
917   bool ParseDirectivePALMetadata();
918   bool ParseDirectiveAMDGPULDS();
919 
920   /// Common code to parse out a block of text (typically YAML) between start and
921   /// end directives.
922   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
923                            const char *AssemblerDirectiveEnd,
924                            std::string &CollectString);
925 
926   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
927                              RegisterKind RegKind, unsigned Reg1,
928                              unsigned RegNum);
929   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
930                            unsigned& RegNum, unsigned& RegWidth,
931                            unsigned *DwordRegIndex);
932   bool isRegister();
933   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
934   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
935   void initializeGprCountSymbol(RegisterKind RegKind);
936   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
937                              unsigned RegWidth);
938   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
939                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
940   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
941                  bool IsGdsHardcoded);
942 
943 public:
944   enum AMDGPUMatchResultTy {
945     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
946   };
947   enum OperandMode {
948     OperandMode_Default,
949     OperandMode_NSA,
950   };
951 
952   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
953 
954   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
955                const MCInstrInfo &MII,
956                const MCTargetOptions &Options)
957       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
958     MCAsmParserExtension::Initialize(Parser);
959 
960     if (getFeatureBits().none()) {
961       // Set default features.
962       copySTI().ToggleFeature("southern-islands");
963     }
964 
965     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
966 
967     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
972       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
973       MCContext &Ctx = getContext();
974       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
975         MCSymbol *Sym =
976             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
977         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
978         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
979         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
980         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
981         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
982       } else {
983         MCSymbol *Sym =
984             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
985         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
986         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
987         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
988         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
989         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
990       }
991       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
992         initializeGprCountSymbol(IS_VGPR);
993         initializeGprCountSymbol(IS_SGPR);
994       } else
995         KernelScope.initialize(getContext());
996     }
997   }
998 
999   bool hasXNACK() const {
1000     return AMDGPU::hasXNACK(getSTI());
1001   }
1002 
1003   bool hasMIMG_R128() const {
1004     return AMDGPU::hasMIMG_R128(getSTI());
1005   }
1006 
1007   bool hasPackedD16() const {
1008     return AMDGPU::hasPackedD16(getSTI());
1009   }
1010 
1011   bool isSI() const {
1012     return AMDGPU::isSI(getSTI());
1013   }
1014 
1015   bool isCI() const {
1016     return AMDGPU::isCI(getSTI());
1017   }
1018 
1019   bool isVI() const {
1020     return AMDGPU::isVI(getSTI());
1021   }
1022 
1023   bool isGFX9() const {
1024     return AMDGPU::isGFX9(getSTI());
1025   }
1026 
1027   bool isGFX10() const {
1028     return AMDGPU::isGFX10(getSTI());
1029   }
1030 
1031   bool hasInv2PiInlineImm() const {
1032     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1033   }
1034 
1035   bool hasFlatOffsets() const {
1036     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1037   }
1038 
1039   bool hasSGPR102_SGPR103() const {
1040     return !isVI() && !isGFX9();
1041   }
1042 
1043   bool hasSGPR104_SGPR105() const {
1044     return isGFX10();
1045   }
1046 
1047   bool hasIntClamp() const {
1048     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1049   }
1050 
1051   AMDGPUTargetStreamer &getTargetStreamer() {
1052     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1053     return static_cast<AMDGPUTargetStreamer &>(TS);
1054   }
1055 
1056   const MCRegisterInfo *getMRI() const {
1057     // We need this const_cast because for some reason getContext() is not const
1058     // in MCAsmParser.
1059     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1060   }
1061 
1062   const MCInstrInfo *getMII() const {
1063     return &MII;
1064   }
1065 
1066   const FeatureBitset &getFeatureBits() const {
1067     return getSTI().getFeatureBits();
1068   }
1069 
1070   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1071   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1072   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1073 
1074   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1075   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1076   bool isForcedDPP() const { return ForcedDPP; }
1077   bool isForcedSDWA() const { return ForcedSDWA; }
1078   ArrayRef<unsigned> getMatchedVariants() const;
1079 
1080   std::unique_ptr<AMDGPUOperand> parseRegister();
1081   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1082   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1083   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1084                                       unsigned Kind) override;
1085   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1086                                OperandVector &Operands, MCStreamer &Out,
1087                                uint64_t &ErrorInfo,
1088                                bool MatchingInlineAsm) override;
1089   bool ParseDirective(AsmToken DirectiveID) override;
1090   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1091                                     OperandMode Mode = OperandMode_Default);
1092   StringRef parseMnemonicSuffix(StringRef Name);
1093   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1094                         SMLoc NameLoc, OperandVector &Operands) override;
1095   //bool ProcessInstruction(MCInst &Inst);
1096 
1097   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1098 
1099   OperandMatchResultTy
1100   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1101                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1102                      bool (*ConvertResult)(int64_t &) = nullptr);
1103 
1104   OperandMatchResultTy
1105   parseOperandArrayWithPrefix(const char *Prefix,
1106                               OperandVector &Operands,
1107                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1108                               bool (*ConvertResult)(int64_t&) = nullptr);
1109 
1110   OperandMatchResultTy
1111   parseNamedBit(const char *Name, OperandVector &Operands,
1112                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1113   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1114                                              StringRef &Value);
1115 
1116   bool isModifier();
1117   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1118   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1119   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1120   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1121   bool parseSP3NegModifier();
1122   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1123   OperandMatchResultTy parseReg(OperandVector &Operands);
1124   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1125   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1126   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1127   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1128   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1129   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1130   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1131 
1132   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1133   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1134   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1135   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1136 
1137   bool parseCnt(int64_t &IntVal);
1138   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1139   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1140 
1141 private:
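  // Helper for parsing hwreg/sendmsg operands: holds the parsed value, whether
  // it was specified with a symbolic name, and whether it was explicitly
  // defined.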
1142   struct OperandInfoTy {
1143     int64_t Id;
1144     bool IsSymbolic = false;
1145     bool IsDefined = false;
1146 
1147     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1148   };
1149 
1150   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1151   bool validateSendMsg(const OperandInfoTy &Msg,
1152                        const OperandInfoTy &Op,
1153                        const OperandInfoTy &Stream,
1154                        const SMLoc Loc);
1155 
1156   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1157   void validateHwreg(const OperandInfoTy &HwReg,
1158                      const int64_t Offset,
1159                      const int64_t Width,
1160                      const SMLoc Loc);
1161 
1162   void errorExpTgt();
1163   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1164 
1165   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1166   bool validateSOPLiteral(const MCInst &Inst) const;
1167   bool validateConstantBusLimitations(const MCInst &Inst);
1168   bool validateEarlyClobberLimitations(const MCInst &Inst);
1169   bool validateIntClampSupported(const MCInst &Inst);
1170   bool validateMIMGAtomicDMask(const MCInst &Inst);
1171   bool validateMIMGGatherDMask(const MCInst &Inst);
1172   bool validateMIMGDataSize(const MCInst &Inst);
1173   bool validateMIMGAddrSize(const MCInst &Inst);
1174   bool validateMIMGD16(const MCInst &Inst);
1175   bool validateMIMGDim(const MCInst &Inst);
1176   bool validateLdsDirect(const MCInst &Inst);
1177   bool validateOpSel(const MCInst &Inst);
1178   bool validateVccOperand(unsigned Reg) const;
1179   bool validateVOP3Literal(const MCInst &Inst) const;
1180   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1181   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1182   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1183 
1184   bool isId(const StringRef Id) const;
1185   bool isId(const AsmToken &Token, const StringRef Id) const;
1186   bool isToken(const AsmToken::TokenKind Kind) const;
1187   bool trySkipId(const StringRef Id);
1188   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1189   bool trySkipToken(const AsmToken::TokenKind Kind);
1190   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1191   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1192   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1193   AsmToken::TokenKind getTokenKind() const;
1194   bool parseExpr(int64_t &Imm);
1195   StringRef getTokenStr() const;
1196   AsmToken peekToken();
1197   AsmToken getToken() const;
1198   SMLoc getLoc() const;
1199   void lex();
1200 
1201 public:
1202   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1203   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1204 
1205   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1206   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1207   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1208   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1209   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1210   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1211 
1212   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1213                             const unsigned MinVal,
1214                             const unsigned MaxVal,
1215                             const StringRef ErrMsg);
1216   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1217   bool parseSwizzleOffset(int64_t &Imm);
1218   bool parseSwizzleMacro(int64_t &Imm);
1219   bool parseSwizzleQuadPerm(int64_t &Imm);
1220   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1221   bool parseSwizzleBroadcast(int64_t &Imm);
1222   bool parseSwizzleSwap(int64_t &Imm);
1223   bool parseSwizzleReverse(int64_t &Imm);
1224 
1225   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1226   int64_t parseGPRIdxMacro();
1227 
1228   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1229   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1230   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1231   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1232   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1233 
1234   AMDGPUOperand::Ptr defaultDLC() const;
1235   AMDGPUOperand::Ptr defaultGLC() const;
1236   AMDGPUOperand::Ptr defaultSLC() const;
1237 
1238   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1239   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1240   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1241   AMDGPUOperand::Ptr defaultOffsetU12() const;
1242   AMDGPUOperand::Ptr defaultOffsetS13() const;
1243 
1244   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1245 
1246   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1247                OptionalImmIndexMap &OptionalIdx);
1248   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1249   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1250   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1251 
1252   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1253 
1254   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1255                bool IsAtomic = false);
1256   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1257 
1258   OperandMatchResultTy parseDim(OperandVector &Operands);
1259   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1260   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1261   AMDGPUOperand::Ptr defaultRowMask() const;
1262   AMDGPUOperand::Ptr defaultBankMask() const;
1263   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1264   AMDGPUOperand::Ptr defaultFI() const;
1265   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1266   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1267 
1268   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1269                                     AMDGPUOperand::ImmTy Type);
1270   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1271   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1272   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1273   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1274   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1275   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1276                 uint64_t BasicInstType, bool skipVcc = false);
1277 
1278   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1279   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1280 };
1281 
1282 struct OptionalOperand {
1283   const char *Name;
1284   AMDGPUOperand::ImmTy Type;
1285   bool IsBit;
1286   bool (*ConvertResult)(int64_t&);
1287 };
1288 
1289 } // end anonymous namespace
1290 
// May be called with an integer type of equivalent bitwidth.
1292 static const fltSemantics *getFltSemantics(unsigned Size) {
1293   switch (Size) {
1294   case 4:
1295     return &APFloat::IEEEsingle();
1296   case 8:
1297     return &APFloat::IEEEdouble();
1298   case 2:
1299     return &APFloat::IEEEhalf();
1300   default:
1301     llvm_unreachable("unsupported fp type");
1302   }
1303 }
1304 
1305 static const fltSemantics *getFltSemantics(MVT VT) {
1306   return getFltSemantics(VT.getSizeInBits() / 8);
1307 }
1308 
1309 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1310   switch (OperandType) {
1311   case AMDGPU::OPERAND_REG_IMM_INT32:
1312   case AMDGPU::OPERAND_REG_IMM_FP32:
1313   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1314   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1315     return &APFloat::IEEEsingle();
1316   case AMDGPU::OPERAND_REG_IMM_INT64:
1317   case AMDGPU::OPERAND_REG_IMM_FP64:
1318   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1319   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1320     return &APFloat::IEEEdouble();
1321   case AMDGPU::OPERAND_REG_IMM_INT16:
1322   case AMDGPU::OPERAND_REG_IMM_FP16:
1323   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1324   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1325   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1326   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1327   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1328   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1329     return &APFloat::IEEEhalf();
1330   default:
1331     llvm_unreachable("unsupported fp type");
1332   }
1333 }
1334 
1335 //===----------------------------------------------------------------------===//
1336 // Operand
1337 //===----------------------------------------------------------------------===//
1338 
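// Returns true if FPLiteral can be converted in place to the type VT without
// overflow or underflow; precision loss alone is acceptable.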
1339 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1340   bool Lost;
1341 
  // Convert the literal to the semantics of the requested type
1343   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1344                                                APFloat::rmNearestTiesToEven,
1345                                                &Lost);
  // We allow precision loss but not overflow or underflow
1347   if (Status != APFloat::opOK &&
1348       Lost &&
1349       ((Status & APFloat::opOverflow)  != 0 ||
1350        (Status & APFloat::opUnderflow) != 0)) {
1351     return false;
1352   }
1353 
1354   return true;
1355 }
1356 
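// Check whether Val can be truncated to Size bits without losing information,
// treating it as either an unsigned or a signed value.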
1357 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1358   return isUIntN(Size, Val) || isIntN(Size, Val);
1359 }
1360 
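// Returns true if this immediate can be encoded as an inline constant for an
// operand of the given type.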
1361 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1362 
1363   // This is a hack to enable named inline values like
1364   // shared_base with both 32-bit and 64-bit operands.
1365   // Note that these values are defined as
1366   // 32-bit operands only.
1367   if (isInlineValue()) {
1368     return true;
1369   }
1370 
1371   if (!isImmTy(ImmTyNone)) {
1372     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1373     return false;
1374   }
  // TODO: We should avoid using host floats here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.
1378 
1379   APInt Literal(64, Imm.Val);
1380 
1381   if (Imm.IsFPImm) { // We got fp literal token
1382     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1383       return AMDGPU::isInlinableLiteral64(Imm.Val,
1384                                           AsmParser->hasInv2PiInlineImm());
1385     }
1386 
1387     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1388     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1389       return false;
1390 
1391     if (type.getScalarSizeInBits() == 16) {
1392       return AMDGPU::isInlinableLiteral16(
1393         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1394         AsmParser->hasInv2PiInlineImm());
1395     }
1396 
1397     // Check if single precision literal is inlinable
1398     return AMDGPU::isInlinableLiteral32(
1399       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1400       AsmParser->hasInv2PiInlineImm());
1401   }
1402 
1403   // We got int literal token.
1404   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1405     return AMDGPU::isInlinableLiteral64(Imm.Val,
1406                                         AsmParser->hasInv2PiInlineImm());
1407   }
1408 
1409   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1410     return false;
1411   }
1412 
1413   if (type.getScalarSizeInBits() == 16) {
1414     return AMDGPU::isInlinableLiteral16(
1415       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1416       AsmParser->hasInv2PiInlineImm());
1417   }
1418 
1419   return AMDGPU::isInlinableLiteral32(
1420     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1421     AsmParser->hasInv2PiInlineImm());
1422 }
1423 
1424 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1425   // Check that this immediate can be added as literal
1426   if (!isImmTy(ImmTyNone)) {
1427     return false;
1428   }
1429 
1430   if (!Imm.IsFPImm) {
1431     // We got int literal token.
1432 
1433     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3, because of integer truncation.
      // To avoid ambiguity, disable these cases.
1437       return false;
1438     }
1439 
1440     unsigned Size = type.getSizeInBits();
1441     if (Size == 64)
1442       Size = 32;
1443 
1444     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1445     // types.
1446     return isSafeTruncation(Imm.Val, Size);
1447   }
1448 
1449   // We got fp literal token
1450   if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal will be set to zeroes, but we accept such literals
1452     return true;
1453   }
1454 
1455   if (type == MVT::i64) { // Expected 64-bit int operand
1456     // We don't allow fp literals in 64-bit integer instructions. It is
1457     // unclear how we should encode them.
1458     return false;
1459   }
1460 
1461   // We allow fp literals with f16x2 operands assuming that the specified
1462   // literal goes into the lower half and the upper half is zero. We also
  // require that the literal can be losslessly converted to f16.
1464   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1465                      (type == MVT::v2i16)? MVT::i16 : type;
1466 
1467   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1468   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1469 }
1470 
1471 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1472   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1473 }
1474 
1475 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1476   if (AsmParser->isVI())
1477     return isVReg32();
1478   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1479     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1480   else
1481     return false;
1482 }
1483 
1484 bool AMDGPUOperand::isSDWAFP16Operand() const {
1485   return isSDWAOperand(MVT::f16);
1486 }
1487 
1488 bool AMDGPUOperand::isSDWAFP32Operand() const {
1489   return isSDWAOperand(MVT::f32);
1490 }
1491 
1492 bool AMDGPUOperand::isSDWAInt16Operand() const {
1493   return isSDWAOperand(MVT::i16);
1494 }
1495 
1496 bool AMDGPUOperand::isSDWAInt32Operand() const {
1497   return isSDWAOperand(MVT::i32);
1498 }
1499 
1500 bool AMDGPUOperand::isBoolReg() const {
1501   return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
1502     isSCSrcB64() : isSCSrcB32();
1503 }
1504 
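// Apply FP input modifiers to the raw bit pattern of an FP immediate of the
// given size in bytes: 'abs' clears the sign bit and 'neg' flips it.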
1505 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1506 {
1507   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1508   assert(Size == 2 || Size == 4 || Size == 8);
1509 
1510   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1511 
1512   if (Imm.Mods.Abs) {
1513     Val &= ~FpSignMask;
1514   }
1515   if (Imm.Mods.Neg) {
1516     Val ^= FpSignMask;
1517   }
1518 
1519   return Val;
1520 }
1521 
1522 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1523   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1524                              Inst.getNumOperands())) {
1525     addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
1527                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1528   } else {
1529     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1530     Inst.addOperand(MCOperand::createImm(Imm.Val));
1531   }
1532 }
1533 
1534 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1535   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1536   auto OpNum = Inst.getNumOperands();
1537   // Check that this operand accepts literals
1538   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1539 
1540   if (ApplyModifiers) {
1541     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1542     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1543     Val = applyInputFPModifiers(Val, Size);
1544   }
1545 
1546   APInt Literal(64, Val);
1547   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1548 
1549   if (Imm.IsFPImm) { // We got fp literal token
1550     switch (OpTy) {
1551     case AMDGPU::OPERAND_REG_IMM_INT64:
1552     case AMDGPU::OPERAND_REG_IMM_FP64:
1553     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1554     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1555       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1556                                        AsmParser->hasInv2PiInlineImm())) {
1557         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1558         return;
1559       }
1560 
1561       // Non-inlineable
1562       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1563         // For fp operands we check if low 32 bits are zeros
1564         if (Literal.getLoBits(32) != 0) {
1565           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1566           "Can't encode literal as exact 64-bit floating-point operand. "
1567           "Low 32-bits will be set to zero");
1568         }
1569 
1570         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1571         return;
1572       }
1573 
1574       // We don't allow fp literals in 64-bit integer instructions. It is
1575       // unclear how we should encode them. This case should be checked earlier
1576       // in predicate methods (isLiteralImm())
1577       llvm_unreachable("fp literal in 64-bit integer instruction.");
1578 
1579     case AMDGPU::OPERAND_REG_IMM_INT32:
1580     case AMDGPU::OPERAND_REG_IMM_FP32:
1581     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1582     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1583     case AMDGPU::OPERAND_REG_IMM_INT16:
1584     case AMDGPU::OPERAND_REG_IMM_FP16:
1585     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1586     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1587     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1588     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1589     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1590     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1591       bool lost;
1592       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the fp type expected by the operand
1594       FPLiteral.convert(*getOpFltSemantics(OpTy),
1595                         APFloat::rmNearestTiesToEven, &lost);
1596       // We allow precision lost but not overflow or underflow. This should be
1597       // checked earlier in isLiteralImm()
1598 
1599       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1600       Inst.addOperand(MCOperand::createImm(ImmVal));
1601       return;
1602     }
1603     default:
1604       llvm_unreachable("invalid operand size");
1605     }
1606 
1607     return;
1608   }
1609 
1610   // We got int literal token.
1611   // Only sign extend inline immediates.
1612   switch (OpTy) {
1613   case AMDGPU::OPERAND_REG_IMM_INT32:
1614   case AMDGPU::OPERAND_REG_IMM_FP32:
1615   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1616   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1617   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1618   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1619     if (isSafeTruncation(Val, 32) &&
1620         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1621                                      AsmParser->hasInv2PiInlineImm())) {
1622       Inst.addOperand(MCOperand::createImm(Val));
1623       return;
1624     }
1625 
1626     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1627     return;
1628 
1629   case AMDGPU::OPERAND_REG_IMM_INT64:
1630   case AMDGPU::OPERAND_REG_IMM_FP64:
1631   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1632   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1633     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1634       Inst.addOperand(MCOperand::createImm(Val));
1635       return;
1636     }
1637 
1638     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1639     return;
1640 
1641   case AMDGPU::OPERAND_REG_IMM_INT16:
1642   case AMDGPU::OPERAND_REG_IMM_FP16:
1643   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1644   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1645     if (isSafeTruncation(Val, 16) &&
1646         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1647                                      AsmParser->hasInv2PiInlineImm())) {
1648       Inst.addOperand(MCOperand::createImm(Val));
1649       return;
1650     }
1651 
1652     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1653     return;
1654 
1655   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1656   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1657     assert(isSafeTruncation(Val, 16));
1658     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1659                                         AsmParser->hasInv2PiInlineImm()));
1660 
1661     Inst.addOperand(MCOperand::createImm(Val));
1662     return;
1663   }
1664   default:
1665     llvm_unreachable("invalid operand size");
1666   }
1667 }
1668 
1669 template <unsigned Bitwidth>
1670 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1671   APInt Literal(64, Imm.Val);
1672 
1673   if (!Imm.IsFPImm) {
1674     // We got int literal token.
1675     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1676     return;
1677   }
1678 
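  // We got an fp literal token: convert the IEEE double to the KImm operand
  // width before encoding its bit pattern.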
1679   bool Lost;
1680   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1681   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1682                     APFloat::rmNearestTiesToEven, &Lost);
1683   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1684 }
1685 
1686 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1687   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1688 }
1689 
1690 static bool isInlineValue(unsigned Reg) {
1691   switch (Reg) {
1692   case AMDGPU::SRC_SHARED_BASE:
1693   case AMDGPU::SRC_SHARED_LIMIT:
1694   case AMDGPU::SRC_PRIVATE_BASE:
1695   case AMDGPU::SRC_PRIVATE_LIMIT:
1696   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1697     return true;
1698   case AMDGPU::SRC_VCCZ:
1699   case AMDGPU::SRC_EXECZ:
1700   case AMDGPU::SRC_SCC:
1701     return true;
1702   default:
1703     return false;
1704   }
1705 }
1706 
1707 bool AMDGPUOperand::isInlineValue() const {
1708   return isRegKind() && ::isInlineValue(getReg());
1709 }
1710 
1711 //===----------------------------------------------------------------------===//
1712 // AsmParser
1713 //===----------------------------------------------------------------------===//
1714 
1715 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1716   if (Is == IS_VGPR) {
1717     switch (RegWidth) {
1718       default: return -1;
1719       case 1: return AMDGPU::VGPR_32RegClassID;
1720       case 2: return AMDGPU::VReg_64RegClassID;
1721       case 3: return AMDGPU::VReg_96RegClassID;
1722       case 4: return AMDGPU::VReg_128RegClassID;
1723       case 8: return AMDGPU::VReg_256RegClassID;
1724       case 16: return AMDGPU::VReg_512RegClassID;
1725     }
1726   } else if (Is == IS_TTMP) {
1727     switch (RegWidth) {
1728       default: return -1;
1729       case 1: return AMDGPU::TTMP_32RegClassID;
1730       case 2: return AMDGPU::TTMP_64RegClassID;
1731       case 4: return AMDGPU::TTMP_128RegClassID;
1732       case 8: return AMDGPU::TTMP_256RegClassID;
1733       case 16: return AMDGPU::TTMP_512RegClassID;
1734     }
1735   } else if (Is == IS_SGPR) {
1736     switch (RegWidth) {
1737       default: return -1;
1738       case 1: return AMDGPU::SGPR_32RegClassID;
1739       case 2: return AMDGPU::SGPR_64RegClassID;
1740       case 4: return AMDGPU::SGPR_128RegClassID;
1741       case 8: return AMDGPU::SGPR_256RegClassID;
1742       case 16: return AMDGPU::SGPR_512RegClassID;
1743     }
1744   }
1745   return -1;
1746 }
1747 
1748 static unsigned getSpecialRegForName(StringRef RegName) {
1749   return StringSwitch<unsigned>(RegName)
1750     .Case("exec", AMDGPU::EXEC)
1751     .Case("vcc", AMDGPU::VCC)
1752     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1753     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1754     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1755     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1756     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1757     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1758     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1759     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1760     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1761     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1762     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1763     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1764     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1765     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1766     .Case("m0", AMDGPU::M0)
1767     .Case("vccz", AMDGPU::SRC_VCCZ)
1768     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1769     .Case("execz", AMDGPU::SRC_EXECZ)
1770     .Case("src_execz", AMDGPU::SRC_EXECZ)
1771     .Case("scc", AMDGPU::SRC_SCC)
1772     .Case("src_scc", AMDGPU::SRC_SCC)
1773     .Case("tba", AMDGPU::TBA)
1774     .Case("tma", AMDGPU::TMA)
1775     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1776     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1777     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1778     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1779     .Case("vcc_lo", AMDGPU::VCC_LO)
1780     .Case("vcc_hi", AMDGPU::VCC_HI)
1781     .Case("exec_lo", AMDGPU::EXEC_LO)
1782     .Case("exec_hi", AMDGPU::EXEC_HI)
1783     .Case("tma_lo", AMDGPU::TMA_LO)
1784     .Case("tma_hi", AMDGPU::TMA_HI)
1785     .Case("tba_lo", AMDGPU::TBA_LO)
1786     .Case("tba_hi", AMDGPU::TBA_HI)
1787     .Case("null", AMDGPU::SGPR_NULL)
1788     .Default(0);
1789 }
1790 
1791 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1792                                     SMLoc &EndLoc) {
1793   auto R = parseRegister();
1794   if (!R) return true;
1795   assert(R->isReg());
1796   RegNo = R->getReg();
1797   StartLoc = R->getStartLoc();
1798   EndLoc = R->getEndLoc();
1799   return false;
1800 }
1801 
1802 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1803                                             RegisterKind RegKind, unsigned Reg1,
1804                                             unsigned RegNum) {
1805   switch (RegKind) {
1806   case IS_SPECIAL:
1807     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1808       Reg = AMDGPU::EXEC;
1809       RegWidth = 2;
1810       return true;
1811     }
1812     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1813       Reg = AMDGPU::FLAT_SCR;
1814       RegWidth = 2;
1815       return true;
1816     }
1817     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1818       Reg = AMDGPU::XNACK_MASK;
1819       RegWidth = 2;
1820       return true;
1821     }
1822     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1823       Reg = AMDGPU::VCC;
1824       RegWidth = 2;
1825       return true;
1826     }
1827     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1828       Reg = AMDGPU::TBA;
1829       RegWidth = 2;
1830       return true;
1831     }
1832     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1833       Reg = AMDGPU::TMA;
1834       RegWidth = 2;
1835       return true;
1836     }
1837     return false;
1838   case IS_VGPR:
1839   case IS_SGPR:
1840   case IS_TTMP:
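    // For register lists and ranges, the next register must immediately
    // follow the previous one, e.g. [s0,s1,s2,s3].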
1841     if (Reg1 != Reg + RegWidth) {
1842       return false;
1843     }
1844     RegWidth++;
1845     return true;
1846   default:
1847     llvm_unreachable("unexpected register kind");
1848   }
1849 }
1850 
1851 static const StringRef Registers[] = {
1852   { "v" },
1853   { "s" },
1854   { "ttmp" },
1855 };
1856 
1857 bool
1858 AMDGPUAsmParser::isRegister(const AsmToken &Token,
1859                             const AsmToken &NextToken) const {
1860 
1861   // A list of consecutive registers: [s0,s1,s2,s3]
1862   if (Token.is(AsmToken::LBrac))
1863     return true;
1864 
1865   if (!Token.is(AsmToken::Identifier))
1866     return false;
1867 
1868   // A single register like s0 or a range of registers like s[0:1]
1869 
1870   StringRef RegName = Token.getString();
1871 
1872   for (StringRef Reg : Registers) {
1873     if (RegName.startswith(Reg)) {
1874       if (Reg.size() < RegName.size()) {
1875         unsigned RegNum;
1876         // A single register with an index: rXX
1877         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
1878           return true;
1879       } else {
1880         // A range of registers: r[XX:YY].
1881         if (NextToken.is(AsmToken::LBrac))
1882           return true;
1883       }
1884     }
1885   }
1886 
1887   return getSpecialRegForName(RegName);
1888 }
1889 
1890 bool
1891 AMDGPUAsmParser::isRegister()
1892 {
1893   return isRegister(getToken(), peekToken());
1894 }
1895 
1896 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1897                                           unsigned &RegNum, unsigned &RegWidth,
1898                                           unsigned *DwordRegIndex) {
1899   if (DwordRegIndex) { *DwordRegIndex = 0; }
1900   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1901   if (getLexer().is(AsmToken::Identifier)) {
1902     StringRef RegName = Parser.getTok().getString();
1903     if ((Reg = getSpecialRegForName(RegName))) {
1904       Parser.Lex();
1905       RegKind = IS_SPECIAL;
1906     } else {
1907       unsigned RegNumIndex = 0;
1908       if (RegName[0] == 'v') {
1909         RegNumIndex = 1;
1910         RegKind = IS_VGPR;
1911       } else if (RegName[0] == 's') {
1912         RegNumIndex = 1;
1913         RegKind = IS_SGPR;
1914       } else if (RegName.startswith("ttmp")) {
1915         RegNumIndex = strlen("ttmp");
1916         RegKind = IS_TTMP;
1917       } else {
1918         return false;
1919       }
1920       if (RegName.size() > RegNumIndex) {
1921         // Single 32-bit register: vXX.
1922         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1923           return false;
1924         Parser.Lex();
1925         RegWidth = 1;
1926       } else {
1927         // Range of registers: v[XX:YY]. ":YY" is optional.
1928         Parser.Lex();
1929         int64_t RegLo, RegHi;
1930         if (getLexer().isNot(AsmToken::LBrac))
1931           return false;
1932         Parser.Lex();
1933 
1934         if (getParser().parseAbsoluteExpression(RegLo))
1935           return false;
1936 
1937         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1938         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1939           return false;
1940         Parser.Lex();
1941 
1942         if (isRBrace) {
1943           RegHi = RegLo;
1944         } else {
1945           if (getParser().parseAbsoluteExpression(RegHi))
1946             return false;
1947 
1948           if (getLexer().isNot(AsmToken::RBrac))
1949             return false;
1950           Parser.Lex();
1951         }
1952         RegNum = (unsigned) RegLo;
1953         RegWidth = (RegHi - RegLo) + 1;
1954       }
1955     }
1956   } else if (getLexer().is(AsmToken::LBrac)) {
1957     // List of consecutive registers: [s0,s1,s2,s3]
1958     Parser.Lex();
1959     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1960       return false;
1961     if (RegWidth != 1)
1962       return false;
1963     RegisterKind RegKind1;
1964     unsigned Reg1, RegNum1, RegWidth1;
1965     do {
1966       if (getLexer().is(AsmToken::Comma)) {
1967         Parser.Lex();
1968       } else if (getLexer().is(AsmToken::RBrac)) {
1969         Parser.Lex();
1970         break;
1971       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1972         if (RegWidth1 != 1) {
1973           return false;
1974         }
1975         if (RegKind1 != RegKind) {
1976           return false;
1977         }
1978         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1979           return false;
1980         }
1981       } else {
1982         return false;
1983       }
1984     } while (true);
1985   } else {
1986     return false;
1987   }
1988   switch (RegKind) {
1989   case IS_SPECIAL:
1990     RegNum = 0;
1991     RegWidth = 1;
1992     break;
1993   case IS_VGPR:
1994   case IS_SGPR:
1995   case IS_TTMP:
1996   {
1997     unsigned Size = 1;
1998     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1999       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
2000       Size = std::min(RegWidth, 4u);
2001     }
2002     if (RegNum % Size != 0)
2003       return false;
2004     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
2005     RegNum = RegNum / Size;
2006     int RCID = getRegClass(RegKind, RegWidth);
2007     if (RCID == -1)
2008       return false;
2009     const MCRegisterClass RC = TRI->getRegClass(RCID);
2010     if (RegNum >= RC.getNumRegs())
2011       return false;
2012     Reg = RC.getRegister(RegNum);
2013     break;
2014   }
2015 
2016   default:
2017     llvm_unreachable("unexpected register kind");
2018   }
2019 
2020   if (!subtargetHasRegister(*TRI, Reg))
2021     return false;
2022   return true;
2023 }
2024 
2025 Optional<StringRef>
2026 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2027   switch (RegKind) {
2028   case IS_VGPR:
2029     return StringRef(".amdgcn.next_free_vgpr");
2030   case IS_SGPR:
2031     return StringRef(".amdgcn.next_free_sgpr");
2032   default:
2033     return None;
2034   }
2035 }
2036 
2037 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2038   auto SymbolName = getGprCountSymbolName(RegKind);
2039   assert(SymbolName && "initializing invalid register kind");
2040   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2041   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2042 }
2043 
2044 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2045                                             unsigned DwordRegIndex,
2046                                             unsigned RegWidth) {
2047   // Symbols are only defined for GCN targets
2048   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2049     return true;
2050 
2051   auto SymbolName = getGprCountSymbolName(RegKind);
2052   if (!SymbolName)
2053     return true;
2054   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2055 
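  // The symbol holds the number of the next free register, i.e. one past the
  // highest register index used so far.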
2056   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2057   int64_t OldCount;
2058 
2059   if (!Sym->isVariable())
2060     return !Error(getParser().getTok().getLoc(),
2061                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2062   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2063     return !Error(
2064         getParser().getTok().getLoc(),
2065         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2066 
2067   if (OldCount <= NewMax)
2068     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2069 
2070   return true;
2071 }
2072 
2073 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2074   const auto &Tok = Parser.getTok();
2075   SMLoc StartLoc = Tok.getLoc();
2076   SMLoc EndLoc = Tok.getEndLoc();
2077   RegisterKind RegKind;
2078   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2079 
2080   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2081     //FIXME: improve error messages (bug 41303).
2082     Error(StartLoc, "not a valid operand.");
2083     return nullptr;
2084   }
2085   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2086     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2087       return nullptr;
2088   } else
2089     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2090   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2091 }
2092 
2093 OperandMatchResultTy
2094 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2095   // TODO: add syntactic sugar for 1/(2*PI)
2096 
2097   assert(!isRegister());
2098   assert(!isModifier());
2099 
2100   const auto& Tok = getToken();
2101   const auto& NextTok = peekToken();
2102   bool IsReal = Tok.is(AsmToken::Real);
2103   SMLoc S = getLoc();
2104   bool Negate = false;
2105 
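  // A leading minus in front of an fp literal is consumed here and applied
  // to the parsed value below, since MC expressions cannot represent
  // floating-point constants.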
2106   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2107     lex();
2108     IsReal = true;
2109     Negate = true;
2110   }
2111 
2112   if (IsReal) {
    // Floating-point expressions are not supported;
    // only floating-point literals with an optional sign are allowed.
2116 
2117     StringRef Num = getTokenStr();
2118     lex();
2119 
2120     APFloat RealVal(APFloat::IEEEdouble());
2121     auto roundMode = APFloat::rmNearestTiesToEven;
2122     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2123       return MatchOperand_ParseFail;
2124     }
2125     if (Negate)
2126       RealVal.changeSign();
2127 
2128     Operands.push_back(
2129       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2130                                AMDGPUOperand::ImmTyNone, true));
2131 
2132     return MatchOperand_Success;
2133 
2134   } else {
2135     int64_t IntVal;
2136     const MCExpr *Expr;
2137     SMLoc S = getLoc();
2138 
2139     if (HasSP3AbsModifier) {
2140       // This is a workaround for handling expressions
2141       // as arguments of SP3 'abs' modifier, for example:
2142       //     |1.0|
2143       //     |-1|
2144       //     |1+x|
      // This syntax is not compatible with the syntax of standard
      // MC expressions (due to the trailing '|').
2147       SMLoc EndLoc;
2148       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2149         return MatchOperand_ParseFail;
2150     } else {
2151       if (Parser.parseExpression(Expr))
2152         return MatchOperand_ParseFail;
2153     }
2154 
2155     if (Expr->evaluateAsAbsolute(IntVal)) {
2156       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2157     } else {
2158       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2159     }
2160 
2161     return MatchOperand_Success;
2162   }
2163 
2164   return MatchOperand_NoMatch;
2165 }
2166 
2167 OperandMatchResultTy
2168 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2169   if (!isRegister())
2170     return MatchOperand_NoMatch;
2171 
2172   if (auto R = parseRegister()) {
2173     assert(R->isReg());
2174     Operands.push_back(std::move(R));
2175     return MatchOperand_Success;
2176   }
2177   return MatchOperand_ParseFail;
2178 }
2179 
2180 OperandMatchResultTy
2181 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2182   auto res = parseReg(Operands);
2183   if (res != MatchOperand_NoMatch) {
2184     return res;
2185   } else if (isModifier()) {
2186     return MatchOperand_NoMatch;
2187   } else {
2188     return parseImm(Operands, HasSP3AbsMod);
2189   }
2190 }
2191 
2192 bool
2193 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2194   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2195     const auto &str = Token.getString();
2196     return str == "abs" || str == "neg" || str == "sext";
2197   }
2198   return false;
2199 }
2200 
2201 bool
2202 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2203   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2204 }
2205 
2206 bool
2207 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2208   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2209 }
2210 
2211 bool
2212 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2213   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2214 }
2215 
// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
2220 //   |...|
2221 //   abs(...)
2222 //   neg(...)
2223 //   sext(...)
2224 //   -reg
2225 //   -|...|
2226 //   -abs(...)
2227 //   name:...
2228 // Note that simple opcode modifiers like 'gds' may be parsed as
2229 // expressions; this is a special case. See getExpressionAsToken.
2230 //
2231 bool
2232 AMDGPUAsmParser::isModifier() {
2233 
2234   AsmToken Tok = getToken();
2235   AsmToken NextToken[2];
2236   peekTokens(NextToken);
2237 
2238   return isOperandModifier(Tok, NextToken[0]) ||
2239          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2240          isOpcodeModifierWithVal(Tok, NextToken[0]);
2241 }
2242 
2243 // Check if the current token is an SP3 'neg' modifier.
2244 // Currently this modifier is allowed in the following context:
2245 //
2246 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2247 // 2. Before an 'abs' modifier: -abs(...)
2248 // 3. Before an SP3 'abs' modifier: -|...|
2249 //
2250 // In all other cases "-" is handled as a part
2251 // of an expression that follows the sign.
2252 //
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2264 //
2265 bool
2266 AMDGPUAsmParser::parseSP3NegModifier() {
2267 
2268   AsmToken NextToken[2];
2269   peekTokens(NextToken);
2270 
2271   if (isToken(AsmToken::Minus) &&
2272       (isRegister(NextToken[0], NextToken[1]) ||
2273        NextToken[0].is(AsmToken::Pipe) ||
2274        isId(NextToken[0], "abs"))) {
2275     lex();
2276     return true;
2277   }
2278 
2279   return false;
2280 }
2281 
2282 OperandMatchResultTy
2283 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2284                                               bool AllowImm) {
2285   bool Neg, SP3Neg;
2286   bool Abs, SP3Abs;
2287   SMLoc Loc;
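  // Neg/Abs track the named modifiers neg(...) and abs(...); SP3Neg/SP3Abs
  // track the SP3 forms "-..." and "|...|".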
2288 
2289   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2290   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2291     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2292     return MatchOperand_ParseFail;
2293   }
2294 
2295   SP3Neg = parseSP3NegModifier();
2296 
2297   Loc = getLoc();
2298   Neg = trySkipId("neg");
2299   if (Neg && SP3Neg) {
2300     Error(Loc, "expected register or immediate");
2301     return MatchOperand_ParseFail;
2302   }
2303   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2304     return MatchOperand_ParseFail;
2305 
2306   Abs = trySkipId("abs");
2307   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2308     return MatchOperand_ParseFail;
2309 
2310   Loc = getLoc();
2311   SP3Abs = trySkipToken(AsmToken::Pipe);
2312   if (Abs && SP3Abs) {
2313     Error(Loc, "expected register or immediate");
2314     return MatchOperand_ParseFail;
2315   }
2316 
2317   OperandMatchResultTy Res;
2318   if (AllowImm) {
2319     Res = parseRegOrImm(Operands, SP3Abs);
2320   } else {
2321     Res = parseReg(Operands);
2322   }
2323   if (Res != MatchOperand_Success) {
2324     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2325   }
2326 
2327   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2328     return MatchOperand_ParseFail;
2329   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2330     return MatchOperand_ParseFail;
2331   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2332     return MatchOperand_ParseFail;
2333 
2334   AMDGPUOperand::Modifiers Mods;
2335   Mods.Abs = Abs || SP3Abs;
2336   Mods.Neg = Neg || SP3Neg;
2337 
2338   if (Mods.hasFPModifiers()) {
2339     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2340     if (Op.isExpr()) {
2341       Error(Op.getStartLoc(), "expected an absolute expression");
2342       return MatchOperand_ParseFail;
2343     }
2344     Op.setModifiers(Mods);
2345   }
2346   return MatchOperand_Success;
2347 }
2348 
2349 OperandMatchResultTy
2350 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2351                                                bool AllowImm) {
2352   bool Sext = trySkipId("sext");
2353   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2354     return MatchOperand_ParseFail;
2355 
2356   OperandMatchResultTy Res;
2357   if (AllowImm) {
2358     Res = parseRegOrImm(Operands);
2359   } else {
2360     Res = parseReg(Operands);
2361   }
2362   if (Res != MatchOperand_Success) {
2363     return Sext? MatchOperand_ParseFail : Res;
2364   }
2365 
2366   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2367     return MatchOperand_ParseFail;
2368 
2369   AMDGPUOperand::Modifiers Mods;
2370   Mods.Sext = Sext;
2371 
2372   if (Mods.hasIntModifiers()) {
2373     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2374     if (Op.isExpr()) {
2375       Error(Op.getStartLoc(), "expected an absolute expression");
2376       return MatchOperand_ParseFail;
2377     }
2378     Op.setModifiers(Mods);
2379   }
2380 
2381   return MatchOperand_Success;
2382 }
2383 
2384 OperandMatchResultTy
2385 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2386   return parseRegOrImmWithFPInputMods(Operands, false);
2387 }
2388 
2389 OperandMatchResultTy
2390 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2391   return parseRegOrImmWithIntInputMods(Operands, false);
2392 }
2393 
2394 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2395   auto Loc = getLoc();
2396   if (trySkipId("off")) {
2397     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2398                                                 AMDGPUOperand::ImmTyOff, false));
2399     return MatchOperand_Success;
2400   }
2401 
2402   if (!isRegister())
2403     return MatchOperand_NoMatch;
2404 
2405   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2406   if (Reg) {
2407     Operands.push_back(std::move(Reg));
2408     return MatchOperand_Success;
2409   }
2410 
  return MatchOperand_ParseFail;
}
2414 
2415 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2416   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2417 
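  // Reject the instruction if it does not match an explicitly forced
  // encoding (e32/e64, DPP or SDWA).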
2418   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2419       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2420       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2421       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2422     return Match_InvalidOperand;
2423 
2424   if ((TSFlags & SIInstrFlags::VOP3) &&
2425       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2426       getForcedEncodingSize() != 64)
2427     return Match_PreferE32;
2428 
2429   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2430       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2431     // v_mac_f32/16 allow only dst_sel == DWORD;
2432     auto OpNum =
2433         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2434     const auto &Op = Inst.getOperand(OpNum);
2435     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2436       return Match_InvalidOperand;
2437     }
2438   }
2439 
2440   if (TSFlags & SIInstrFlags::FLAT) {
2441     // FIXME: Produces error without correct column reported.
2442     auto Opcode = Inst.getOpcode();
2443     auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
2444 
2445     const auto &Op = Inst.getOperand(OpNum);
2446     if (!hasFlatOffsets() && Op.getImm() != 0)
2447       return Match_InvalidOperand;
2448 
2449     // GFX10: Address offset is 12-bit signed byte offset. Must be positive for
2450     // FLAT segment. For FLAT segment MSB is ignored and forced to zero.
2451     if (isGFX10()) {
2452       if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
2453         if (!isInt<12>(Op.getImm()))
2454           return Match_InvalidOperand;
2455       } else {
2456         if (!isUInt<11>(Op.getImm()))
2457           return Match_InvalidOperand;
2458       }
2459     }
2460   }
2461 
2462   return Match_Success;
2463 }
2464 
// Which asm variants we should check
2466 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2467   if (getForcedEncodingSize() == 32) {
2468     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2469     return makeArrayRef(Variants);
2470   }
2471 
2472   if (isForcedVOP3()) {
2473     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2474     return makeArrayRef(Variants);
2475   }
2476 
2477   if (isForcedSDWA()) {
2478     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2479                                         AMDGPUAsmVariants::SDWA9};
2480     return makeArrayRef(Variants);
2481   }
2482 
2483   if (isForcedDPP()) {
2484     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2485     return makeArrayRef(Variants);
2486   }
2487 
2488   static const unsigned Variants[] = {
2489     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2490     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2491   };
2492 
2493   return makeArrayRef(Variants);
2494 }
2495 
2496 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2497   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2498   const unsigned Num = Desc.getNumImplicitUses();
2499   for (unsigned i = 0; i < Num; ++i) {
2500     unsigned Reg = Desc.ImplicitUses[i];
2501     switch (Reg) {
2502     case AMDGPU::FLAT_SCR:
2503     case AMDGPU::VCC:
2504     case AMDGPU::VCC_LO:
2505     case AMDGPU::VCC_HI:
2506     case AMDGPU::M0:
2507     case AMDGPU::SGPR_NULL:
2508       return Reg;
2509     default:
2510       break;
2511     }
2512   }
2513   return AMDGPU::NoRegister;
2514 }
2515 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
2520 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2521                                        unsigned OpIdx) const {
2522   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2523 
2524   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2525     return false;
2526   }
2527 
2528   const MCOperand &MO = Inst.getOperand(OpIdx);
2529 
2530   int64_t Val = MO.getImm();
2531   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2532 
2533   switch (OpSize) { // expected operand size
2534   case 8:
2535     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2536   case 4:
2537     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2538   case 2: {
2539     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2540     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2541         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2542         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2543         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2544       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2545     } else {
2546       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2547     }
2548   }
2549   default:
2550     llvm_unreachable("invalid operand size");
2551   }
2552 }
2553 
2554 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2555   const MCOperand &MO = Inst.getOperand(OpIdx);
2556   if (MO.isImm()) {
2557     return !isInlineConstant(Inst, OpIdx);
2558   }
2559   return !MO.isReg() ||
2560          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2561 }
2562 
2563 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2564   const unsigned Opcode = Inst.getOpcode();
2565   const MCInstrDesc &Desc = MII.get(Opcode);
2566   unsigned ConstantBusUseCount = 0;
2567   unsigned NumLiterals = 0;
2568   unsigned LiteralSize;
2569 
2570   if (Desc.TSFlags &
2571       (SIInstrFlags::VOPC |
2572        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2573        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2574        SIInstrFlags::SDWA)) {
2575     // Check special imm operands (used by madmk, etc)
2576     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2577       ++ConstantBusUseCount;
2578     }
2579 
2580     SmallDenseSet<unsigned> SGPRsUsed;
2581     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2582     if (SGPRUsed != AMDGPU::NoRegister) {
2583       SGPRsUsed.insert(SGPRUsed);
2584       ++ConstantBusUseCount;
2585     }
2586 
2587     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2588     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2589     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2590 
2591     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2592 
2593     for (int OpIdx : OpIndices) {
2594       if (OpIdx == -1) break;
2595 
2596       const MCOperand &MO = Inst.getOperand(OpIdx);
2597       if (usesConstantBus(Inst, OpIdx)) {
2598         if (MO.isReg()) {
2599           const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
2601           //   s0, s[0:1]
2602           //   flat_scratch_lo, flat_scratch
2603           //   flat_scratch_lo, flat_scratch_hi
2604           // are theoretically valid but they are disabled anyway.
2605           // Note that this code mimics SIInstrInfo::verifyInstruction
2606           if (!SGPRsUsed.count(Reg)) {
2607             SGPRsUsed.insert(Reg);
2608             ++ConstantBusUseCount;
2609           }
2610           SGPRUsed = Reg;
2611         } else { // Expression or a literal
2612 
2613           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2614             continue; // special operand like VINTERP attr_chan
2615 
2616           // An instruction may use only one literal.
2617           // This has been validated on the previous step.
2618           // See validateVOP3Literal.
2619           // This literal may be used as more than one operand.
2620           // If all these operands are of the same size,
2621           // this literal counts as one scalar value.
2622           // Otherwise it counts as 2 scalar values.
2623           // See "GFX10 Shader Programming", section 3.6.2.3.
2624 
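          // Literals narrower than a dword still occupy a full 32-bit slot.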
2625           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2626           if (Size < 4) Size = 4;
2627 
2628           if (NumLiterals == 0) {
2629             NumLiterals = 1;
2630             LiteralSize = Size;
2631           } else if (LiteralSize != Size) {
2632             NumLiterals = 2;
2633           }
2634         }
2635       }
2636     }
2637   }
2638   ConstantBusUseCount += NumLiterals;
2639 
2640   if (isGFX10())
2641     return ConstantBusUseCount <= 2;
2642 
2643   return ConstantBusUseCount <= 1;
2644 }
2645 
2646 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2647   const unsigned Opcode = Inst.getOpcode();
2648   const MCInstrDesc &Desc = MII.get(Opcode);
2649 
2650   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2651   if (DstIdx == -1 ||
2652       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2653     return true;
2654   }
2655 
2656   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2657 
2658   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2659   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2660   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2661 
2662   assert(DstIdx != -1);
2663   const MCOperand &Dst = Inst.getOperand(DstIdx);
2664   assert(Dst.isReg());
2665   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2666 
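  // An early-clobber destination must not overlap any of the source registers.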
2667   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2668 
2669   for (int SrcIdx : SrcIndices) {
2670     if (SrcIdx == -1) break;
2671     const MCOperand &Src = Inst.getOperand(SrcIdx);
2672     if (Src.isReg()) {
2673       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2674       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2675         return false;
2676       }
2677     }
2678   }
2679 
2680   return true;
2681 }
2682 
2683 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2684 
2685   const unsigned Opc = Inst.getOpcode();
2686   const MCInstrDesc &Desc = MII.get(Opc);
2687 
2688   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2689     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2690     assert(ClampIdx != -1);
2691     return Inst.getOperand(ClampIdx).getImm() == 0;
2692   }
2693 
2694   return true;
2695 }
2696 
2697 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2698 
2699   const unsigned Opc = Inst.getOpcode();
2700   const MCInstrDesc &Desc = MII.get(Opc);
2701 
2702   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2703     return true;
2704 
2705   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2706   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2707   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2708 
2709   assert(VDataIdx != -1);
2710   assert(DMaskIdx != -1);
2711   assert(TFEIdx != -1);
2712 
2713   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2714   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2715   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
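  // A dmask of 0 is treated as if a single channel were enabled.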
2716   if (DMask == 0)
2717     DMask = 1;
2718 
2719   unsigned DataSize =
2720     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2721   if (hasPackedD16()) {
2722     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2723     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2724       DataSize = (DataSize + 1) / 2;
2725   }
2726 
2727   return (VDataSize / 4) == DataSize + TFESize;
2728 }
2729 
2730 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2731   const unsigned Opc = Inst.getOpcode();
2732   const MCInstrDesc &Desc = MII.get(Opc);
2733 
2734   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2735     return true;
2736 
2737   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2738   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2739       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2740   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2741   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2742   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2743 
2744   assert(VAddr0Idx != -1);
2745   assert(SrsrcIdx != -1);
2746   assert(DimIdx != -1);
2747   assert(SrsrcIdx > VAddr0Idx);
2748 
2749   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2750   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2751   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2752   unsigned VAddrSize =
2753       IsNSA ? SrsrcIdx - VAddr0Idx
2754             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2755 
2756   unsigned AddrSize = BaseOpcode->NumExtraArgs +
2757                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2758                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2759                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
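  // Non-NSA encodings pack the address into a single contiguous VGPR tuple,
  // so the address size is rounded up to the next supported tuple width.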
2760   if (!IsNSA) {
2761     if (AddrSize > 8)
2762       AddrSize = 16;
2763     else if (AddrSize > 4)
2764       AddrSize = 8;
2765   }
2766 
2767   return VAddrSize == AddrSize;
2768 }
2769 
2770 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2771 
2772   const unsigned Opc = Inst.getOpcode();
2773   const MCInstrDesc &Desc = MII.get(Opc);
2774 
2775   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2776     return true;
2777   if (!Desc.mayLoad() || !Desc.mayStore())
2778     return true; // Not atomic
2779 
2780   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2781   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2782 
2783   // This is an incomplete check because image_atomic_cmpswap
2784   // may only use 0x3 and 0xf while other atomic operations
2785   // may use 0x1 and 0x3. However these limitations are
2786   // verified when we check that dmask matches dst size.
2787   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2788 }
2789 
2790 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2791 
2792   const unsigned Opc = Inst.getOpcode();
2793   const MCInstrDesc &Desc = MII.get(Opc);
2794 
2795   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2796     return true;
2797 
2798   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2799   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2800 
2801   // GATHER4 instructions use dmask in a different fashion compared to
2802   // other MIMG instructions. The only useful DMASK values are
2803   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2804   // (red,red,red,red) etc.) The ISA document doesn't mention
2805   // this.
2806   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2807 }
2808 
2809 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2810 
2811   const unsigned Opc = Inst.getOpcode();
2812   const MCInstrDesc &Desc = MII.get(Opc);
2813 
2814   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2815     return true;
2816 
2817   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2818   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2819     if (isCI() || isSI())
2820       return false;
2821   }
2822 
2823   return true;
2824 }
2825 
2826 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2827   const unsigned Opc = Inst.getOpcode();
2828   const MCInstrDesc &Desc = MII.get(Opc);
2829 
2830   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2831     return true;
2832 
2833   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2834   if (DimIdx < 0)
2835     return true;
2836 
2837   long Imm = Inst.getOperand(DimIdx).getImm();
2838   if (Imm < 0 || Imm >= 8)
2839     return false;
2840 
2841   return true;
2842 }
2843 
2844 static bool IsRevOpcode(const unsigned Opcode)
2845 {
2846   switch (Opcode) {
2847   case AMDGPU::V_SUBREV_F32_e32:
2848   case AMDGPU::V_SUBREV_F32_e64:
2849   case AMDGPU::V_SUBREV_F32_e32_gfx10:
2850   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
2851   case AMDGPU::V_SUBREV_F32_e32_vi:
2852   case AMDGPU::V_SUBREV_F32_e64_gfx10:
2853   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
2854   case AMDGPU::V_SUBREV_F32_e64_vi:
2855 
2856   case AMDGPU::V_SUBREV_I32_e32:
2857   case AMDGPU::V_SUBREV_I32_e64:
2858   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
2859   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
2860 
2861   case AMDGPU::V_SUBBREV_U32_e32:
2862   case AMDGPU::V_SUBBREV_U32_e64:
2863   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
2864   case AMDGPU::V_SUBBREV_U32_e32_vi:
2865   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
2866   case AMDGPU::V_SUBBREV_U32_e64_vi:
2867 
2868   case AMDGPU::V_SUBREV_U32_e32:
2869   case AMDGPU::V_SUBREV_U32_e64:
2870   case AMDGPU::V_SUBREV_U32_e32_gfx9:
2871   case AMDGPU::V_SUBREV_U32_e32_vi:
2872   case AMDGPU::V_SUBREV_U32_e64_gfx9:
2873   case AMDGPU::V_SUBREV_U32_e64_vi:
2874 
2875   case AMDGPU::V_SUBREV_F16_e32:
2876   case AMDGPU::V_SUBREV_F16_e64:
2877   case AMDGPU::V_SUBREV_F16_e32_gfx10:
2878   case AMDGPU::V_SUBREV_F16_e32_vi:
2879   case AMDGPU::V_SUBREV_F16_e64_gfx10:
2880   case AMDGPU::V_SUBREV_F16_e64_vi:
2881 
2882   case AMDGPU::V_SUBREV_U16_e32:
2883   case AMDGPU::V_SUBREV_U16_e64:
2884   case AMDGPU::V_SUBREV_U16_e32_vi:
2885   case AMDGPU::V_SUBREV_U16_e64_vi:
2886 
2887   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
2888   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
2889   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
2890 
2891   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
2892   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
2893 
2894   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
2895   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
2896 
2897   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
2898   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
2899 
2900   case AMDGPU::V_LSHRREV_B32_e32:
2901   case AMDGPU::V_LSHRREV_B32_e64:
2902   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
2903   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
2904   case AMDGPU::V_LSHRREV_B32_e32_vi:
2905   case AMDGPU::V_LSHRREV_B32_e64_vi:
2906   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
2907   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
2908 
2909   case AMDGPU::V_ASHRREV_I32_e32:
2910   case AMDGPU::V_ASHRREV_I32_e64:
2911   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
2912   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
2913   case AMDGPU::V_ASHRREV_I32_e32_vi:
2914   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
2915   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
2916   case AMDGPU::V_ASHRREV_I32_e64_vi:
2917 
2918   case AMDGPU::V_LSHLREV_B32_e32:
2919   case AMDGPU::V_LSHLREV_B32_e64:
2920   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
2921   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
2922   case AMDGPU::V_LSHLREV_B32_e32_vi:
2923   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
2924   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
2925   case AMDGPU::V_LSHLREV_B32_e64_vi:
2926 
2927   case AMDGPU::V_LSHLREV_B16_e32:
2928   case AMDGPU::V_LSHLREV_B16_e64:
2929   case AMDGPU::V_LSHLREV_B16_e32_vi:
2930   case AMDGPU::V_LSHLREV_B16_e64_vi:
2931   case AMDGPU::V_LSHLREV_B16_gfx10:
2932 
2933   case AMDGPU::V_LSHRREV_B16_e32:
2934   case AMDGPU::V_LSHRREV_B16_e64:
2935   case AMDGPU::V_LSHRREV_B16_e32_vi:
2936   case AMDGPU::V_LSHRREV_B16_e64_vi:
2937   case AMDGPU::V_LSHRREV_B16_gfx10:
2938 
2939   case AMDGPU::V_ASHRREV_I16_e32:
2940   case AMDGPU::V_ASHRREV_I16_e64:
2941   case AMDGPU::V_ASHRREV_I16_e32_vi:
2942   case AMDGPU::V_ASHRREV_I16_e64_vi:
2943   case AMDGPU::V_ASHRREV_I16_gfx10:
2944 
2945   case AMDGPU::V_LSHLREV_B64:
2946   case AMDGPU::V_LSHLREV_B64_gfx10:
2947   case AMDGPU::V_LSHLREV_B64_vi:
2948 
2949   case AMDGPU::V_LSHRREV_B64:
2950   case AMDGPU::V_LSHRREV_B64_gfx10:
2951   case AMDGPU::V_LSHRREV_B64_vi:
2952 
2953   case AMDGPU::V_ASHRREV_I64:
2954   case AMDGPU::V_ASHRREV_I64_gfx10:
2955   case AMDGPU::V_ASHRREV_I64_vi:
2956 
2957   case AMDGPU::V_PK_LSHLREV_B16:
2958   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
2959   case AMDGPU::V_PK_LSHLREV_B16_vi:
2960 
2961   case AMDGPU::V_PK_LSHRREV_B16:
2962   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
2963   case AMDGPU::V_PK_LSHRREV_B16_vi:
2964   case AMDGPU::V_PK_ASHRREV_I16:
2965   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
2966   case AMDGPU::V_PK_ASHRREV_I16_vi:
2967     return true;
2968   default:
2969     return false;
2970   }
2971 }
2972 
2973 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2974 
2975   using namespace SIInstrFlags;
2976   const unsigned Opcode = Inst.getOpcode();
2977   const MCInstrDesc &Desc = MII.get(Opcode);
2978 
  // The lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
2981   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2982     return true;
2983 
2984   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2985   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2986   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2987 
2988   const int SrcIndices[] = { Src1Idx, Src2Idx };
2989 
2990   // lds_direct cannot be specified as either src1 or src2.
2991   for (int SrcIdx : SrcIndices) {
2992     if (SrcIdx == -1) break;
2993     const MCOperand &Src = Inst.getOperand(SrcIdx);
2994     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2995       return false;
2996     }
2997   }
2998 
2999   if (Src0Idx == -1)
3000     return true;
3001 
3002   const MCOperand &Src = Inst.getOperand(Src0Idx);
3003   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3004     return true;
3005 
3006   // lds_direct is specified as src0. Check additional limitations.
3007   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3008 }
3009 
3010 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3011   unsigned Opcode = Inst.getOpcode();
3012   const MCInstrDesc &Desc = MII.get(Opcode);
3013   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3014     return true;
3015 
3016   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3017   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3018 
3019   const int OpIndices[] = { Src0Idx, Src1Idx };
3020 
3021   unsigned NumLiterals = 0;
3022   uint32_t LiteralValue;
3023 
3024   for (int OpIdx : OpIndices) {
3025     if (OpIdx == -1) break;
3026 
3027     const MCOperand &MO = Inst.getOperand(OpIdx);
3028     if (MO.isImm() &&
3029         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3030         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3031         !isInlineConstant(Inst, OpIdx)) {
3032       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3033       if (NumLiterals == 0 || LiteralValue != Value) {
3034         LiteralValue = Value;
3035         ++NumLiterals;
3036       }
3037     }
3038   }
3039 
3040   return NumLiterals <= 1;
3041 }
3042 
3043 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3044   const unsigned Opc = Inst.getOpcode();
3045   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3046       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
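    // Only the two low-order op_sel bits are valid for these opcodes.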
3047     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3048     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3049 
3050     if (OpSel & ~3)
3051       return false;
3052   }
3053   return true;
3054 }
3055 
3056 // Check if VCC register matches wavefront size
3057 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3058   auto FB = getFeatureBits();
3059   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3060     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3061 }
3062 
3063 // VOP3 literal is only allowed in GFX10+ and only one can be used
3064 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3065   unsigned Opcode = Inst.getOpcode();
3066   const MCInstrDesc &Desc = MII.get(Opcode);
3067   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3068     return true;
3069 
3070   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3071   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3072   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3073 
3074   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3075 
3076   unsigned NumLiterals = 0;
3077   uint32_t LiteralValue;
3078 
3079   for (int OpIdx : OpIndices) {
3080     if (OpIdx == -1) break;
3081 
3082     const MCOperand &MO = Inst.getOperand(OpIdx);
3083     if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3084       continue;
3085 
3086     if (!isInlineConstant(Inst, OpIdx)) {
3087       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3088       if (NumLiterals == 0 || LiteralValue != Value) {
3089         LiteralValue = Value;
3090         ++NumLiterals;
3091       }
3092     }
3093   }
3094 
3095   return !NumLiterals ||
3096          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3097 }
3098 
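// Top-level semantic validation of a successfully matched instruction.
// Each check reports a diagnostic at IDLoc and validation stops at the
// first failure.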
3099 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3100                                           const SMLoc &IDLoc) {
3101   if (!validateLdsDirect(Inst)) {
3102     Error(IDLoc,
3103       "invalid use of lds_direct");
3104     return false;
3105   }
3106   if (!validateSOPLiteral(Inst)) {
3107     Error(IDLoc,
3108       "only one literal operand is allowed");
3109     return false;
3110   }
3111   if (!validateVOP3Literal(Inst)) {
3112     Error(IDLoc,
3113       "invalid literal operand");
3114     return false;
3115   }
3116   if (!validateConstantBusLimitations(Inst)) {
3117     Error(IDLoc,
3118       "invalid operand (violates constant bus restrictions)");
3119     return false;
3120   }
3121   if (!validateEarlyClobberLimitations(Inst)) {
3122     Error(IDLoc,
3123       "destination must be different than all sources");
3124     return false;
3125   }
3126   if (!validateIntClampSupported(Inst)) {
3127     Error(IDLoc,
3128       "integer clamping is not supported on this GPU");
3129     return false;
3130   }
3131   if (!validateOpSel(Inst)) {
3132     Error(IDLoc,
3133       "invalid op_sel operand");
3134     return false;
3135   }
3136   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3137   if (!validateMIMGD16(Inst)) {
3138     Error(IDLoc,
3139       "d16 modifier is not supported on this GPU");
3140     return false;
3141   }
3142   if (!validateMIMGDim(Inst)) {
3143     Error(IDLoc, "dim modifier is required on this GPU");
3144     return false;
3145   }
3146   if (!validateMIMGDataSize(Inst)) {
3147     Error(IDLoc,
3148       "image data size does not match dmask and tfe");
3149     return false;
3150   }
3151   if (!validateMIMGAddrSize(Inst)) {
3152     Error(IDLoc,
3153       "image address size does not match dim and a16");
3154     return false;
3155   }
3156   if (!validateMIMGAtomicDMask(Inst)) {
3157     Error(IDLoc,
3158       "invalid atomic image dmask");
3159     return false;
3160   }
3161   if (!validateMIMGGatherDMask(Inst)) {
3162     Error(IDLoc,
3163       "invalid image_gather dmask: only one bit must be set");
3164     return false;
3165   }
3166 
3167   return true;
3168 }
3169 
3170 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3171                                             const FeatureBitset &FBS,
3172                                             unsigned VariantID = 0);
3173 
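// Try each available matcher variant (e.g. default, VOP3, SDWA, DPP) and
// keep the most specific match status so the diagnostic reflects the
// closest match.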
3174 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3175                                               OperandVector &Operands,
3176                                               MCStreamer &Out,
3177                                               uint64_t &ErrorInfo,
3178                                               bool MatchingInlineAsm) {
3179   MCInst Inst;
3180   unsigned Result = Match_Success;
3181   for (auto Variant : getMatchedVariants()) {
3182     uint64_t EI;
3183     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3184                                   Variant);
3185     // We order match statuses from least to most specific, and we use the most
3186     // specific status as the result:
3187     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3188     if ((R == Match_Success) ||
3189         (R == Match_PreferE32) ||
3190         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3191         (R == Match_InvalidOperand && Result != Match_MissingFeature
3192                                    && Result != Match_PreferE32) ||
3193         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3194                                    && Result != Match_MissingFeature
3195                                    && Result != Match_PreferE32)) {
3196       Result = R;
3197       ErrorInfo = EI;
3198     }
3199     if (R == Match_Success)
3200       break;
3201   }
3202 
3203   switch (Result) {
3204   default: break;
3205   case Match_Success:
3206     if (!validateInstruction(Inst, IDLoc)) {
3207       return true;
3208     }
3209     Inst.setLoc(IDLoc);
3210     Out.EmitInstruction(Inst, getSTI());
3211     return false;
3212 
3213   case Match_MissingFeature:
3214     return Error(IDLoc, "instruction not supported on this GPU");
3215 
3216   case Match_MnemonicFail: {
3217     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3218     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3219         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3220     return Error(IDLoc, "invalid instruction" + Suggestion,
3221                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3222   }
3223 
3224   case Match_InvalidOperand: {
3225     SMLoc ErrorLoc = IDLoc;
3226     if (ErrorInfo != ~0ULL) {
3227       if (ErrorInfo >= Operands.size()) {
3228         return Error(IDLoc, "too few operands for instruction");
3229       }
3230       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3231       if (ErrorLoc == SMLoc())
3232         ErrorLoc = IDLoc;
3233     }
3234     return Error(ErrorLoc, "invalid operand for instruction");
3235   }
3236 
3237   case Match_PreferE32:
3238     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3239                         "should be encoded as e32");
3240   }
3241   llvm_unreachable("Implement any new match types added!");
3242 }
3243 
3244 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3245   int64_t Tmp = -1;
3246   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3247     return true;
3248   }
3249   if (getParser().parseAbsoluteExpression(Tmp)) {
3250     return true;
3251   }
3252   Ret = static_cast<uint32_t>(Tmp);
3253   return false;
3254 }
3255 
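// Parses the common "<major>, <minor>" prefix shared by the
// .hsa_code_object_version and .hsa_code_object_isa directives.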
3256 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3257                                                uint32_t &Minor) {
3258   if (ParseAsAbsoluteExpression(Major))
3259     return TokError("invalid major version");
3260 
3261   if (getLexer().isNot(AsmToken::Comma))
3262     return TokError("minor version number required, comma expected");
3263   Lex();
3264 
3265   if (ParseAsAbsoluteExpression(Minor))
3266     return TokError("invalid minor version");
3267 
3268   return false;
3269 }
3270 
3271 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3272   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3273     return TokError("directive only supported for amdgcn architecture");
3274 
3275   std::string Target;
3276 
3277   SMLoc TargetStart = getTok().getLoc();
3278   if (getParser().parseEscapedString(Target))
3279     return true;
3280   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3281 
3282   std::string ExpectedTarget;
3283   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3284   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3285 
3286   if (Target != ExpectedTargetOS.str())
3287     return getParser().Error(TargetRange.Start, "target must match options",
3288                              TargetRange);
3289 
3290   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3291   return false;
3292 }
3293 
3294 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3295   return getParser().Error(Range.Start, "value out of range", Range);
3296 }
3297 
3298 bool AMDGPUAsmParser::calculateGPRBlocks(
3299     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3300     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3301     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3302     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3303   // TODO(scott.linder): These calculations are duplicated from
3304   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3305   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3306 
3307   unsigned NumVGPRs = NextFreeVGPR;
3308   unsigned NumSGPRs = NextFreeSGPR;
3309 
3310   if (Version.Major >= 10)
3311     NumSGPRs = 0;
3312   else {
3313     unsigned MaxAddressableNumSGPRs =
3314         IsaInfo::getAddressableNumSGPRs(&getSTI());
3315 
3316     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3317         NumSGPRs > MaxAddressableNumSGPRs)
3318       return OutOfRangeError(SGPRRange);
3319 
3320     NumSGPRs +=
3321         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3322 
3323     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3324         NumSGPRs > MaxAddressableNumSGPRs)
3325       return OutOfRangeError(SGPRRange);
3326 
3327     if (Features.test(FeatureSGPRInitBug))
3328       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3329   }
3330 
3331   VGPRBlocks =
3332       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3333   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3334 
3335   return false;
3336 }
3337 
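// Parses a code object v3 kernel descriptor block, e.g. (kernel name and
// values are illustrative):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// The two .amdhsa_next_free_* directives are mandatory; everything else
// defaults to the value in the default kernel descriptor.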
3338 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3339   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3340     return TokError("directive only supported for amdgcn architecture");
3341 
3342   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3343     return TokError("directive only supported for amdhsa OS");
3344 
3345   StringRef KernelName;
3346   if (getParser().parseIdentifier(KernelName))
3347     return true;
3348 
3349   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3350 
3351   StringSet<> Seen;
3352 
3353   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3354 
3355   SMRange VGPRRange;
3356   uint64_t NextFreeVGPR = 0;
3357   SMRange SGPRRange;
3358   uint64_t NextFreeSGPR = 0;
3359   unsigned UserSGPRCount = 0;
3360   bool ReserveVCC = true;
3361   bool ReserveFlatScr = true;
3362   bool ReserveXNACK = hasXNACK();
3363   Optional<bool> EnableWavefrontSize32;
3364 
3365   while (true) {
3366     while (getLexer().is(AsmToken::EndOfStatement))
3367       Lex();
3368 
3369     if (getLexer().isNot(AsmToken::Identifier))
3370       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3371 
3372     StringRef ID = getTok().getIdentifier();
3373     SMRange IDRange = getTok().getLocRange();
3374     Lex();
3375 
3376     if (ID == ".end_amdhsa_kernel")
3377       break;
3378 
3379     if (Seen.find(ID) != Seen.end())
3380       return TokError(".amdhsa_ directives cannot be repeated");
3381     Seen.insert(ID);
3382 
3383     SMLoc ValStart = getTok().getLoc();
3384     int64_t IVal;
3385     if (getParser().parseAbsoluteExpression(IVal))
3386       return true;
3387     SMLoc ValEnd = getTok().getLoc();
3388     SMRange ValRange = SMRange(ValStart, ValEnd);
3389 
3390     if (IVal < 0)
3391       return OutOfRangeError(ValRange);
3392 
3393     uint64_t Val = IVal;
3394 
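// Range-check VALUE against the bit width of ENTRY, then pack it into the
// corresponding field of FIELD.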
3395 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3396   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3397     return OutOfRangeError(RANGE);                                             \
3398   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3399 
3400     if (ID == ".amdhsa_group_segment_fixed_size") {
3401       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3402         return OutOfRangeError(ValRange);
3403       KD.group_segment_fixed_size = Val;
3404     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3405       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3406         return OutOfRangeError(ValRange);
3407       KD.private_segment_fixed_size = Val;
3408     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3409       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3410                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3411                        Val, ValRange);
3412       UserSGPRCount += 4;
3413     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3414       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3415                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3416                        ValRange);
3417       UserSGPRCount += 2;
3418     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3419       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3420                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3421                        ValRange);
3422       UserSGPRCount += 2;
3423     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3424       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3425                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3426                        Val, ValRange);
3427       UserSGPRCount += 2;
3428     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3429       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3430                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3431                        ValRange);
3432       UserSGPRCount += 2;
3433     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3434       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3435                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3436                        ValRange);
3437       UserSGPRCount += 2;
3438     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3439       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3440                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3441                        Val, ValRange);
3442       UserSGPRCount += 1;
3443     } else if (ID == ".amdhsa_wavefront_size32") {
3444       if (IVersion.Major < 10)
3445         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3446                                  IDRange);
3447       EnableWavefrontSize32 = Val;
3448       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3449                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3450                        Val, ValRange);
3451     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3452       PARSE_BITS_ENTRY(
3453           KD.compute_pgm_rsrc2,
3454           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3455           ValRange);
3456     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3457       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3458                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3459                        ValRange);
3460     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3461       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3462                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3463                        ValRange);
3464     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3465       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3466                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3467                        ValRange);
3468     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3469       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3470                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3471                        ValRange);
3472     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3473       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3474                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3475                        ValRange);
3476     } else if (ID == ".amdhsa_next_free_vgpr") {
3477       VGPRRange = ValRange;
3478       NextFreeVGPR = Val;
3479     } else if (ID == ".amdhsa_next_free_sgpr") {
3480       SGPRRange = ValRange;
3481       NextFreeSGPR = Val;
3482     } else if (ID == ".amdhsa_reserve_vcc") {
3483       if (!isUInt<1>(Val))
3484         return OutOfRangeError(ValRange);
3485       ReserveVCC = Val;
3486     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3487       if (IVersion.Major < 7)
3488         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3489                                  IDRange);
3490       if (!isUInt<1>(Val))
3491         return OutOfRangeError(ValRange);
3492       ReserveFlatScr = Val;
3493     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3494       if (IVersion.Major < 8)
3495         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3496                                  IDRange);
3497       if (!isUInt<1>(Val))
3498         return OutOfRangeError(ValRange);
3499       ReserveXNACK = Val;
3500     } else if (ID == ".amdhsa_float_round_mode_32") {
3501       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3502                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3503     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3504       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3505                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3506     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3507       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3508                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3509     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3510       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3511                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3512                        ValRange);
3513     } else if (ID == ".amdhsa_dx10_clamp") {
3514       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3515                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3516     } else if (ID == ".amdhsa_ieee_mode") {
3517       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3518                        Val, ValRange);
3519     } else if (ID == ".amdhsa_fp16_overflow") {
3520       if (IVersion.Major < 9)
3521         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3522                                  IDRange);
3523       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3524                        ValRange);
3525     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3526       if (IVersion.Major < 10)
3527         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3528                                  IDRange);
3529       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3530                        ValRange);
3531     } else if (ID == ".amdhsa_memory_ordered") {
3532       if (IVersion.Major < 10)
3533         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3534                                  IDRange);
3535       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3536                        ValRange);
3537     } else if (ID == ".amdhsa_forward_progress") {
3538       if (IVersion.Major < 10)
3539         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3540                                  IDRange);
3541       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3542                        ValRange);
3543     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3544       PARSE_BITS_ENTRY(
3545           KD.compute_pgm_rsrc2,
3546           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3547           ValRange);
3548     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3549       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3550                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3551                        Val, ValRange);
3552     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3553       PARSE_BITS_ENTRY(
3554           KD.compute_pgm_rsrc2,
3555           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3556           ValRange);
3557     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3558       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3559                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3560                        Val, ValRange);
3561     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3562       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3563                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3564                        Val, ValRange);
3565     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3566       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3567                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3568                        Val, ValRange);
3569     } else if (ID == ".amdhsa_exception_int_div_zero") {
3570       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3571                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3572                        Val, ValRange);
3573     } else {
3574       return getParser().Error(IDRange.Start,
3575                                "unknown .amdhsa_kernel directive", IDRange);
3576     }
3577 
3578 #undef PARSE_BITS_ENTRY
3579   }
3580 
3581   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3582     return TokError(".amdhsa_next_free_vgpr directive is required");
3583 
3584   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3585     return TokError(".amdhsa_next_free_sgpr directive is required");
3586 
3587   unsigned VGPRBlocks;
3588   unsigned SGPRBlocks;
3589   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3590                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3591                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3592                          SGPRBlocks))
3593     return true;
3594 
3595   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3596           VGPRBlocks))
3597     return OutOfRangeError(VGPRRange);
3598   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3599                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3600 
3601   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3602           SGPRBlocks))
3603     return OutOfRangeError(SGPRRange);
3604   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3605                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3606                   SGPRBlocks);
3607 
3608   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3609     return TokError("too many user SGPRs enabled");
3610   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3611                   UserSGPRCount);
3612 
3613   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3614       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3615       ReserveFlatScr, ReserveXNACK);
3616   return false;
3617 }
3618 
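// e.g. ".hsa_code_object_version 2,1" (version numbers are illustrative).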
3619 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3620   uint32_t Major;
3621   uint32_t Minor;
3622 
3623   if (ParseDirectiveMajorMinor(Major, Minor))
3624     return true;
3625 
3626   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3627   return false;
3628 }
3629 
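// e.g. .hsa_code_object_isa 8,0,3,"AMD","AMDGPU" (version numbers are
// illustrative); with no arguments, the ISA version of the targeted GPU is
// emitted.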
3630 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3631   uint32_t Major;
3632   uint32_t Minor;
3633   uint32_t Stepping;
3634   StringRef VendorName;
3635   StringRef ArchName;
3636 
3637   // If this directive has no arguments, then use the ISA version for the
3638   // targeted GPU.
3639   if (getLexer().is(AsmToken::EndOfStatement)) {
3640     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3641     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3642                                                       ISA.Stepping,
3643                                                       "AMD", "AMDGPU");
3644     return false;
3645   }
3646 
3647   if (ParseDirectiveMajorMinor(Major, Minor))
3648     return true;
3649 
3650   if (getLexer().isNot(AsmToken::Comma))
3651     return TokError("stepping version number required, comma expected");
3652   Lex();
3653 
3654   if (ParseAsAbsoluteExpression(Stepping))
3655     return TokError("invalid stepping version");
3656 
3657   if (getLexer().isNot(AsmToken::Comma))
3658     return TokError("vendor name required, comma expected");
3659   Lex();
3660 
3661   if (getLexer().isNot(AsmToken::String))
3662     return TokError("invalid vendor name");
3663 
3664   VendorName = getLexer().getTok().getStringContents();
3665   Lex();
3666 
3667   if (getLexer().isNot(AsmToken::Comma))
3668     return TokError("arch name required, comma expected");
3669   Lex();
3670 
3671   if (getLexer().isNot(AsmToken::String))
3672     return TokError("invalid arch name");
3673 
3674   ArchName = getLexer().getTok().getStringContents();
3675   Lex();
3676 
3677   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3678                                                     VendorName, ArchName);
3679   return false;
3680 }
3681 
3682 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3683                                                amd_kernel_code_t &Header) {
3684   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3685   // assembly for backwards compatibility.
3686   if (ID == "max_scratch_backing_memory_byte_size") {
3687     Parser.eatToEndOfStatement();
3688     return false;
3689   }
3690 
3691   SmallString<40> ErrStr;
3692   raw_svector_ostream Err(ErrStr);
3693   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3694     return TokError(Err.str());
3695   }
3696   Lex();
3697 
3698   if (ID == "enable_wavefront_size32") {
3699     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3700       if (!isGFX10())
3701         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3702       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3703         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3704     } else {
3705       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3706         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3707     }
3708   }
3709 
3710   if (ID == "wavefront_size") {
3711     if (Header.wavefront_size == 5) {
3712       if (!isGFX10())
3713         return TokError("wavefront_size=5 is only allowed on GFX10+");
3714       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3715         return TokError("wavefront_size=5 requires +WavefrontSize32");
3716     } else if (Header.wavefront_size == 6) {
3717       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3718         return TokError("wavefront_size=6 requires +WavefrontSize64");
3719     }
3720   }
3721 
3722   if (ID == "enable_wgp_mode") {
3723     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3724       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3725   }
3726 
3727   if (ID == "enable_mem_ordered") {
3728     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3729       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3730   }
3731 
3732   if (ID == "enable_fwd_progress") {
3733     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3734       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3735   }
3736 
3737   return false;
3738 }
3739 
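// Parses a ".amd_kernel_code_t ... .end_amd_kernel_code_t" block of
// "<field> = <value>" assignments, starting from the default header for
// the current subtarget.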
3740 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3741   amd_kernel_code_t Header;
3742   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3743 
3744   while (true) {
3745     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3746     // will set the current token to EndOfStatement.
3747     while(getLexer().is(AsmToken::EndOfStatement))
3748       Lex();
3749 
3750     if (getLexer().isNot(AsmToken::Identifier))
3751       return TokError("expected value identifier or .end_amd_kernel_code_t");
3752 
3753     StringRef ID = getLexer().getTok().getIdentifier();
3754     Lex();
3755 
3756     if (ID == ".end_amd_kernel_code_t")
3757       break;
3758 
3759     if (ParseAMDKernelCodeTValue(ID, Header))
3760       return true;
3761   }
3762 
3763   getTargetStreamer().EmitAMDKernelCodeT(Header);
3764 
3765   return false;
3766 }
3767 
3768 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3769   if (getLexer().isNot(AsmToken::Identifier))
3770     return TokError("expected symbol name");
3771 
3772   StringRef KernelName = Parser.getTok().getString();
3773 
3774   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3775                                            ELF::STT_AMDGPU_HSA_KERNEL);
3776   Lex();
3777   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3778     KernelScope.initialize(getContext());
3779   return false;
3780 }
3781 
3782 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3783   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3784     return Error(getParser().getTok().getLoc(),
3785                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3786                  "architectures");
3787   }
3788 
3789   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3790 
3791   std::string ISAVersionStringFromSTI;
3792   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3793   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3794 
3795   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3796     return Error(getParser().getTok().getLoc(),
3797                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3798                  "arguments specified through the command line");
3799   }
3800 
3801   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3802   Lex();
3803 
3804   return false;
3805 }
3806 
3807 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3808   const char *AssemblerDirectiveBegin;
3809   const char *AssemblerDirectiveEnd;
3810   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3811       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3812           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3813                             HSAMD::V3::AssemblerDirectiveEnd)
3814           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3815                             HSAMD::AssemblerDirectiveEnd);
3816 
3817   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3818     return Error(getParser().getTok().getLoc(),
3819                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3820                  "not available on non-amdhsa OSes")).str());
3821   }
3822 
3823   std::string HSAMetadataString;
3824   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
3825                           HSAMetadataString))
3826     return true;
3827 
3828   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3829     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3830       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3831   } else {
3832     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3833       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3834   }
3835 
3836   return false;
3837 }
3838 
3839 /// Common code to parse out a block of text (typically YAML) between start and
3840 /// end directives.
3841 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
3842                                           const char *AssemblerDirectiveEnd,
3843                                           std::string &CollectString) {
3844 
3845   raw_string_ostream CollectStream(CollectString);
3846 
3847   getLexer().setSkipSpace(false);
3848 
3849   bool FoundEnd = false;
3850   while (!getLexer().is(AsmToken::Eof)) {
3851     while (getLexer().is(AsmToken::Space)) {
3852       CollectStream << getLexer().getTok().getString();
3853       Lex();
3854     }
3855 
3856     if (getLexer().is(AsmToken::Identifier)) {
3857       StringRef ID = getLexer().getTok().getIdentifier();
3858       if (ID == AssemblerDirectiveEnd) {
3859         Lex();
3860         FoundEnd = true;
3861         break;
3862       }
3863     }
3864 
3865     CollectStream << Parser.parseStringToEndOfStatement()
3866                   << getContext().getAsmInfo()->getSeparatorString();
3867 
3868     Parser.eatToEndOfStatement();
3869   }
3870 
3871   getLexer().setSkipSpace(true);
3872 
3873   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3874     return TokError(Twine("expected directive ") +
3875                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3876   }
3877 
3878   CollectStream.flush();
3879   return false;
3880 }
3881 
3882 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3883 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3884   std::string String;
3885   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3886                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3887     return true;
3888 
3889   auto PALMetadata = getTargetStreamer().getPALMetadata();
3890   if (!PALMetadata->setFromString(String))
3891     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3892   return false;
3893 }
3894 
3895 /// Parse the assembler directive for old linear-format PAL metadata.
3896 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3897   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3898     return Error(getParser().getTok().getLoc(),
3899                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3900                  "not available on non-amdpal OSes")).str());
3901   }
3902 
3903   auto PALMetadata = getTargetStreamer().getPALMetadata();
3904   PALMetadata->setLegacy();
3905   for (;;) {
3906     uint32_t Key, Value;
3907     if (ParseAsAbsoluteExpression(Key)) {
3908       return TokError(Twine("invalid value in ") +
3909                       Twine(PALMD::AssemblerDirective));
3910     }
3911     if (getLexer().isNot(AsmToken::Comma)) {
3912       return TokError(Twine("expected an even number of values in ") +
3913                       Twine(PALMD::AssemblerDirective));
3914     }
3915     Lex();
3916     if (ParseAsAbsoluteExpression(Value)) {
3917       return TokError(Twine("invalid value in ") +
3918                       Twine(PALMD::AssemblerDirective));
3919     }
3920     PALMetadata->setRegister(Key, Value);
3921     if (getLexer().isNot(AsmToken::Comma))
3922       break;
3923     Lex();
3924   }
3925   return false;
3926 }
3927 
3928 /// ParseDirectiveAMDGPULDS
3929 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
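///  e.g. ".amdgpu_lds lds_sym, 256, 16" (the symbol name is illustrative)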
3930 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
3931   if (getParser().checkForValidSection())
3932     return true;
3933 
3934   StringRef Name;
3935   SMLoc NameLoc = getLexer().getLoc();
3936   if (getParser().parseIdentifier(Name))
3937     return TokError("expected identifier in directive");
3938 
3939   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
3940   if (parseToken(AsmToken::Comma, "expected ','"))
3941     return true;
3942 
3943   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
3944 
3945   int64_t Size;
3946   SMLoc SizeLoc = getLexer().getLoc();
3947   if (getParser().parseAbsoluteExpression(Size))
3948     return true;
3949   if (Size < 0)
3950     return Error(SizeLoc, "size must be non-negative");
3951   if (Size > LocalMemorySize)
3952     return Error(SizeLoc, "size is too large");
3953 
3954   int64_t Align = 4;
3955   if (getLexer().is(AsmToken::Comma)) {
3956     Lex();
3957     SMLoc AlignLoc = getLexer().getLoc();
3958     if (getParser().parseAbsoluteExpression(Align))
3959       return true;
3960     if (Align < 0 || !isPowerOf2_64(Align))
3961       return Error(AlignLoc, "alignment must be a power of two");
3962 
3963     // Alignment larger than the size of LDS is possible in theory, as long
3964     // as the linker manages to place the symbol at address 0, but we do want
3965     // to make sure the alignment fits nicely into a 32-bit integer.
3966     if (Align >= 1u << 31)
3967       return Error(AlignLoc, "alignment is too large");
3968   }
3969 
3970   if (parseToken(AsmToken::EndOfStatement,
3971                  "unexpected token in '.amdgpu_lds' directive"))
3972     return true;
3973 
3974   Symbol->redefineIfPossible();
3975   if (!Symbol->isUndefined())
3976     return Error(NameLoc, "invalid symbol redefinition");
3977 
3978   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
3979   return false;
3980 }
3981 
3982 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3983   StringRef IDVal = DirectiveID.getString();
3984 
3985   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3986     if (IDVal == ".amdgcn_target")
3987       return ParseDirectiveAMDGCNTarget();
3988 
3989     if (IDVal == ".amdhsa_kernel")
3990       return ParseDirectiveAMDHSAKernel();
3991 
3992     // TODO: Restructure/combine with PAL metadata directive.
3993     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3994       return ParseDirectiveHSAMetadata();
3995   } else {
3996     if (IDVal == ".hsa_code_object_version")
3997       return ParseDirectiveHSACodeObjectVersion();
3998 
3999     if (IDVal == ".hsa_code_object_isa")
4000       return ParseDirectiveHSACodeObjectISA();
4001 
4002     if (IDVal == ".amd_kernel_code_t")
4003       return ParseDirectiveAMDKernelCodeT();
4004 
4005     if (IDVal == ".amdgpu_hsa_kernel")
4006       return ParseDirectiveAMDGPUHsaKernel();
4007 
4008     if (IDVal == ".amd_amdgpu_isa")
4009       return ParseDirectiveISAVersion();
4010 
4011     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4012       return ParseDirectiveHSAMetadata();
4013   }
4014 
4015   if (IDVal == ".amdgpu_lds")
4016     return ParseDirectiveAMDGPULDS();
4017 
4018   if (IDVal == PALMD::AssemblerDirectiveBegin)
4019     return ParseDirectivePALMetadataBegin();
4020 
4021   if (IDVal == PALMD::AssemblerDirective)
4022     return ParseDirectivePALMetadata();
4023 
4024   return true;
4025 }
4026 
4027 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4028                                            unsigned RegNo) const {
4029 
4030   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4031        R.isValid(); ++R) {
4032     if (*R == RegNo)
4033       return isGFX9() || isGFX10();
4034   }
4035 
4036   // GFX10 has 2 more SGPRs: 104 and 105.
4037   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4038        R.isValid(); ++R) {
4039     if (*R == RegNo)
4040       return hasSGPR104_SGPR105();
4041   }
4042 
4043   switch (RegNo) {
4044   case AMDGPU::SRC_SHARED_BASE:
4045   case AMDGPU::SRC_SHARED_LIMIT:
4046   case AMDGPU::SRC_PRIVATE_BASE:
4047   case AMDGPU::SRC_PRIVATE_LIMIT:
4048   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4049     return !isCI() && !isSI() && !isVI();
4050   case AMDGPU::TBA:
4051   case AMDGPU::TBA_LO:
4052   case AMDGPU::TBA_HI:
4053   case AMDGPU::TMA:
4054   case AMDGPU::TMA_LO:
4055   case AMDGPU::TMA_HI:
4056     return !isGFX9() && !isGFX10();
4057   case AMDGPU::XNACK_MASK:
4058   case AMDGPU::XNACK_MASK_LO:
4059   case AMDGPU::XNACK_MASK_HI:
4060     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4061   case AMDGPU::SGPR_NULL:
4062     return isGFX10();
4063   default:
4064     break;
4065   }
4066 
4067   if (isCI())
4068     return true;
4069 
4070   if (isSI() || isGFX10()) {
4071     // No flat_scr on SI.
4072     // On GFX10 flat scratch is not a valid register operand and can only be
4073     // accessed with s_setreg/s_getreg.
4074     switch (RegNo) {
4075     case AMDGPU::FLAT_SCR:
4076     case AMDGPU::FLAT_SCR_LO:
4077     case AMDGPU::FLAT_SCR_HI:
4078       return false;
4079     default:
4080       return true;
4081     }
4082   }
4083 
4084   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4085   // SI/CI have.
4086   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4087        R.isValid(); ++R) {
4088     if (*R == RegNo)
4089       return hasSGPR102_SGPR103();
4090   }
4091 
4092   return true;
4093 }
4094 
4095 OperandMatchResultTy
4096 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4097                               OperandMode Mode) {
4098   // Try to parse with a custom parser
4099   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4100 
4101   // If we successfully parsed the operand or if there was an error parsing,
4102   // we are done.
4103   //
4104   // If we are parsing after we reach EndOfStatement then this means we
4105   // are appending default values to the Operands list.  This is only done
4106   // by custom parser, so we shouldn't continue on to the generic parsing.
4107   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4108       getLexer().is(AsmToken::EndOfStatement))
4109     return ResTy;
4110 
4111   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4112     unsigned Prefix = Operands.size();
4113     SMLoc LBraceLoc = getTok().getLoc();
4114     Parser.Lex(); // eat the '['
4115 
4116     for (;;) {
4117       ResTy = parseReg(Operands);
4118       if (ResTy != MatchOperand_Success)
4119         return ResTy;
4120 
4121       if (getLexer().is(AsmToken::RBrac))
4122         break;
4123 
4124       if (getLexer().isNot(AsmToken::Comma))
4125         return MatchOperand_ParseFail;
4126       Parser.Lex();
4127     }
4128 
4129     if (Operands.size() - Prefix > 1) {
4130       Operands.insert(Operands.begin() + Prefix,
4131                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4132       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4133                                                     getTok().getLoc()));
4134     }
4135 
4136     Parser.Lex(); // eat the ']'
4137     return MatchOperand_Success;
4138   }
4139 
4140   return parseRegOrImm(Operands);
4141 }
4142 
4143 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4144   // Clear any forced encodings from the previous instruction.
4145   setForcedEncodingSize(0);
4146   setForcedDPP(false);
4147   setForcedSDWA(false);
4148 
4149   if (Name.endswith("_e64")) {
4150     setForcedEncodingSize(64);
4151     return Name.substr(0, Name.size() - 4);
4152   } else if (Name.endswith("_e32")) {
4153     setForcedEncodingSize(32);
4154     return Name.substr(0, Name.size() - 4);
4155   } else if (Name.endswith("_dpp")) {
4156     setForcedDPP(true);
4157     return Name.substr(0, Name.size() - 4);
4158   } else if (Name.endswith("_sdwa")) {
4159     setForcedSDWA(true);
4160     return Name.substr(0, Name.size() - 5);
4161   }
4162   return Name;
4163 }
4164 
4165 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4166                                        StringRef Name,
4167                                        SMLoc NameLoc, OperandVector &Operands) {
4168   // Add the instruction mnemonic
4169   Name = parseMnemonicSuffix(Name);
4170   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4171 
4172   bool IsMIMG = Name.startswith("image_");
4173 
4174   while (!getLexer().is(AsmToken::EndOfStatement)) {
4175     OperandMode Mode = OperandMode_Default;
4176     if (IsMIMG && isGFX10() && Operands.size() == 2)
4177       Mode = OperandMode_NSA;
4178     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4179 
4180     // Eat the comma or space if there is one.
4181     if (getLexer().is(AsmToken::Comma))
4182       Parser.Lex();
4183 
4184     switch (Res) {
4185       case MatchOperand_Success: break;
4186       case MatchOperand_ParseFail:
4187         // FIXME: use real operand location rather than the current location.
4188         Error(getLexer().getLoc(), "failed parsing operand.");
4189         while (!getLexer().is(AsmToken::EndOfStatement)) {
4190           Parser.Lex();
4191         }
4192         return true;
4193       case MatchOperand_NoMatch:
4194         // FIXME: use real operand location rather than the current location.
4195         Error(getLexer().getLoc(), "not a valid operand.");
4196         while (!getLexer().is(AsmToken::EndOfStatement)) {
4197           Parser.Lex();
4198         }
4199         return true;
4200     }
4201   }
4202 
4203   return false;
4204 }
4205 
4206 //===----------------------------------------------------------------------===//
4207 // Utility functions
4208 //===----------------------------------------------------------------------===//
4209 
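// Parses an integer argument of the form "<Prefix>:<expr>",
// e.g. "offset:16" when Prefix is "offset".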
4210 OperandMatchResultTy
4211 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4212 
4213   if (!trySkipId(Prefix, AsmToken::Colon))
4214     return MatchOperand_NoMatch;
4215 
4216   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4217 }
4218 
4219 OperandMatchResultTy
4220 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4221                                     AMDGPUOperand::ImmTy ImmTy,
4222                                     bool (*ConvertResult)(int64_t&)) {
4223   SMLoc S = getLoc();
4224   int64_t Value = 0;
4225 
4226   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4227   if (Res != MatchOperand_Success)
4228     return Res;
4229 
4230   if (ConvertResult && !ConvertResult(Value)) {
4231     Error(S, "invalid " + StringRef(Prefix) + " value.");
4232   }
4233 
4234   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4235   return MatchOperand_Success;
4236 }
4237 
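// Parses a bit-array argument of the form "<Prefix>:[b0,b1,...]" with up to
// four 0/1 elements, e.g. "op_sel:[0,1,1,0]", and packs the bits into a
// single immediate.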
4238 OperandMatchResultTy
4239 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4240                                              OperandVector &Operands,
4241                                              AMDGPUOperand::ImmTy ImmTy,
4242                                              bool (*ConvertResult)(int64_t&)) {
4243   SMLoc S = getLoc();
4244   if (!trySkipId(Prefix, AsmToken::Colon))
4245     return MatchOperand_NoMatch;
4246 
4247   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4248     return MatchOperand_ParseFail;
4249 
4250   unsigned Val = 0;
4251   const unsigned MaxSize = 4;
4252 
4253   // FIXME: How to verify the number of elements matches the number of src
4254   // operands?
4255   for (int I = 0; ; ++I) {
4256     int64_t Op;
4257     SMLoc Loc = getLoc();
4258     if (!parseExpr(Op))
4259       return MatchOperand_ParseFail;
4260 
4261     if (Op != 0 && Op != 1) {
4262       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4263       return MatchOperand_ParseFail;
4264     }
4265 
4266     Val |= (Op << I);
4267 
4268     if (trySkipToken(AsmToken::RBrac))
4269       break;
4270 
4271     if (I + 1 == MaxSize) {
4272       Error(getLoc(), "expected a closing square bracket");
4273       return MatchOperand_ParseFail;
4274     }
4275 
4276     if (!skipToken(AsmToken::Comma, "expected a comma"))
4277       return MatchOperand_ParseFail;
4278   }
4279 
4280   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4281   return MatchOperand_Success;
4282 }
4283 
4284 OperandMatchResultTy
4285 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4286                                AMDGPUOperand::ImmTy ImmTy) {
4287   int64_t Bit = 0;
4288   SMLoc S = Parser.getTok().getLoc();
4289 
4290   // If we are at the end of the statement, this is a default argument, so
4291   // keep the default value.
4292   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4293     switch(getLexer().getKind()) {
4294       case AsmToken::Identifier: {
4295         StringRef Tok = Parser.getTok().getString();
4296         if (Tok == Name) {
4297           if (Tok == "r128" && isGFX9())
4298             Error(S, "r128 modifier is not supported on this GPU");
4299           if (Tok == "a16" && !isGFX9())
4300             Error(S, "a16 modifier is not supported on this GPU");
4301           Bit = 1;
4302           Parser.Lex();
4303         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4304           Bit = 0;
4305           Parser.Lex();
4306         } else {
4307           return MatchOperand_NoMatch;
4308         }
4309         break;
4310       }
4311       default:
4312         return MatchOperand_NoMatch;
4313     }
4314   }
4315 
4316   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4317     return MatchOperand_ParseFail;
4318 
4319   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4320   return MatchOperand_Success;
4321 }
4322 
4323 static void addOptionalImmOperand(
4324   MCInst& Inst, const OperandVector& Operands,
4325   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4326   AMDGPUOperand::ImmTy ImmT,
4327   int64_t Default = 0) {
4328   auto i = OptionalIdx.find(ImmT);
4329   if (i != OptionalIdx.end()) {
4330     unsigned Idx = i->second;
4331     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4332   } else {
4333     Inst.addOperand(MCOperand::createImm(Default));
4334   }
4335 }
4336 
4337 OperandMatchResultTy
4338 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4339   if (getLexer().isNot(AsmToken::Identifier)) {
4340     return MatchOperand_NoMatch;
4341   }
4342   StringRef Tok = Parser.getTok().getString();
4343   if (Tok != Prefix) {
4344     return MatchOperand_NoMatch;
4345   }
4346 
4347   Parser.Lex();
4348   if (getLexer().isNot(AsmToken::Colon)) {
4349     return MatchOperand_ParseFail;
4350   }
4351 
4352   Parser.Lex();
4353   if (getLexer().isNot(AsmToken::Identifier)) {
4354     return MatchOperand_ParseFail;
4355   }
4356 
4357   Value = Parser.getTok().getString();
4358   return MatchOperand_Success;
4359 }
4360 
4361 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4362 // values to live in a joint format operand in the MCInst encoding.
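// The combined immediate stores dfmt in bits [3:0] and nfmt in bits [6:4].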
4363 OperandMatchResultTy
4364 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4365   SMLoc S = Parser.getTok().getLoc();
4366   int64_t Dfmt = 0, Nfmt = 0;
4367   // dfmt and nfmt can appear in either order, and each is optional.
4368   bool GotDfmt = false, GotNfmt = false;
4369   while (!GotDfmt || !GotNfmt) {
4370     if (!GotDfmt) {
4371       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4372       if (Res != MatchOperand_NoMatch) {
4373         if (Res != MatchOperand_Success)
4374           return Res;
4375         if (Dfmt >= 16) {
4376           Error(Parser.getTok().getLoc(), "out of range dfmt");
4377           return MatchOperand_ParseFail;
4378         }
4379         GotDfmt = true;
4380         Parser.Lex();
4381         continue;
4382       }
4383     }
4384     if (!GotNfmt) {
4385       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4386       if (Res != MatchOperand_NoMatch) {
4387         if (Res != MatchOperand_Success)
4388           return Res;
4389         if (Nfmt >= 8) {
4390           Error(Parser.getTok().getLoc(), "out of range nfmt");
4391           return MatchOperand_ParseFail;
4392         }
4393         GotNfmt = true;
4394         Parser.Lex();
4395         continue;
4396       }
4397     }
4398     break;
4399   }
4400   if (!GotDfmt && !GotNfmt)
4401     return MatchOperand_NoMatch;
4402   auto Format = Dfmt | Nfmt << 4;
4403   Operands.push_back(
4404       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4405   return MatchOperand_Success;
4406 }
4407 
4408 //===----------------------------------------------------------------------===//
4409 // ds
4410 //===----------------------------------------------------------------------===//
4411 
4412 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4413                                     const OperandVector &Operands) {
4414   OptionalImmIndexMap OptionalIdx;
4415 
4416   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4417     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4418 
4419     // Add the register arguments
4420     if (Op.isReg()) {
4421       Op.addRegOperands(Inst, 1);
4422       continue;
4423     }
4424 
4425     // Handle optional arguments
4426     OptionalIdx[Op.getImmTy()] = i;
4427   }
4428 
4429   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4430   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4431   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4432 
4433   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4434 }
4435 
4436 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4437                                 bool IsGdsHardcoded) {
4438   OptionalImmIndexMap OptionalIdx;
4439 
4440   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4441     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4442 
4443     // Add the register arguments
4444     if (Op.isReg()) {
4445       Op.addRegOperands(Inst, 1);
4446       continue;
4447     }
4448 
4449     if (Op.isToken() && Op.getToken() == "gds") {
4450       IsGdsHardcoded = true;
4451       continue;
4452     }
4453 
4454     // Handle optional arguments
4455     OptionalIdx[Op.getImmTy()] = i;
4456   }
4457 
4458   AMDGPUOperand::ImmTy OffsetType =
4459     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4460      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4461      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4462                                                       AMDGPUOperand::ImmTyOffset;
4463 
4464   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4465 
4466   if (!IsGdsHardcoded) {
4467     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4468   }
4469   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4470 }
4471 
4472 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4473   OptionalImmIndexMap OptionalIdx;
4474 
4475   unsigned OperandIdx[4];
4476   unsigned EnMask = 0;
4477   int SrcIdx = 0;
4478 
4479   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4480     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4481 
4482     // Add the register arguments
4483     if (Op.isReg()) {
4484       assert(SrcIdx < 4);
4485       OperandIdx[SrcIdx] = Inst.size();
4486       Op.addRegOperands(Inst, 1);
4487       ++SrcIdx;
4488       continue;
4489     }
4490 
4491     if (Op.isOff()) {
4492       assert(SrcIdx < 4);
4493       OperandIdx[SrcIdx] = Inst.size();
4494       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4495       ++SrcIdx;
4496       continue;
4497     }
4498 
4499     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4500       Op.addImmOperands(Inst, 1);
4501       continue;
4502     }
4503 
4504     if (Op.isToken() && Op.getToken() == "done")
4505       continue;
4506 
4507     // Handle optional arguments
4508     OptionalIdx[Op.getImmTy()] = i;
4509   }
4510 
4511   assert(SrcIdx == 4);
4512 
4513   bool Compr = false;
4514   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4515     Compr = true;
4516     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4517     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4518     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4519   }
4520 
4521   for (auto i = 0; i < SrcIdx; ++i) {
4522     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4523       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4524     }
4525   }
4526 
4527   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4528   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4529 
4530   Inst.addOperand(MCOperand::createImm(EnMask));
4531 }
4532 
4533 //===----------------------------------------------------------------------===//
4534 // s_waitcnt
4535 //===----------------------------------------------------------------------===//
4536 
4537 static bool
4538 encodeCnt(
4539   const AMDGPU::IsaVersion ISA,
4540   int64_t &IntVal,
4541   int64_t CntVal,
4542   bool Saturate,
4543   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4544   unsigned (*decode)(const IsaVersion &Version, unsigned))
4545 {
4546   bool Failed = false;
4547 
4548   IntVal = encode(ISA, IntVal, CntVal);
4549   if (CntVal != decode(ISA, IntVal)) {
4550     if (Saturate) {
4551       IntVal = encode(ISA, IntVal, -1);
4552     } else {
4553       Failed = true;
4554     }
4555   }
4556   return Failed;
4557 }
4558 
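// Parses a single counter term of the form "<name>(<value>)", e.g.
// "vmcnt(0)". A "_sat" suffix on the counter name clamps an out-of-range
// value to the counter's maximum instead of reporting an error.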
4559 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4560 
4561   SMLoc CntLoc = getLoc();
4562   StringRef CntName = getTokenStr();
4563 
4564   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4565       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4566     return false;
4567 
4568   int64_t CntVal;
4569   SMLoc ValLoc = getLoc();
4570   if (!parseExpr(CntVal))
4571     return false;
4572 
4573   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4574 
4575   bool Failed = true;
4576   bool Sat = CntName.endswith("_sat");
4577 
4578   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4579     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4580   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4581     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4582   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4583     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4584   } else {
4585     Error(CntLoc, "invalid counter name " + CntName);
4586     return false;
4587   }
4588 
4589   if (Failed) {
4590     Error(ValLoc, "value is too large for " + CntName);
4591     return false;
4592   }
4593 
4594   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4595     return false;
4596 
4597   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4598     if (isToken(AsmToken::EndOfStatement)) {
4599       Error(getLoc(), "expected a counter name");
4600       return false;
4601     }
4602   }
4603 
4604   return true;
4605 }
4606 
4607 OperandMatchResultTy
4608 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4609   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4610   int64_t Waitcnt = getWaitcntBitMask(ISA);
4611   SMLoc S = getLoc();
4612 
4613   // If parsing fails, do not return an error code
4614   // to avoid excessive error messages.
4615   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4616     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4617   } else {
4618     parseExpr(Waitcnt);
4619   }
4620 
4621   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4622   return MatchOperand_Success;
4623 }
4624 
4625 bool
4626 AMDGPUOperand::isSWaitCnt() const {
4627   return isImm();
4628 }
4629 
4630 //===----------------------------------------------------------------------===//
4631 // hwreg
4632 //===----------------------------------------------------------------------===//
4633 
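// Parse the body of a hwreg(...) operand: a register name or a numeric code,
// optionally followed by ", <bit offset>, <bit width>". The opening
// parenthesis has already been consumed by the caller.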
4634 bool
4635 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4636                                 int64_t &Offset,
4637                                 int64_t &Width) {
4638   using namespace llvm::AMDGPU::Hwreg;
4639 
4640   // The register may be specified by name or using a numeric code
4641   if (isToken(AsmToken::Identifier) &&
4642       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4643     HwReg.IsSymbolic = true;
4644     lex(); // skip register name
4645   } else if (!parseExpr(HwReg.Id)) {
4646     return false;
4647   }
4648 
4649   if (trySkipToken(AsmToken::RParen))
4650     return true;
4651 
4652   // parse optional params
4653   return
4654     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4655     parseExpr(Offset) &&
4656     skipToken(AsmToken::Comma, "expected a comma") &&
4657     parseExpr(Width) &&
4658     skipToken(AsmToken::RParen, "expected a closing parenthesis");
4659 }
4660 
4661 void
4662 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4663                                const int64_t Offset,
4664                                const int64_t Width,
4665                                const SMLoc Loc) {
4666 
4667   using namespace llvm::AMDGPU::Hwreg;
4668 
4669   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4670     Error(Loc, "specified hardware register is not supported on this GPU");
4671   } else if (!isValidHwreg(HwReg.Id)) {
4672     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4673   } else if (!isValidHwregOffset(Offset)) {
4674     Error(Loc, "invalid bit offset: only 5-bit values are legal");
4675   } else if (!isValidHwregWidth(Width)) {
4676     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4677   }
4678 }
4679 
4680 OperandMatchResultTy
4681 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4682   using namespace llvm::AMDGPU::Hwreg;
4683 
4684   int64_t ImmVal = 0;
4685   SMLoc Loc = getLoc();
4686 
4687   // If parsing fails, do not return an error code
4688   // to avoid excessive error messages.
4689   if (trySkipId("hwreg", AsmToken::LParen)) {
4690     OperandInfoTy HwReg(ID_UNKNOWN_);
4691     int64_t Offset = OFFSET_DEFAULT_;
4692     int64_t Width = WIDTH_DEFAULT_;
4693     if (parseHwregBody(HwReg, Offset, Width)) {
4694       validateHwreg(HwReg, Offset, Width, Loc);
4695       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4696     }
4697   } else if (parseExpr(ImmVal)) {
4698     if (ImmVal < 0 || !isUInt<16>(ImmVal))
4699       Error(Loc, "invalid immediate: only 16-bit values are legal");
4700   }
4701 
4702   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4703   return MatchOperand_Success;
4704 }
4705 
4706 bool AMDGPUOperand::isHwreg() const {
4707   return isImmTy(ImmTyHwreg);
4708 }
4709 
4710 //===----------------------------------------------------------------------===//
4711 // sendmsg
4712 //===----------------------------------------------------------------------===//
4713 
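// Parse the body of a sendmsg(...) operand: a message name or a numeric id,
// optionally followed by an operation and a stream id, i.e.
// sendmsg(<msg>[, <op>[, <stream>]]). The opening parenthesis has already
// been consumed by the caller.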
4714 bool
4715 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
4716                                   OperandInfoTy &Op,
4717                                   OperandInfoTy &Stream) {
4718   using namespace llvm::AMDGPU::SendMsg;
4719 
4720   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
4721     Msg.IsSymbolic = true;
4722     lex(); // skip message name
4723   } else if (!parseExpr(Msg.Id)) {
4724     return false;
4725   }
4726 
4727   if (trySkipToken(AsmToken::Comma)) {
4728     Op.IsDefined = true;
4729     if (isToken(AsmToken::Identifier) &&
4730         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
4731       lex(); // skip operation name
4732     } else if (!parseExpr(Op.Id)) {
4733       return false;
4734     }
4735 
4736     if (trySkipToken(AsmToken::Comma)) {
4737       Stream.IsDefined = true;
4738       if (!parseExpr(Stream.Id))
4739         return false;
4740     }
4741   }
4742 
4743   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
4744 }
4745 
4746 bool
4747 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4748                                  const OperandInfoTy &Op,
4749                                  const OperandInfoTy &Stream,
4750                                  const SMLoc S) {
4751   using namespace llvm::AMDGPU::SendMsg;
4752 
4753   // Validation strictness depends on whether the message is specified
4754   // in a symbolic or in a numeric form. In the latter case
4755   // only the encoding possibility is checked.
4756   bool Strict = Msg.IsSymbolic;
4757 
4758   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
4759     Error(S, "invalid message id");
4760     return false;
4761   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
4762     Error(S, Op.IsDefined ?
4763              "message does not support operations" :
4764              "missing message operation");
4765     return false;
4766   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
4767     Error(S, "invalid operation id");
4768     return false;
4769   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
4770     Error(S, "message operation does not support streams");
4771     return false;
4772   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
4773     Error(S, "invalid message stream id");
4774     return false;
4775   }
4776   return true;
4777 }
4778 
4779 OperandMatchResultTy
4780 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4781   using namespace llvm::AMDGPU::SendMsg;
4782 
4783   int64_t ImmVal = 0;
4784   SMLoc Loc = getLoc();
4785 
4786   // If parsing fails, do not return an error code
4787   // to avoid excessive error messages.
4788   if (trySkipId("sendmsg", AsmToken::LParen)) {
4789     OperandInfoTy Msg(ID_UNKNOWN_);
4790     OperandInfoTy Op(OP_NONE_);
4791     OperandInfoTy Stream(STREAM_ID_NONE_);
4792     if (parseSendMsgBody(Msg, Op, Stream) &&
4793         validateSendMsg(Msg, Op, Stream, Loc)) {
4794       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
4795     }
4796   } else if (parseExpr(ImmVal)) {
4797     if (ImmVal < 0 || !isUInt<16>(ImmVal))
4798       Error(Loc, "invalid immediate: only 16-bit values are legal");
4799   }
4800 
4801   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
4802   return MatchOperand_Success;
4803 }
4804 
4805 bool AMDGPUOperand::isSendMsg() const {
4806   return isImmTy(ImmTySendMsg);
4807 }
4808 
4809 //===----------------------------------------------------------------------===//
4810 // v_interp
4811 //===----------------------------------------------------------------------===//
4812 
4813 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4814   if (getLexer().getKind() != AsmToken::Identifier)
4815     return MatchOperand_NoMatch;
4816 
4817   StringRef Str = Parser.getTok().getString();
4818   int Slot = StringSwitch<int>(Str)
4819     .Case("p10", 0)
4820     .Case("p20", 1)
4821     .Case("p0", 2)
4822     .Default(-1);
4823 
4824   SMLoc S = Parser.getTok().getLoc();
4825   if (Slot == -1)
4826     return MatchOperand_ParseFail;
4827 
4828   Parser.Lex();
4829   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4830                                               AMDGPUOperand::ImmTyInterpSlot));
4831   return MatchOperand_Success;
4832 }
4833 
4834 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4835   if (getLexer().getKind() != AsmToken::Identifier)
4836     return MatchOperand_NoMatch;
4837 
4838   StringRef Str = Parser.getTok().getString();
4839   if (!Str.startswith("attr"))
4840     return MatchOperand_NoMatch;
4841 
4842   StringRef Chan = Str.take_back(2);
4843   int AttrChan = StringSwitch<int>(Chan)
4844     .Case(".x", 0)
4845     .Case(".y", 1)
4846     .Case(".z", 2)
4847     .Case(".w", 3)
4848     .Default(-1);
4849   if (AttrChan == -1)
4850     return MatchOperand_ParseFail;
4851 
4852   Str = Str.drop_back(2).drop_front(4);
4853 
4854   uint8_t Attr;
4855   if (Str.getAsInteger(10, Attr))
4856     return MatchOperand_ParseFail;
4857 
4858   SMLoc S = Parser.getTok().getLoc();
4859   Parser.Lex();
4860   if (Attr > 63) {
4861     Error(S, "out of bounds attr");
4862     return MatchOperand_Success;
4863   }
4864 
4865   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4866 
4867   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4868                                               AMDGPUOperand::ImmTyInterpAttr));
4869   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4870                                               AMDGPUOperand::ImmTyAttrChan));
4871   return MatchOperand_Success;
4872 }
4873 
4874 //===----------------------------------------------------------------------===//
4875 // exp
4876 //===----------------------------------------------------------------------===//
4877 
4878 void AMDGPUAsmParser::errorExpTgt() {
4879   Error(Parser.getTok().getLoc(), "invalid exp target");
4880 }
4881 
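// Translate an exp target name into its hardware encoding. The accepted names
// and values mirror the checks below: mrt0..mrt7 -> 0..7, mrtz -> 8,
// null -> 9, pos0..pos3 (pos4 on GFX10) -> 12.., prim -> 20 (GFX10 only),
// param0..param31 -> 32...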
4882 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4883                                                       uint8_t &Val) {
4884   if (Str == "null") {
4885     Val = 9;
4886     return MatchOperand_Success;
4887   }
4888 
4889   if (Str.startswith("mrt")) {
4890     Str = Str.drop_front(3);
4891     if (Str == "z") { // == mrtz
4892       Val = 8;
4893       return MatchOperand_Success;
4894     }
4895 
4896     if (Str.getAsInteger(10, Val))
4897       return MatchOperand_ParseFail;
4898 
4899     if (Val > 7)
4900       errorExpTgt();
4901 
4902     return MatchOperand_Success;
4903   }
4904 
4905   if (Str.startswith("pos")) {
4906     Str = Str.drop_front(3);
4907     if (Str.getAsInteger(10, Val))
4908       return MatchOperand_ParseFail;
4909 
4910     if (Val > 4 || (Val == 4 && !isGFX10()))
4911       errorExpTgt();
4912 
4913     Val += 12;
4914     return MatchOperand_Success;
4915   }
4916 
4917   if (isGFX10() && Str == "prim") {
4918     Val = 20;
4919     return MatchOperand_Success;
4920   }
4921 
4922   if (Str.startswith("param")) {
4923     Str = Str.drop_front(5);
4924     if (Str.getAsInteger(10, Val))
4925       return MatchOperand_ParseFail;
4926 
4927     if (Val >= 32)
4928       errorExpTgt();
4929 
4930     Val += 32;
4931     return MatchOperand_Success;
4932   }
4933 
4934   if (Str.startswith("invalid_target_")) {
4935     Str = Str.drop_front(15);
4936     if (Str.getAsInteger(10, Val))
4937       return MatchOperand_ParseFail;
4938 
4939     errorExpTgt();
4940     return MatchOperand_Success;
4941   }
4942 
4943   return MatchOperand_NoMatch;
4944 }
4945 
4946 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4947   uint8_t Val;
4948   StringRef Str = Parser.getTok().getString();
4949 
4950   auto Res = parseExpTgtImpl(Str, Val);
4951   if (Res != MatchOperand_Success)
4952     return Res;
4953 
4954   SMLoc S = Parser.getTok().getLoc();
4955   Parser.Lex();
4956 
4957   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4958                                               AMDGPUOperand::ImmTyExpTgt));
4959   return MatchOperand_Success;
4960 }
4961 
4962 //===----------------------------------------------------------------------===//
4963 // parser helpers
4964 //===----------------------------------------------------------------------===//
4965 
4966 bool
4967 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
4968   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
4969 }
4970 
4971 bool
4972 AMDGPUAsmParser::isId(const StringRef Id) const {
4973   return isId(getToken(), Id);
4974 }
4975 
4976 bool
4977 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
4978   return getTokenKind() == Kind;
4979 }
4980 
4981 bool
4982 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4983   if (isId(Id)) {
4984     lex();
4985     return true;
4986   }
4987   return false;
4988 }
4989 
4990 bool
4991 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
4992   if (isId(Id) && peekToken().is(Kind)) {
4993     lex();
4994     lex();
4995     return true;
4996   }
4997   return false;
4998 }
4999 
5000 bool
5001 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5002   if (isToken(Kind)) {
5003     lex();
5004     return true;
5005   }
5006   return false;
5007 }
5008 
5009 bool
5010 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5011                            const StringRef ErrMsg) {
5012   if (!trySkipToken(Kind)) {
5013     Error(getLoc(), ErrMsg);
5014     return false;
5015   }
5016   return true;
5017 }
5018 
5019 bool
5020 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5021   return !getParser().parseAbsoluteExpression(Imm);
5022 }
5023 
5024 bool
5025 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5026   if (isToken(AsmToken::String)) {
5027     Val = getToken().getStringContents();
5028     lex();
5029     return true;
5030   } else {
5031     Error(getLoc(), ErrMsg);
5032     return false;
5033   }
5034 }
5035 
5036 AsmToken
5037 AMDGPUAsmParser::getToken() const {
5038   return Parser.getTok();
5039 }
5040 
5041 AsmToken
5042 AMDGPUAsmParser::peekToken() {
5043   return getLexer().peekTok();
5044 }
5045 
5046 void
5047 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5048   auto TokCount = getLexer().peekTokens(Tokens);
5049 
5050   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5051     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5052 }
5053 
5054 AsmToken::TokenKind
5055 AMDGPUAsmParser::getTokenKind() const {
5056   return getLexer().getKind();
5057 }
5058 
5059 SMLoc
5060 AMDGPUAsmParser::getLoc() const {
5061   return getToken().getLoc();
5062 }
5063 
5064 StringRef
5065 AMDGPUAsmParser::getTokenStr() const {
5066   return getToken().getString();
5067 }
5068 
5069 void
5070 AMDGPUAsmParser::lex() {
5071   Parser.Lex();
5072 }
5073 
5074 //===----------------------------------------------------------------------===//
5075 // swizzle
5076 //===----------------------------------------------------------------------===//
5077 
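// Pack the three BITMASK_PERM fields of a ds_swizzle offset. In this mode the
// hardware computes, roughly, dst_lane = ((src_lane & AndMask) | OrMask) ^
// XorMask within each group of 32 lanes; the broadcast, swap and reverse
// helpers below are all expressed in terms of these masks.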
5078 LLVM_READNONE
5079 static unsigned
5080 encodeBitmaskPerm(const unsigned AndMask,
5081                   const unsigned OrMask,
5082                   const unsigned XorMask) {
5083   using namespace llvm::AMDGPU::Swizzle;
5084 
5085   return BITMASK_PERM_ENC |
5086          (AndMask << BITMASK_AND_SHIFT) |
5087          (OrMask  << BITMASK_OR_SHIFT)  |
5088          (XorMask << BITMASK_XOR_SHIFT);
5089 }
5090 
5091 bool
5092 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5093                                       const unsigned MinVal,
5094                                       const unsigned MaxVal,
5095                                       const StringRef ErrMsg) {
5096   for (unsigned i = 0; i < OpNum; ++i) {
5097     if (!skipToken(AsmToken::Comma, "expected a comma")) {
5098       return false;
5099     }
5100     SMLoc ExprLoc = Parser.getTok().getLoc();
5101     if (!parseExpr(Op[i])) {
5102       return false;
5103     }
5104     if (Op[i] < MinVal || Op[i] > MaxVal) {
5105       Error(ExprLoc, ErrMsg);
5106       return false;
5107     }
5108   }
5109 
5110   return true;
5111 }
5112 
5113 bool
5114 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5115   using namespace llvm::AMDGPU::Swizzle;
5116 
5117   int64_t Lane[LANE_NUM];
5118   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5119                            "expected a 2-bit lane id")) {
5120     Imm = QUAD_PERM_ENC;
5121     for (unsigned I = 0; I < LANE_NUM; ++I) {
5122       Imm |= Lane[I] << (LANE_SHIFT * I);
5123     }
5124     return true;
5125   }
5126   return false;
5127 }
5128 
5129 bool
5130 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5131   using namespace llvm::AMDGPU::Swizzle;
5132 
5133   SMLoc S = Parser.getTok().getLoc();
5134   int64_t GroupSize;
5135   int64_t LaneIdx;
5136 
5137   if (!parseSwizzleOperands(1, &GroupSize,
5138                             2, 32,
5139                             "group size must be in the interval [2,32]")) {
5140     return false;
5141   }
5142   if (!isPowerOf2_64(GroupSize)) {
5143     Error(S, "group size must be a power of two");
5144     return false;
5145   }
5146   if (parseSwizzleOperands(1, &LaneIdx,
5147                            0, GroupSize - 1,
5148                            "lane id must be in the interval [0,group size - 1]")) {
5149     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5150     return true;
5151   }
5152   return false;
5153 }
5154 
5155 bool
5156 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5157   using namespace llvm::AMDGPU::Swizzle;
5158 
5159   SMLoc S = Parser.getTok().getLoc();
5160   int64_t GroupSize;
5161 
5162   if (!parseSwizzleOperands(1, &GroupSize,
5163       2, 32, "group size must be in the interval [2,32]")) {
5164     return false;
5165   }
5166   if (!isPowerOf2_64(GroupSize)) {
5167     Error(S, "group size must be a power of two");
5168     return false;
5169   }
5170 
5171   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5172   return true;
5173 }
5174 
5175 bool
5176 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5177   using namespace llvm::AMDGPU::Swizzle;
5178 
5179   SMLoc S = Parser.getTok().getLoc();
5180   int64_t GroupSize;
5181 
5182   if (!parseSwizzleOperands(1, &GroupSize,
5183       1, 16, "group size must be in the interval [1,16]")) {
5184     return false;
5185   }
5186   if (!isPowerOf2_64(GroupSize)) {
5187     Error(S, "group size must be a power of two");
5188     return false;
5189   }
5190 
5191   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5192   return true;
5193 }
5194 
5195 bool
5196 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5197   using namespace llvm::AMDGPU::Swizzle;
5198 
5199   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5200     return false;
5201   }
5202 
5203   StringRef Ctl;
5204   SMLoc StrLoc = Parser.getTok().getLoc();
5205   if (!parseString(Ctl)) {
5206     return false;
5207   }
5208   if (Ctl.size() != BITMASK_WIDTH) {
5209     Error(StrLoc, "expected a 5-character mask");
5210     return false;
5211   }
5212 
5213   unsigned AndMask = 0;
5214   unsigned OrMask = 0;
5215   unsigned XorMask = 0;
5216 
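  // Each character of the mask controls one bit of the lane id, most
  // significant bit first: '0' forces the bit to 0, '1' forces it to 1,
  // 'p' preserves it, and 'i' inverts it.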
5217   for (size_t i = 0; i < Ctl.size(); ++i) {
5218     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5219     switch(Ctl[i]) {
5220     default:
5221       Error(StrLoc, "invalid mask");
5222       return false;
5223     case '0':
5224       break;
5225     case '1':
5226       OrMask |= Mask;
5227       break;
5228     case 'p':
5229       AndMask |= Mask;
5230       break;
5231     case 'i':
5232       AndMask |= Mask;
5233       XorMask |= Mask;
5234       break;
5235     }
5236   }
5237 
5238   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5239   return true;
5240 }
5241 
5242 bool
5243 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5244 
5245   SMLoc OffsetLoc = Parser.getTok().getLoc();
5246 
5247   if (!parseExpr(Imm)) {
5248     return false;
5249   }
5250   if (!isUInt<16>(Imm)) {
5251     Error(OffsetLoc, "expected a 16-bit offset");
5252     return false;
5253   }
5254   return true;
5255 }
5256 
5257 bool
5258 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5259   using namespace llvm::AMDGPU::Swizzle;
5260 
5261   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5262 
5263     SMLoc ModeLoc = Parser.getTok().getLoc();
5264     bool Ok = false;
5265 
5266     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5267       Ok = parseSwizzleQuadPerm(Imm);
5268     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5269       Ok = parseSwizzleBitmaskPerm(Imm);
5270     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5271       Ok = parseSwizzleBroadcast(Imm);
5272     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5273       Ok = parseSwizzleSwap(Imm);
5274     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5275       Ok = parseSwizzleReverse(Imm);
5276     } else {
5277       Error(ModeLoc, "expected a swizzle mode");
5278     }
5279 
5280     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5281   }
5282 
5283   return false;
5284 }
5285 
5286 OperandMatchResultTy
5287 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5288   SMLoc S = Parser.getTok().getLoc();
5289   int64_t Imm = 0;
5290 
5291   if (trySkipId("offset")) {
5292 
5293     bool Ok = false;
5294     if (skipToken(AsmToken::Colon, "expected a colon")) {
5295       if (trySkipId("swizzle")) {
5296         Ok = parseSwizzleMacro(Imm);
5297       } else {
5298         Ok = parseSwizzleOffset(Imm);
5299       }
5300     }
5301 
5302     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5303 
5304     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5305   } else {
5306     // Swizzle "offset" operand is optional.
5307     // If it is omitted, try parsing other optional operands.
5308     return parseOptionalOpr(Operands);
5309   }
5310 }
5311 
5312 bool
5313 AMDGPUOperand::isSwizzle() const {
5314   return isImmTy(ImmTySwizzle);
5315 }
5316 
5317 //===----------------------------------------------------------------------===//
5318 // VGPR Index Mode
5319 //===----------------------------------------------------------------------===//
5320 
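// Parse the body of a gpr_idx(...) operand: either an immediately closing
// parenthesis (meaning OFF) or a comma-separated list of VGPR index mode
// names, combined into a bitmask. The opening parenthesis has already been
// consumed by the caller.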
5321 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5322 
5323   using namespace llvm::AMDGPU::VGPRIndexMode;
5324 
5325   if (trySkipToken(AsmToken::RParen)) {
5326     return OFF;
5327   }
5328 
5329   int64_t Imm = 0;
5330 
5331   while (true) {
5332     unsigned Mode = 0;
5333     SMLoc S = Parser.getTok().getLoc();
5334 
5335     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5336       if (trySkipId(IdSymbolic[ModeId])) {
5337         Mode = 1 << ModeId;
5338         break;
5339       }
5340     }
5341 
5342     if (Mode == 0) {
5343       Error(S, (Imm == 0)?
5344                "expected a VGPR index mode or a closing parenthesis" :
5345                "expected a VGPR index mode");
5346       break;
5347     }
5348 
5349     if (Imm & Mode) {
5350       Error(S, "duplicate VGPR index mode");
5351       break;
5352     }
5353     Imm |= Mode;
5354 
5355     if (trySkipToken(AsmToken::RParen))
5356       break;
5357     if (!skipToken(AsmToken::Comma,
5358                    "expected a comma or a closing parenthesis"))
5359       break;
5360   }
5361 
5362   return Imm;
5363 }
5364 
5365 OperandMatchResultTy
5366 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5367 
5368   int64_t Imm = 0;
5369   SMLoc S = Parser.getTok().getLoc();
5370 
5371   if (getLexer().getKind() == AsmToken::Identifier &&
5372       Parser.getTok().getString() == "gpr_idx" &&
5373       getLexer().peekTok().is(AsmToken::LParen)) {
5374 
5375     Parser.Lex();
5376     Parser.Lex();
5377 
5378     // If parsing fails, trigger an error but do not return an error code
5379     // to avoid excessive error messages.
5380     Imm = parseGPRIdxMacro();
5381 
5382   } else {
5383     if (getParser().parseAbsoluteExpression(Imm))
5384       return MatchOperand_NoMatch;
5385     if (Imm < 0 || !isUInt<4>(Imm)) {
5386       Error(S, "invalid immediate: only 4-bit values are legal");
5387     }
5388   }
5389 
5390   Operands.push_back(
5391       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5392   return MatchOperand_Success;
5393 }
5394 
5395 bool AMDGPUOperand::isGPRIdxMode() const {
5396   return isImmTy(ImmTyGprIdxMode);
5397 }
5398 
5399 //===----------------------------------------------------------------------===//
5400 // sopp branch targets
5401 //===----------------------------------------------------------------------===//
5402 
5403 OperandMatchResultTy
5404 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5405   SMLoc S = Parser.getTok().getLoc();
5406 
5407   switch (getLexer().getKind()) {
5408     default: return MatchOperand_ParseFail;
5409     case AsmToken::Integer: {
5410       int64_t Imm;
5411       if (getParser().parseAbsoluteExpression(Imm))
5412         return MatchOperand_ParseFail;
5413       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5414       return MatchOperand_Success;
5415     }
5416 
5417     case AsmToken::Identifier:
5418       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5419           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5420                                   Parser.getTok().getString()), getContext()), S));
5421       Parser.Lex();
5422       return MatchOperand_Success;
5423   }
5424 }
5425 
5426 //===----------------------------------------------------------------------===//
5427 // Boolean holding registers
5428 //===----------------------------------------------------------------------===//
5429 
5430 OperandMatchResultTy
5431 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5432   return parseReg(Operands);
5433 }
5434 
5435 //===----------------------------------------------------------------------===//
5436 // mubuf
5437 //===----------------------------------------------------------------------===//
5438 
5439 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5440   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5441 }
5442 
5443 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5444   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5445 }
5446 
5447 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5448   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5449 }
5450 
5451 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5452                                const OperandVector &Operands,
5453                                bool IsAtomic,
5454                                bool IsAtomicReturn,
5455                                bool IsLds) {
5456   bool IsLdsOpcode = IsLds;
5457   bool HasLdsModifier = false;
5458   OptionalImmIndexMap OptionalIdx;
5459   assert(IsAtomicReturn ? IsAtomic : true);
5460   unsigned FirstOperandIdx = 1;
5461 
5462   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5463     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5464 
5465     // Add the register arguments
5466     if (Op.isReg()) {
5467       Op.addRegOperands(Inst, 1);
5468       // Insert a tied src for atomic return dst.
5469       // This cannot be postponed as subsequent calls to
5470       // addImmOperands rely on the correct number of MC operands.
5471       if (IsAtomicReturn && i == FirstOperandIdx)
5472         Op.addRegOperands(Inst, 1);
5473       continue;
5474     }
5475 
5476     // Handle the case where soffset is an immediate
5477     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5478       Op.addImmOperands(Inst, 1);
5479       continue;
5480     }
5481 
5482     HasLdsModifier |= Op.isLDS();
5483 
5484     // Handle tokens like 'offen' which are sometimes hard-coded into the
5485     // asm string.  There are no MCInst operands for these.
5486     if (Op.isToken()) {
5487       continue;
5488     }
5489     assert(Op.isImm());
5490 
5491     // Handle optional arguments
5492     OptionalIdx[Op.getImmTy()] = i;
5493   }
5494 
5495   // This is a workaround for an llvm quirk which may result in an
5496   // incorrect instruction selection. Lds and non-lds versions of
5497   // MUBUF instructions are identical except that lds versions
5498   // have a mandatory 'lds' modifier. However, this modifier follows the
5499   // optional modifiers, and the llvm asm matcher regards this 'lds'
5500   // modifier as an optional one. As a result, an lds version
5501   // of the opcode may be selected even if it has no 'lds' modifier.
5502   if (IsLdsOpcode && !HasLdsModifier) {
5503     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5504     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5505       Inst.setOpcode(NoLdsOpcode);
5506       IsLdsOpcode = false;
5507     }
5508   }
5509 
5510   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5511   if (!IsAtomic) { // glc is hard-coded.
5512     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5513   }
5514   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5515 
5516   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5517     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5518   }
5519 
5520   if (isGFX10())
5521     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5522 }
5523 
5524 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5525   OptionalImmIndexMap OptionalIdx;
5526 
5527   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5528     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5529 
5530     // Add the register arguments
5531     if (Op.isReg()) {
5532       Op.addRegOperands(Inst, 1);
5533       continue;
5534     }
5535 
5536     // Handle the case where soffset is an immediate
5537     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5538       Op.addImmOperands(Inst, 1);
5539       continue;
5540     }
5541 
5542     // Handle tokens like 'offen' which are sometimes hard-coded into the
5543     // asm string.  There are no MCInst operands for these.
5544     if (Op.isToken()) {
5545       continue;
5546     }
5547     assert(Op.isImm());
5548 
5549     // Handle optional arguments
5550     OptionalIdx[Op.getImmTy()] = i;
5551   }
5552 
5553   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5554                         AMDGPUOperand::ImmTyOffset);
5555   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5556   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5557   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5558   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5559 
5560   if (isGFX10())
5561     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5562 }
5563 
5564 //===----------------------------------------------------------------------===//
5565 // mimg
5566 //===----------------------------------------------------------------------===//
5567 
5568 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5569                               bool IsAtomic) {
5570   unsigned I = 1;
5571   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5572   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5573     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5574   }
5575 
5576   if (IsAtomic) {
5577     // Add src, same as dst
5578     assert(Desc.getNumDefs() == 1);
5579     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5580   }
5581 
5582   OptionalImmIndexMap OptionalIdx;
5583 
5584   for (unsigned E = Operands.size(); I != E; ++I) {
5585     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5586 
5587     // Add the register arguments
5588     if (Op.isReg()) {
5589       Op.addRegOperands(Inst, 1);
5590     } else if (Op.isImmModifier()) {
5591       OptionalIdx[Op.getImmTy()] = I;
5592     } else if (!Op.isToken()) {
5593       llvm_unreachable("unexpected operand type");
5594     }
5595   }
5596 
5597   bool IsGFX10 = isGFX10();
5598 
5599   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5600   if (IsGFX10)
5601     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5602   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5603   if (IsGFX10)
5604     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5605   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5606   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5607   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5608   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5609   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5610   if (!IsGFX10)
5611     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5612   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5613 }
5614 
5615 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5616   cvtMIMG(Inst, Operands, true);
5617 }
5618 
5619 //===----------------------------------------------------------------------===//
5620 // smrd
5621 //===----------------------------------------------------------------------===//
5622 
5623 bool AMDGPUOperand::isSMRDOffset8() const {
5624   return isImm() && isUInt<8>(getImm());
5625 }
5626 
5627 bool AMDGPUOperand::isSMRDOffset20() const {
5628   return isImm() && isUInt<20>(getImm());
5629 }
5630 
5631 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5632   // 32-bit literals are only supported on CI and we only want to use them
5633   // when the offset is > 8 bits.
5634   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5635 }
5636 
5637 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5638   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5639 }
5640 
5641 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5642   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5643 }
5644 
5645 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5646   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5647 }
5648 
5649 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
5650   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5651 }
5652 
5653 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5654   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5655 }
5656 
5657 //===----------------------------------------------------------------------===//
5658 // vop3
5659 //===----------------------------------------------------------------------===//
5660 
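// Convert the user-facing omod syntax (mul:2, mul:4, div:2) into the OMOD
// field encoding (0 = none, 1 = *2, 2 = *4, 3 = /2), as reflected in the two
// converters below.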
5661 static bool ConvertOmodMul(int64_t &Mul) {
5662   if (Mul != 1 && Mul != 2 && Mul != 4)
5663     return false;
5664 
5665   Mul >>= 1;
5666   return true;
5667 }
5668 
5669 static bool ConvertOmodDiv(int64_t &Div) {
5670   if (Div == 1) {
5671     Div = 0;
5672     return true;
5673   }
5674 
5675   if (Div == 2) {
5676     Div = 3;
5677     return true;
5678   }
5679 
5680   return false;
5681 }
5682 
5683 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5684   if (BoundCtrl == 0) {
5685     BoundCtrl = 1;
5686     return true;
5687   }
5688 
5689   if (BoundCtrl == -1) {
5690     BoundCtrl = 0;
5691     return true;
5692   }
5693 
5694   return false;
5695 }
5696 
5697 // Note: the order in this table matches the order of operands in AsmString.
5698 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5699   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5700   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5701   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5702   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5703   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5704   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5705   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5706   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5707   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5708   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
5709   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5710   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5711   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5712   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5713   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5714   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5715   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5716   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5717   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5718   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5719   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5720   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5721   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5722   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5723   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5724   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
5725   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5726   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5727   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5728   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
5729   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5730   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5731   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5732   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5733   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5734   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5735   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5736   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5737   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5738   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
5739 };
5740 
5741 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5742   unsigned size = Operands.size();
5743   assert(size > 0);
5744 
5745   OperandMatchResultTy res = parseOptionalOpr(Operands);
5746 
5747   // This is a hack to enable hardcoded mandatory operands which follow
5748   // optional operands.
5749   //
5750   // The current design assumes all operands after the first optional operand
5751   // are also optional. However, some instructions violate this rule (see e.g.
5752   // flat/global atomics, which have hardcoded 'glc' operands).
5753   //
5754   // To alleviate this problem, we have to (implicitly) parse extra operands
5755   // to make sure the autogenerated parser of custom operands never hits
5756   // hardcoded mandatory operands.
5757 
5758   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5759 
5760     // We have parsed the first optional operand.
5761     // Parse as many operands as necessary to skip all mandatory operands.
5762 
5763     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5764       if (res != MatchOperand_Success ||
5765           getLexer().is(AsmToken::EndOfStatement)) break;
5766       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5767       res = parseOptionalOpr(Operands);
5768     }
5769   }
5770 
5771   return res;
5772 }
5773 
5774 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5775   OperandMatchResultTy res;
5776   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5777     // try to parse any optional operand here
5778     if (Op.IsBit) {
5779       res = parseNamedBit(Op.Name, Operands, Op.Type);
5780     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5781       res = parseOModOperand(Operands);
5782     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5783                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5784                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5785       res = parseSDWASel(Operands, Op.Name, Op.Type);
5786     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5787       res = parseSDWADstUnused(Operands);
5788     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5789                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5790                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5791                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5792       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5793                                         Op.ConvertResult);
5794     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
5795       res = parseDim(Operands);
5796     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
5797       res = parseDfmtNfmt(Operands);
5798     } else {
5799       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5800     }
5801     if (res != MatchOperand_NoMatch) {
5802       return res;
5803     }
5804   }
5805   return MatchOperand_NoMatch;
5806 }
5807 
5808 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5809   StringRef Name = Parser.getTok().getString();
5810   if (Name == "mul") {
5811     return parseIntWithPrefix("mul", Operands,
5812                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5813   }
5814 
5815   if (Name == "div") {
5816     return parseIntWithPrefix("div", Operands,
5817                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5818   }
5819 
5820   return MatchOperand_NoMatch;
5821 }
5822 
5823 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5824   cvtVOP3P(Inst, Operands);
5825 
5826   int Opc = Inst.getOpcode();
5827 
5828   int SrcNum;
5829   const int Ops[] = { AMDGPU::OpName::src0,
5830                       AMDGPU::OpName::src1,
5831                       AMDGPU::OpName::src2 };
5832   for (SrcNum = 0;
5833        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5834        ++SrcNum);
5835   assert(SrcNum > 0);
5836 
5837   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5838   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5839 
5840   if ((OpSel & (1 << SrcNum)) != 0) {
5841     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5842     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5843     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5844   }
5845 }
5846 
5847 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5848       // 1. This operand is an input-modifiers operand
5849   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5850       // 2. This is not the last operand
5851       && Desc.NumOperands > (OpNum + 1)
5852       // 3. The next operand is a register class
5853       && Desc.OpInfo[OpNum + 1].RegClass != -1
5854       // 4. The next operand is not tied to any other operand
5855       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5856 }
5857 
5858 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5859 {
5860   OptionalImmIndexMap OptionalIdx;
5861   unsigned Opc = Inst.getOpcode();
5862 
5863   unsigned I = 1;
5864   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5865   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5866     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5867   }
5868 
5869   for (unsigned E = Operands.size(); I != E; ++I) {
5870     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5871     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5872       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5873     } else if (Op.isInterpSlot() ||
5874                Op.isInterpAttr() ||
5875                Op.isAttrChan()) {
5876       Inst.addOperand(MCOperand::createImm(Op.getImm()));
5877     } else if (Op.isImmModifier()) {
5878       OptionalIdx[Op.getImmTy()] = I;
5879     } else {
5880       llvm_unreachable("unhandled operand type");
5881     }
5882   }
5883 
5884   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5885     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5886   }
5887 
5888   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5889     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5890   }
5891 
5892   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5893     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5894   }
5895 }
5896 
5897 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5898                               OptionalImmIndexMap &OptionalIdx) {
5899   unsigned Opc = Inst.getOpcode();
5900 
5901   unsigned I = 1;
5902   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5903   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5904     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5905   }
5906 
5907   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5908     // This instruction has src modifiers
5909     for (unsigned E = Operands.size(); I != E; ++I) {
5910       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5911       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5912         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5913       } else if (Op.isImmModifier()) {
5914         OptionalIdx[Op.getImmTy()] = I;
5915       } else if (Op.isRegOrImm()) {
5916         Op.addRegOrImmOperands(Inst, 1);
5917       } else {
5918         llvm_unreachable("unhandled operand type");
5919       }
5920     }
5921   } else {
5922     // No src modifiers
5923     for (unsigned E = Operands.size(); I != E; ++I) {
5924       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5925       if (Op.isMod()) {
5926         OptionalIdx[Op.getImmTy()] = I;
5927       } else {
5928         Op.addRegOrImmOperands(Inst, 1);
5929       }
5930     }
5931   }
5932 
5933   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5934     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5935   }
5936 
5937   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5938     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5939   }
5940 
5941   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
5942   // they have a src2 register operand that is tied to the dst operand.
5943   // We don't allow modifiers for this operand in the assembler, so
5944   // src2_modifiers should be 0.
5945   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
5946       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
5947       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5948       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5949       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
5950       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
5951       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
5952     auto it = Inst.begin();
5953     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5954     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5955     ++it;
5956     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5957   }
5958 }
5959 
5960 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5961   OptionalImmIndexMap OptionalIdx;
5962   cvtVOP3(Inst, Operands, OptionalIdx);
5963 }
5964 
5965 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5966                                const OperandVector &Operands) {
5967   OptionalImmIndexMap OptIdx;
5968   const int Opc = Inst.getOpcode();
5969   const MCInstrDesc &Desc = MII.get(Opc);
5970 
5971   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5972 
5973   cvtVOP3(Inst, Operands, OptIdx);
5974 
5975   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5976     assert(!IsPacked);
5977     Inst.addOperand(Inst.getOperand(0));
5978   }
5979 
5980   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
5981   // instruction, and then figure out where to actually put the modifiers
5982 
5983   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5984 
5985   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5986   if (OpSelHiIdx != -1) {
5987     int DefaultVal = IsPacked ? -1 : 0;
5988     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5989                           DefaultVal);
5990   }
5991 
5992   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5993   if (NegLoIdx != -1) {
5994     assert(IsPacked);
5995     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5996     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5997   }
5998 
5999   const int Ops[] = { AMDGPU::OpName::src0,
6000                       AMDGPU::OpName::src1,
6001                       AMDGPU::OpName::src2 };
6002   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6003                          AMDGPU::OpName::src1_modifiers,
6004                          AMDGPU::OpName::src2_modifiers };
6005 
6006   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6007 
6008   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6009   unsigned OpSelHi = 0;
6010   unsigned NegLo = 0;
6011   unsigned NegHi = 0;
6012 
6013   if (OpSelHiIdx != -1) {
6014     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6015   }
6016 
6017   if (NegLoIdx != -1) {
6018     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6019     NegLo = Inst.getOperand(NegLoIdx).getImm();
6020     NegHi = Inst.getOperand(NegHiIdx).getImm();
6021   }
6022 
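  // Fold the wholesale op_sel/op_sel_hi/neg_lo/neg_hi immediates into the
  // per-source src*_modifiers operands, one bit per source.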
6023   for (int J = 0; J < 3; ++J) {
6024     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6025     if (OpIdx == -1)
6026       break;
6027 
6028     uint32_t ModVal = 0;
6029 
6030     if ((OpSel & (1 << J)) != 0)
6031       ModVal |= SISrcMods::OP_SEL_0;
6032 
6033     if ((OpSelHi & (1 << J)) != 0)
6034       ModVal |= SISrcMods::OP_SEL_1;
6035 
6036     if ((NegLo & (1 << J)) != 0)
6037       ModVal |= SISrcMods::NEG;
6038 
6039     if ((NegHi & (1 << J)) != 0)
6040       ModVal |= SISrcMods::NEG_HI;
6041 
6042     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6043 
6044     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6045   }
6046 }
6047 
6048 //===----------------------------------------------------------------------===//
6049 // dpp
6050 //===----------------------------------------------------------------------===//
6051 
6052 bool AMDGPUOperand::isDPP8() const {
6053   return isImmTy(ImmTyDPP8);
6054 }
6055 
6056 bool AMDGPUOperand::isDPPCtrl() const {
6057   using namespace AMDGPU::DPP;
6058 
6059   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6060   if (result) {
6061     int64_t Imm = getImm();
6062     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6063            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6064            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6065            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6066            (Imm == DppCtrl::WAVE_SHL1) ||
6067            (Imm == DppCtrl::WAVE_ROL1) ||
6068            (Imm == DppCtrl::WAVE_SHR1) ||
6069            (Imm == DppCtrl::WAVE_ROR1) ||
6070            (Imm == DppCtrl::ROW_MIRROR) ||
6071            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6072            (Imm == DppCtrl::BCAST15) ||
6073            (Imm == DppCtrl::BCAST31) ||
6074            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6075            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6076   }
6077   return false;
6078 }
6079 
6080 bool AMDGPUOperand::isS16Imm() const {
6081   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6082 }
6083 
6084 bool AMDGPUOperand::isU16Imm() const {
6085   return isImm() && isUInt<16>(getImm());
6086 }
6087 
6088 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6089   if (!isGFX10())
6090     return MatchOperand_NoMatch;
6091 
6092   SMLoc S = Parser.getTok().getLoc();
6093 
6094   if (getLexer().isNot(AsmToken::Identifier))
6095     return MatchOperand_NoMatch;
6096   if (getLexer().getTok().getString() != "dim")
6097     return MatchOperand_NoMatch;
6098 
6099   Parser.Lex();
6100   if (getLexer().isNot(AsmToken::Colon))
6101     return MatchOperand_ParseFail;
6102 
6103   Parser.Lex();
6104 
6105   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6106   // integer.
6107   std::string Token;
6108   if (getLexer().is(AsmToken::Integer)) {
6109     SMLoc Loc = getLexer().getTok().getEndLoc();
6110     Token = getLexer().getTok().getString();
6111     Parser.Lex();
6112     if (getLexer().getTok().getLoc() != Loc)
6113       return MatchOperand_ParseFail;
6114   }
6115   if (getLexer().isNot(AsmToken::Identifier))
6116     return MatchOperand_ParseFail;
6117   Token += getLexer().getTok().getString();
6118 
6119   StringRef DimId = Token;
6120   if (DimId.startswith("SQ_RSRC_IMG_"))
6121     DimId = DimId.substr(12);
6122 
6123   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6124   if (!DimInfo)
6125     return MatchOperand_ParseFail;
6126 
6127   Parser.Lex();
6128 
6129   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6130                                               AMDGPUOperand::ImmTyDim));
6131   return MatchOperand_Success;
6132 }
6133 
6134 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6135   SMLoc S = Parser.getTok().getLoc();
6136   StringRef Prefix;
6137 
6138   if (getLexer().getKind() == AsmToken::Identifier) {
6139     Prefix = Parser.getTok().getString();
6140   } else {
6141     return MatchOperand_NoMatch;
6142   }
6143 
6144   if (Prefix != "dpp8")
6145     return parseDPPCtrl(Operands);
6146   if (!isGFX10())
6147     return MatchOperand_NoMatch;
6148 
6149   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6150 
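  // Each selector is a 3-bit lane index in [0,7]; the eight selectors are
  // packed into a 24-bit immediate with selector i occupying bits [3*i+2:3*i].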
6151   int64_t Sels[8];
6152 
6153   Parser.Lex();
6154   if (getLexer().isNot(AsmToken::Colon))
6155     return MatchOperand_ParseFail;
6156 
6157   Parser.Lex();
6158   if (getLexer().isNot(AsmToken::LBrac))
6159     return MatchOperand_ParseFail;
6160 
6161   Parser.Lex();
6162   if (getParser().parseAbsoluteExpression(Sels[0]))
6163     return MatchOperand_ParseFail;
6164   if (0 > Sels[0] || 7 < Sels[0])
6165     return MatchOperand_ParseFail;
6166 
6167   for (size_t i = 1; i < 8; ++i) {
6168     if (getLexer().isNot(AsmToken::Comma))
6169       return MatchOperand_ParseFail;
6170 
6171     Parser.Lex();
6172     if (getParser().parseAbsoluteExpression(Sels[i]))
6173       return MatchOperand_ParseFail;
6174     if (0 > Sels[i] || 7 < Sels[i])
6175       return MatchOperand_ParseFail;
6176   }
6177 
6178   if (getLexer().isNot(AsmToken::RBrac))
6179     return MatchOperand_ParseFail;
6180   Parser.Lex();
6181 
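  // Fold the selectors into the dpp8 immediate: selector i occupies
  // bits [3*i+2 : 3*i].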
6182   unsigned DPP8 = 0;
6183   for (size_t i = 0; i < 8; ++i)
6184     DPP8 |= (Sels[i] << (i * 3));
6185 
6186   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6187   return MatchOperand_Success;
6188 }
6189 
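// Parse the dpp_ctrl operand and fold it into a single immediate, e.g.
// "quad_perm:[0,1,2,3]", "row_shl:1", "row_mirror" or "wave_ror:1".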
6190 OperandMatchResultTy
6191 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6192   using namespace AMDGPU::DPP;
6193 
6194   SMLoc S = Parser.getTok().getLoc();
6195   StringRef Prefix;
6196   int64_t Int;
6197 
6198   if (getLexer().getKind() == AsmToken::Identifier) {
6199     Prefix = Parser.getTok().getString();
6200   } else {
6201     return MatchOperand_NoMatch;
6202   }
6203 
6204   if (Prefix == "row_mirror") {
6205     Int = DppCtrl::ROW_MIRROR;
6206     Parser.Lex();
6207   } else if (Prefix == "row_half_mirror") {
6208     Int = DppCtrl::ROW_HALF_MIRROR;
6209     Parser.Lex();
6210   } else {
6211     // Check to prevent parseDPPCtrl from eating invalid tokens
6212     if (Prefix != "quad_perm"
6213         && Prefix != "row_shl"
6214         && Prefix != "row_shr"
6215         && Prefix != "row_ror"
6216         && Prefix != "wave_shl"
6217         && Prefix != "wave_rol"
6218         && Prefix != "wave_shr"
6219         && Prefix != "wave_ror"
6220         && Prefix != "row_bcast"
6221         && Prefix != "row_share"
6222         && Prefix != "row_xmask") {
6223       return MatchOperand_NoMatch;
6224     }
6225 
6226     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6227       return MatchOperand_NoMatch;
6228 
6229     if (!isVI() && !isGFX9() &&
6230         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6231          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6232          Prefix == "row_bcast"))
6233       return MatchOperand_NoMatch;
6234 
6235     Parser.Lex();
6236     if (getLexer().isNot(AsmToken::Colon))
6237       return MatchOperand_ParseFail;
6238 
6239     if (Prefix == "quad_perm") {
6240       // quad_perm:[%d,%d,%d,%d]
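      // Each selector is a 2-bit lane index; selector i occupies bits
      // [2*i+1 : 2*i] of the dpp_ctrl value.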
6241       Parser.Lex();
6242       if (getLexer().isNot(AsmToken::LBrac))
6243         return MatchOperand_ParseFail;
6244       Parser.Lex();
6245 
6246       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
6247         return MatchOperand_ParseFail;
6248 
6249       for (int i = 0; i < 3; ++i) {
6250         if (getLexer().isNot(AsmToken::Comma))
6251           return MatchOperand_ParseFail;
6252         Parser.Lex();
6253 
6254         int64_t Temp;
6255         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
6256           return MatchOperand_ParseFail;
6257         const int shift = i*2 + 2;
6258         Int += (Temp << shift);
6259       }
6260 
6261       if (getLexer().isNot(AsmToken::RBrac))
6262         return MatchOperand_ParseFail;
6263       Parser.Lex();
6264     } else {
6265       // sel:%d
6266       Parser.Lex();
6267       if (getParser().parseAbsoluteExpression(Int))
6268         return MatchOperand_ParseFail;
6269 
6270       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6271         Int |= DppCtrl::ROW_SHL0;
6272       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6273         Int |= DppCtrl::ROW_SHR0;
6274       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6275         Int |= DppCtrl::ROW_ROR0;
6276       } else if (Prefix == "wave_shl" && 1 == Int) {
6277         Int = DppCtrl::WAVE_SHL1;
6278       } else if (Prefix == "wave_rol" && 1 == Int) {
6279         Int = DppCtrl::WAVE_ROL1;
6280       } else if (Prefix == "wave_shr" && 1 == Int) {
6281         Int = DppCtrl::WAVE_SHR1;
6282       } else if (Prefix == "wave_ror" && 1 == Int) {
6283         Int = DppCtrl::WAVE_ROR1;
6284       } else if (Prefix == "row_bcast") {
6285         if (Int == 15) {
6286           Int = DppCtrl::BCAST15;
6287         } else if (Int == 31) {
6288           Int = DppCtrl::BCAST31;
6289         } else {
6290           return MatchOperand_ParseFail;
6291         }
6292       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6293         Int |= DppCtrl::ROW_SHARE_FIRST;
6294       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6295         Int |= DppCtrl::ROW_XMASK_FIRST;
6296       } else {
6297         return MatchOperand_ParseFail;
6298       }
6299     }
6300   }
6301 
6302   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6303   return MatchOperand_Success;
6304 }
6305 
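// Defaults for optional operands: the DPP row_mask and bank_mask default to
// 0xf (i.e. all rows/banks enabled); bound_ctrl, fi and the endpgm immediate
// default to 0.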
6306 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6307   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6308 }
6309 
6310 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6311   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6312 }
6313 
6314 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6315   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6316 }
6317 
6318 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6319   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6320 }
6321 
6322 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6323   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6324 }
6325 
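// Convert parsed DPP operands into an MCInst. Handles both the classic
// dpp_ctrl form and, when IsDPP8 is set, the gfx10 dpp8 form, where the fi
// flag is appended as a trailing immediate.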
6326 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6327   OptionalImmIndexMap OptionalIdx;
6328 
6329   unsigned I = 1;
6330   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6331   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6332     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6333   }
6334 
6335   int Fi = 0;
6336   for (unsigned E = Operands.size(); I != E; ++I) {
6337     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6338                                             MCOI::TIED_TO);
6339     if (TiedTo != -1) {
6340       assert((unsigned)TiedTo < Inst.getNumOperands());
6341       // Handle the tied old or src2 operand for MAC instructions.
6342       Inst.addOperand(Inst.getOperand(TiedTo));
6343     }
6344     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6345     // Add the register arguments
6346     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6347       // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
6348       // Skip it.
6349       continue;
6350     }
6351 
6352     if (IsDPP8) {
6353       if (Op.isDPP8()) {
6354         Op.addImmOperands(Inst, 1);
6355       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6356         Op.addRegWithFPInputModsOperands(Inst, 2);
6357       } else if (Op.isFI()) {
6358         Fi = Op.getImm();
6359       } else if (Op.isReg()) {
6360         Op.addRegOperands(Inst, 1);
6361       } else {
6362         llvm_unreachable("Invalid operand type");
6363       }
6364     } else {
6365       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6366         Op.addRegWithFPInputModsOperands(Inst, 2);
6367       } else if (Op.isDPPCtrl()) {
6368         Op.addImmOperands(Inst, 1);
6369       } else if (Op.isImm()) {
6370         // Handle optional arguments
6371         OptionalIdx[Op.getImmTy()] = I;
6372       } else {
6373         llvm_unreachable("Invalid operand type");
6374       }
6375     }
6376   }
6377 
6378   if (IsDPP8) {
6379     using namespace llvm::AMDGPU::DPP;
6380     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
6381   } else {
6382     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6383     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6384     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6385     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6386       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6387     }
6388   }
6389 }
6390 
6391 //===----------------------------------------------------------------------===//
6392 // sdwa
6393 //===----------------------------------------------------------------------===//
6394 
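// Parse an SDWA selector of the form "<Prefix>:<sel>", e.g. "dst_sel:BYTE_0"
// or "src0_sel:WORD_1".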
6395 OperandMatchResultTy
6396 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6397                               AMDGPUOperand::ImmTy Type) {
6398   using namespace llvm::AMDGPU::SDWA;
6399 
6400   SMLoc S = Parser.getTok().getLoc();
6401   StringRef Value;
6402   OperandMatchResultTy res;
6403 
6404   res = parseStringWithPrefix(Prefix, Value);
6405   if (res != MatchOperand_Success) {
6406     return res;
6407   }
6408 
6409   int64_t Int;
6410   Int = StringSwitch<int64_t>(Value)
6411         .Case("BYTE_0", SdwaSel::BYTE_0)
6412         .Case("BYTE_1", SdwaSel::BYTE_1)
6413         .Case("BYTE_2", SdwaSel::BYTE_2)
6414         .Case("BYTE_3", SdwaSel::BYTE_3)
6415         .Case("WORD_0", SdwaSel::WORD_0)
6416         .Case("WORD_1", SdwaSel::WORD_1)
6417         .Case("DWORD", SdwaSel::DWORD)
6418         .Default(0xffffffff);
6419   Parser.Lex(); // eat last token
6420 
6421   if (Int == 0xffffffff) {
6422     return MatchOperand_ParseFail;
6423   }
6424 
6425   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6426   return MatchOperand_Success;
6427 }
6428 
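// Parse "dst_unused:UNUSED_PAD", "dst_unused:UNUSED_SEXT" or
// "dst_unused:UNUSED_PRESERVE".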
6429 OperandMatchResultTy
6430 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6431   using namespace llvm::AMDGPU::SDWA;
6432 
6433   SMLoc S = Parser.getTok().getLoc();
6434   StringRef Value;
6435   OperandMatchResultTy res;
6436 
6437   res = parseStringWithPrefix("dst_unused", Value);
6438   if (res != MatchOperand_Success) {
6439     return res;
6440   }
6441 
6442   int64_t Int;
6443   Int = StringSwitch<int64_t>(Value)
6444         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6445         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6446         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6447         .Default(0xffffffff);
6448   Parser.Lex(); // eat last token
6449 
6450   if (Int == 0xffffffff) {
6451     return MatchOperand_ParseFail;
6452   }
6453 
6454   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6455   return MatchOperand_Success;
6456 }
6457 
6458 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6459   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6460 }
6461 
6462 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6463   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6464 }
6465 
6466 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6467   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6468 }
6469 
6470 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6471   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6472 }
6473 
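// Convert parsed SDWA operands into an MCInst. When skipVcc is set, an
// explicit "vcc" operand written in VOP2b/VOPC forms is skipped rather than
// added to the instruction.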
6474 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6475                               uint64_t BasicInstType, bool skipVcc) {
6476   using namespace llvm::AMDGPU::SDWA;
6477 
6478   OptionalImmIndexMap OptionalIdx;
6479   bool skippedVcc = false;
6480 
6481   unsigned I = 1;
6482   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6483   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6484     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6485   }
6486 
6487   for (unsigned E = Operands.size(); I != E; ++I) {
6488     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6489     if (skipVcc && !skippedVcc && Op.isReg() &&
6490         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6491       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
6492       // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6493       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6494       // Skip VCC only if we didn't skip it on the previous iteration.
6495       if (BasicInstType == SIInstrFlags::VOP2 &&
6496           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6497         skippedVcc = true;
6498         continue;
6499       } else if (BasicInstType == SIInstrFlags::VOPC &&
6500                  Inst.getNumOperands() == 0) {
6501         skippedVcc = true;
6502         continue;
6503       }
6504     }
6505     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6506       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6507     } else if (Op.isImm()) {
6508       // Handle optional arguments
6509       OptionalIdx[Op.getImmTy()] = I;
6510     } else {
6511       llvm_unreachable("Invalid operand type");
6512     }
6513     skippedVcc = false;
6514   }
6515 
6516   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6517       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6518       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6519     // V_NOP_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
6520     switch (BasicInstType) {
6521     case SIInstrFlags::VOP1:
6522       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6523       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6524         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6525       }
6526       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6527       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6528       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6529       break;
6530 
6531     case SIInstrFlags::VOP2:
6532       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6533       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6534         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6535       }
6536       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6537       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6538       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6539       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6540       break;
6541 
6542     case SIInstrFlags::VOPC:
6543       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6544         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6545       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6546       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6547       break;
6548 
6549     default:
6550       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6551     }
6552   }
6553 
6554   // Special case for v_mac_f16 and v_mac_f32: they have a src2 register
6555   // operand that is tied to the dst operand.
6556   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6557       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6558     auto it = Inst.begin();
6559     std::advance(
6560       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6561     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6562   }
6563 }
6564 
6565 /// Force static initialization.
6566 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6567   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6568   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6569 }
6570 
6571 #define GET_REGISTER_MATCHER
6572 #define GET_MATCHER_IMPLEMENTATION
6573 #define GET_MNEMONIC_SPELL_CHECKER
6574 #include "AMDGPUGenAsmMatcher.inc"
6575 
6576 // This function must be defined after the auto-generated include so that the
6577 // MatchClassKind enum is available.
6578 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6579                                                      unsigned Kind) {
6580   // Tokens like "glc" would be parsed as immediate operands in ParseOperand(),
6581   // but MatchInstructionImpl() expects a token and fails to validate the
6582   // operand. This method checks whether we were given an immediate operand
6583   // where the matcher expects the corresponding token.
6584   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6585   switch (Kind) {
6586   case MCK_addr64:
6587     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6588   case MCK_gds:
6589     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6590   case MCK_lds:
6591     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6592   case MCK_glc:
6593     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6594   case MCK_idxen:
6595     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6596   case MCK_offen:
6597     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6598   case MCK_SSrcB32:
6599     // When operands have expression values, they will return true for isToken
6600     // because it is not possible to distinguish between a token and an
6601     // expression at parse time. MatchInstructionImpl() will always try to
6602     // match an operand as a token when isToken returns true, and if the
6603     // name of the expression is not a valid token the match will fail,
6604     // so we need to handle that case here.
6605     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6606   case MCK_SSrcF32:
6607     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6608   case MCK_SoppBrTarget:
6609     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6610   case MCK_VReg32OrOff:
6611     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6612   case MCK_InterpSlot:
6613     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6614   case MCK_Attr:
6615     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6616   case MCK_AttrChan:
6617     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6618   default:
6619     return Match_InvalidOperand;
6620   }
6621 }
6622 
6623 //===----------------------------------------------------------------------===//
6624 // endpgm
6625 //===----------------------------------------------------------------------===//
6626 
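// Parse the optional immediate operand of s_endpgm; it defaults to 0 when
// omitted and must fit in 16 bits.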
6627 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6628   SMLoc S = Parser.getTok().getLoc();
6629   int64_t Imm = 0;
6630 
6631   if (!parseExpr(Imm)) {
6632     // The operand is optional; if it is not present, default to 0.
6633     Imm = 0;
6634   }
6635 
6636   if (!isUInt<16>(Imm)) {
6637     Error(S, "expected a 16-bit value");
6638     return MatchOperand_ParseFail;
6639   }
6640 
6641   Operands.push_back(
6642       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6643   return MatchOperand_Success;
6644 }
6645 
6646 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6647