1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCInstrInfo.h"
34 #include "llvm/MC/MCParser/MCAsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/Support/AMDGPUMetadata.h"
44 #include "llvm/Support/AMDHSAKernelDescriptor.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
62 
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
66 
67 namespace {
68 
69 class AMDGPUAsmParser;
70 
71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
72 
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
76 
77 class AMDGPUOperand : public MCParsedAsmOperand {
78   enum KindTy {
79     Token,
80     Immediate,
81     Register,
82     Expression
83   } Kind;
84 
85   SMLoc StartLoc, EndLoc;
86   const AMDGPUAsmParser *AsmParser;
87 
88 public:
89   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
90     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
91 
92   using Ptr = std::unique_ptr<AMDGPUOperand>;
93 
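  /// Source operand modifiers. Abs and Neg apply to floating-point sources,
  /// while Sext applies to integer sources; FP and integer modifiers are
  /// mutually exclusive on a single operand.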
94   struct Modifiers {
95     bool Abs = false;
96     bool Neg = false;
97     bool Sext = false;
98 
99     bool hasFPModifiers() const { return Abs || Neg; }
100     bool hasIntModifiers() const { return Sext; }
101     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
102 
103     int64_t getFPModifiersOperand() const {
104       int64_t Operand = 0;
105       Operand |= Abs ? SISrcMods::ABS : 0;
106       Operand |= Neg ? SISrcMods::NEG : 0;
107       return Operand;
108     }
109 
110     int64_t getIntModifiersOperand() const {
111       int64_t Operand = 0;
112       Operand |= Sext ? SISrcMods::SEXT : 0;
113       return Operand;
114     }
115 
116     int64_t getModifiersOperand() const {
117       assert(!(hasFPModifiers() && hasIntModifiers())
118            && "fp and int modifiers should not be used simultaneously");
119       if (hasFPModifiers()) {
120         return getFPModifiersOperand();
121       } else if (hasIntModifiers()) {
122         return getIntModifiersOperand();
123       } else {
124         return 0;
125       }
126     }
127 
128     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
129   };
130 
131   enum ImmTy {
132     ImmTyNone,
133     ImmTyGDS,
134     ImmTyLDS,
135     ImmTyOffen,
136     ImmTyIdxen,
137     ImmTyAddr64,
138     ImmTyOffset,
139     ImmTyInstOffset,
140     ImmTyOffset0,
141     ImmTyOffset1,
142     ImmTyGLC,
143     ImmTySLC,
144     ImmTyTFE,
145     ImmTyD16,
146     ImmTyClampSI,
147     ImmTyOModSI,
148     ImmTyDppCtrl,
149     ImmTyDppRowMask,
150     ImmTyDppBankMask,
151     ImmTyDppBoundCtrl,
152     ImmTySdwaDstSel,
153     ImmTySdwaSrc0Sel,
154     ImmTySdwaSrc1Sel,
155     ImmTySdwaDstUnused,
156     ImmTyDMask,
157     ImmTyUNorm,
158     ImmTyDA,
159     ImmTyR128A16,
160     ImmTyLWE,
161     ImmTyExpTgt,
162     ImmTyExpCompr,
163     ImmTyExpVM,
164     ImmTyFORMAT,
165     ImmTyHwreg,
166     ImmTyOff,
167     ImmTySendMsg,
168     ImmTyInterpSlot,
169     ImmTyInterpAttr,
170     ImmTyAttrChan,
171     ImmTyOpSel,
172     ImmTyOpSelHi,
173     ImmTyNegLo,
174     ImmTyNegHi,
175     ImmTySwizzle,
176     ImmTyGprIdxMode,
177     ImmTyHigh
178   };
179 
180   struct TokOp {
181     const char *Data;
182     unsigned Length;
183   };
184 
185   struct ImmOp {
186     int64_t Val;
187     ImmTy Type;
188     bool IsFPImm;
189     Modifiers Mods;
190   };
191 
192   struct RegOp {
193     unsigned RegNo;
194     bool IsForcedVOP3;
195     Modifiers Mods;
196   };
197 
198   union {
199     TokOp Tok;
200     ImmOp Imm;
201     RegOp Reg;
202     const MCExpr *Expr;
203   };
204 
205   bool isToken() const override {
206     if (Kind == Token)
207       return true;
208 
209     if (Kind != Expression || !Expr)
210       return false;
211 
212     // When parsing operands, we can't always tell if something was meant to be
213     // a token, like 'gds', or an expression that references a global variable.
214     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
216     return isa<MCSymbolRefExpr>(Expr);
217   }
218 
219   bool isImm() const override {
220     return Kind == Immediate;
221   }
222 
223   bool isInlinableImm(MVT type) const;
224   bool isLiteralImm(MVT type) const;
225 
226   bool isRegKind() const {
227     return Kind == Register;
228   }
229 
230   bool isReg() const override {
231     return isRegKind() && !hasModifiers();
232   }
233 
234   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
235     return isRegClass(RCID) || isInlinableImm(type);
236   }
237 
238   bool isRegOrImmWithInt16InputMods() const {
239     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
240   }
241 
242   bool isRegOrImmWithInt32InputMods() const {
243     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
244   }
245 
246   bool isRegOrImmWithInt64InputMods() const {
247     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
248   }
249 
250   bool isRegOrImmWithFP16InputMods() const {
251     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
252   }
253 
254   bool isRegOrImmWithFP32InputMods() const {
255     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
256   }
257 
258   bool isRegOrImmWithFP64InputMods() const {
259     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
260   }
261 
262   bool isVReg() const {
263     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
264            isRegClass(AMDGPU::VReg_64RegClassID) ||
265            isRegClass(AMDGPU::VReg_96RegClassID) ||
266            isRegClass(AMDGPU::VReg_128RegClassID) ||
267            isRegClass(AMDGPU::VReg_256RegClassID) ||
268            isRegClass(AMDGPU::VReg_512RegClassID);
269   }
270 
271   bool isVReg32() const {
272     return isRegClass(AMDGPU::VGPR_32RegClassID);
273   }
274 
275   bool isVReg32OrOff() const {
276     return isOff() || isVReg32();
277   }
278 
279   bool isSDWAOperand(MVT type) const;
280   bool isSDWAFP16Operand() const;
281   bool isSDWAFP32Operand() const;
282   bool isSDWAInt16Operand() const;
283   bool isSDWAInt32Operand() const;
284 
285   bool isImmTy(ImmTy ImmT) const {
286     return isImm() && Imm.Type == ImmT;
287   }
288 
289   bool isImmModifier() const {
290     return isImm() && Imm.Type != ImmTyNone;
291   }
292 
293   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
294   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
295   bool isDMask() const { return isImmTy(ImmTyDMask); }
296   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
297   bool isDA() const { return isImmTy(ImmTyDA); }
298   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
299   bool isLWE() const { return isImmTy(ImmTyLWE); }
300   bool isOff() const { return isImmTy(ImmTyOff); }
301   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
302   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
303   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
304   bool isOffen() const { return isImmTy(ImmTyOffen); }
305   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
306   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
307   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
308   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
309   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
310 
311   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
312   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
313   bool isGDS() const { return isImmTy(ImmTyGDS); }
314   bool isLDS() const { return isImmTy(ImmTyLDS); }
315   bool isGLC() const { return isImmTy(ImmTyGLC); }
316   bool isSLC() const { return isImmTy(ImmTySLC); }
317   bool isTFE() const { return isImmTy(ImmTyTFE); }
318   bool isD16() const { return isImmTy(ImmTyD16); }
319   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
320   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
321   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
322   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
323   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
324   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
325   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
326   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
327   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
328   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
329   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
330   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
331   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
332   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
333   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
334   bool isHigh() const { return isImmTy(ImmTyHigh); }
335 
336   bool isMod() const {
337     return isClampSI() || isOModSI();
338   }
339 
340   bool isRegOrImm() const {
341     return isReg() || isImm();
342   }
343 
344   bool isRegClass(unsigned RCID) const;
345 
346   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
347     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
348   }
349 
350   bool isSCSrcB16() const {
351     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
352   }
353 
354   bool isSCSrcV2B16() const {
355     return isSCSrcB16();
356   }
357 
358   bool isSCSrcB32() const {
359     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
360   }
361 
362   bool isSCSrcB64() const {
363     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
364   }
365 
366   bool isSCSrcF16() const {
367     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
368   }
369 
370   bool isSCSrcV2F16() const {
371     return isSCSrcF16();
372   }
373 
374   bool isSCSrcF32() const {
375     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
376   }
377 
378   bool isSCSrcF64() const {
379     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
380   }
381 
382   bool isSSrcB32() const {
383     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
384   }
385 
386   bool isSSrcB16() const {
387     return isSCSrcB16() || isLiteralImm(MVT::i16);
388   }
389 
390   bool isSSrcV2B16() const {
391     llvm_unreachable("cannot happen");
392     return isSSrcB16();
393   }
394 
395   bool isSSrcB64() const {
396     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrcB64().
398     return isSCSrcB64() || isLiteralImm(MVT::i64);
399   }
400 
401   bool isSSrcF32() const {
402     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
403   }
404 
405   bool isSSrcF64() const {
406     return isSCSrcB64() || isLiteralImm(MVT::f64);
407   }
408 
409   bool isSSrcF16() const {
410     return isSCSrcB16() || isLiteralImm(MVT::f16);
411   }
412 
413   bool isSSrcV2F16() const {
414     llvm_unreachable("cannot happen");
415     return isSSrcF16();
416   }
417 
418   bool isSSrcOrLdsB32() const {
419     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
420            isLiteralImm(MVT::i32) || isExpr();
421   }
422 
423   bool isVCSrcB32() const {
424     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
425   }
426 
427   bool isVCSrcB64() const {
428     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
429   }
430 
431   bool isVCSrcB16() const {
432     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
433   }
434 
435   bool isVCSrcV2B16() const {
436     return isVCSrcB16();
437   }
438 
439   bool isVCSrcF32() const {
440     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
441   }
442 
443   bool isVCSrcF64() const {
444     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
445   }
446 
447   bool isVCSrcF16() const {
448     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
449   }
450 
451   bool isVCSrcV2F16() const {
452     return isVCSrcF16();
453   }
454 
455   bool isVSrcB32() const {
456     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
457   }
458 
459   bool isVSrcB64() const {
460     return isVCSrcF64() || isLiteralImm(MVT::i64);
461   }
462 
463   bool isVSrcB16() const {
464     return isVCSrcF16() || isLiteralImm(MVT::i16);
465   }
466 
467   bool isVSrcV2B16() const {
468     llvm_unreachable("cannot happen");
469     return isVSrcB16();
470   }
471 
472   bool isVSrcF32() const {
473     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
474   }
475 
476   bool isVSrcF64() const {
477     return isVCSrcF64() || isLiteralImm(MVT::f64);
478   }
479 
480   bool isVSrcF16() const {
481     return isVCSrcF16() || isLiteralImm(MVT::f16);
482   }
483 
484   bool isVSrcV2F16() const {
485     llvm_unreachable("cannot happen");
486     return isVSrcF16();
487   }
488 
489   bool isKImmFP32() const {
490     return isLiteralImm(MVT::f32);
491   }
492 
493   bool isKImmFP16() const {
494     return isLiteralImm(MVT::f16);
495   }
496 
497   bool isMem() const override {
498     return false;
499   }
500 
501   bool isExpr() const {
502     return Kind == Expression;
503   }
504 
505   bool isSoppBrTarget() const {
506     return isExpr() || isImm();
507   }
508 
509   bool isSWaitCnt() const;
510   bool isHwreg() const;
511   bool isSendMsg() const;
512   bool isSwizzle() const;
513   bool isSMRDOffset8() const;
514   bool isSMRDOffset20() const;
515   bool isSMRDLiteralOffset() const;
516   bool isDPPCtrl() const;
517   bool isGPRIdxMode() const;
518   bool isS16Imm() const;
519   bool isU16Imm() const;
520 
521   StringRef getExpressionAsToken() const {
522     assert(isExpr());
523     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
524     return S->getSymbol().getName();
525   }
526 
527   StringRef getToken() const {
528     assert(isToken());
529 
530     if (Kind == Expression)
531       return getExpressionAsToken();
532 
533     return StringRef(Tok.Data, Tok.Length);
534   }
535 
536   int64_t getImm() const {
537     assert(isImm());
538     return Imm.Val;
539   }
540 
541   ImmTy getImmTy() const {
542     assert(isImm());
543     return Imm.Type;
544   }
545 
546   unsigned getReg() const override {
547     return Reg.RegNo;
548   }
549 
550   SMLoc getStartLoc() const override {
551     return StartLoc;
552   }
553 
554   SMLoc getEndLoc() const override {
555     return EndLoc;
556   }
557 
558   SMRange getLocRange() const {
559     return SMRange(StartLoc, EndLoc);
560   }
561 
562   Modifiers getModifiers() const {
563     assert(isRegKind() || isImmTy(ImmTyNone));
564     return isRegKind() ? Reg.Mods : Imm.Mods;
565   }
566 
567   void setModifiers(Modifiers Mods) {
568     assert(isRegKind() || isImmTy(ImmTyNone));
569     if (isRegKind())
570       Reg.Mods = Mods;
571     else
572       Imm.Mods = Mods;
573   }
574 
575   bool hasModifiers() const {
576     return getModifiers().hasModifiers();
577   }
578 
579   bool hasFPModifiers() const {
580     return getModifiers().hasFPModifiers();
581   }
582 
583   bool hasIntModifiers() const {
584     return getModifiers().hasIntModifiers();
585   }
586 
587   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
588 
589   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
590 
591   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
592 
593   template <unsigned Bitwidth>
594   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
595 
596   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
597     addKImmFPOperands<16>(Inst, N);
598   }
599 
600   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
601     addKImmFPOperands<32>(Inst, N);
602   }
603 
604   void addRegOperands(MCInst &Inst, unsigned N) const;
605 
606   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
607     if (isRegKind())
608       addRegOperands(Inst, N);
609     else if (isExpr())
610       Inst.addOperand(MCOperand::createExpr(Expr));
611     else
612       addImmOperands(Inst, N);
613   }
614 
615   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
616     Modifiers Mods = getModifiers();
617     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
618     if (isRegKind()) {
619       addRegOperands(Inst, N);
620     } else {
621       addImmOperands(Inst, N, false);
622     }
623   }
624 
625   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
626     assert(!hasIntModifiers());
627     addRegOrImmWithInputModsOperands(Inst, N);
628   }
629 
630   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
631     assert(!hasFPModifiers());
632     addRegOrImmWithInputModsOperands(Inst, N);
633   }
634 
635   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
636     Modifiers Mods = getModifiers();
637     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
638     assert(isRegKind());
639     addRegOperands(Inst, N);
640   }
641 
642   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
643     assert(!hasIntModifiers());
644     addRegWithInputModsOperands(Inst, N);
645   }
646 
647   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
648     assert(!hasFPModifiers());
649     addRegWithInputModsOperands(Inst, N);
650   }
651 
652   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
653     if (isImm())
654       addImmOperands(Inst, N);
655     else {
656       assert(isExpr());
657       Inst.addOperand(MCOperand::createExpr(Expr));
658     }
659   }
660 
661   static void printImmTy(raw_ostream& OS, ImmTy Type) {
662     switch (Type) {
663     case ImmTyNone: OS << "None"; break;
664     case ImmTyGDS: OS << "GDS"; break;
665     case ImmTyLDS: OS << "LDS"; break;
666     case ImmTyOffen: OS << "Offen"; break;
667     case ImmTyIdxen: OS << "Idxen"; break;
668     case ImmTyAddr64: OS << "Addr64"; break;
669     case ImmTyOffset: OS << "Offset"; break;
670     case ImmTyInstOffset: OS << "InstOffset"; break;
671     case ImmTyOffset0: OS << "Offset0"; break;
672     case ImmTyOffset1: OS << "Offset1"; break;
673     case ImmTyGLC: OS << "GLC"; break;
674     case ImmTySLC: OS << "SLC"; break;
675     case ImmTyTFE: OS << "TFE"; break;
676     case ImmTyD16: OS << "D16"; break;
677     case ImmTyFORMAT: OS << "FORMAT"; break;
678     case ImmTyClampSI: OS << "ClampSI"; break;
679     case ImmTyOModSI: OS << "OModSI"; break;
680     case ImmTyDppCtrl: OS << "DppCtrl"; break;
681     case ImmTyDppRowMask: OS << "DppRowMask"; break;
682     case ImmTyDppBankMask: OS << "DppBankMask"; break;
683     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
684     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
685     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
686     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
687     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
688     case ImmTyDMask: OS << "DMask"; break;
689     case ImmTyUNorm: OS << "UNorm"; break;
690     case ImmTyDA: OS << "DA"; break;
691     case ImmTyR128A16: OS << "R128A16"; break;
692     case ImmTyLWE: OS << "LWE"; break;
693     case ImmTyOff: OS << "Off"; break;
694     case ImmTyExpTgt: OS << "ExpTgt"; break;
695     case ImmTyExpCompr: OS << "ExpCompr"; break;
696     case ImmTyExpVM: OS << "ExpVM"; break;
697     case ImmTyHwreg: OS << "Hwreg"; break;
698     case ImmTySendMsg: OS << "SendMsg"; break;
699     case ImmTyInterpSlot: OS << "InterpSlot"; break;
700     case ImmTyInterpAttr: OS << "InterpAttr"; break;
701     case ImmTyAttrChan: OS << "AttrChan"; break;
702     case ImmTyOpSel: OS << "OpSel"; break;
703     case ImmTyOpSelHi: OS << "OpSelHi"; break;
704     case ImmTyNegLo: OS << "NegLo"; break;
705     case ImmTyNegHi: OS << "NegHi"; break;
706     case ImmTySwizzle: OS << "Swizzle"; break;
707     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
708     case ImmTyHigh: OS << "High"; break;
709     }
710   }
711 
712   void print(raw_ostream &OS) const override {
713     switch (Kind) {
714     case Register:
715       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
716       break;
717     case Immediate:
718       OS << '<' << getImm();
719       if (getImmTy() != ImmTyNone) {
720         OS << " type: "; printImmTy(OS, getImmTy());
721       }
722       OS << " mods: " << Imm.Mods << '>';
723       break;
724     case Token:
725       OS << '\'' << getToken() << '\'';
726       break;
727     case Expression:
728       OS << "<expr " << *Expr << '>';
729       break;
730     }
731   }
732 
733   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
734                                       int64_t Val, SMLoc Loc,
735                                       ImmTy Type = ImmTyNone,
736                                       bool IsFPImm = false) {
737     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
738     Op->Imm.Val = Val;
739     Op->Imm.IsFPImm = IsFPImm;
740     Op->Imm.Type = Type;
741     Op->Imm.Mods = Modifiers();
742     Op->StartLoc = Loc;
743     Op->EndLoc = Loc;
744     return Op;
745   }
746 
747   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
748                                         StringRef Str, SMLoc Loc,
749                                         bool HasExplicitEncodingSize = true) {
750     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
751     Res->Tok.Data = Str.data();
752     Res->Tok.Length = Str.size();
753     Res->StartLoc = Loc;
754     Res->EndLoc = Loc;
755     return Res;
756   }
757 
758   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
759                                       unsigned RegNo, SMLoc S,
760                                       SMLoc E,
761                                       bool ForceVOP3) {
762     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
763     Op->Reg.RegNo = RegNo;
764     Op->Reg.Mods = Modifiers();
765     Op->Reg.IsForcedVOP3 = ForceVOP3;
766     Op->StartLoc = S;
767     Op->EndLoc = E;
768     return Op;
769   }
770 
771   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
772                                        const class MCExpr *Expr, SMLoc S) {
773     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
774     Op->Expr = Expr;
775     Op->StartLoc = S;
776     Op->EndLoc = S;
777     return Op;
778   }
779 };
780 
781 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
782   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
783   return OS;
784 }
785 
786 //===----------------------------------------------------------------------===//
787 // AsmParser
788 //===----------------------------------------------------------------------===//
789 
790 // Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
793 class KernelScopeInfo {
794   int SgprIndexUnusedMin = -1;
795   int VgprIndexUnusedMin = -1;
796   MCContext *Ctx = nullptr;
797 
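  // Record that SGPR i is used and update the .kernel.sgpr_count symbol.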
798   void usesSgprAt(int i) {
799     if (i >= SgprIndexUnusedMin) {
800       SgprIndexUnusedMin = ++i;
801       if (Ctx) {
802         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
803         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
804       }
805     }
806   }
807 
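  // Record that VGPR i is used and update the .kernel.vgpr_count symbol.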
808   void usesVgprAt(int i) {
809     if (i >= VgprIndexUnusedMin) {
810       VgprIndexUnusedMin = ++i;
811       if (Ctx) {
812         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
813         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
814       }
815     }
816   }
817 
818 public:
819   KernelScopeInfo() = default;
820 
821   void initialize(MCContext &Context) {
822     Ctx = &Context;
823     usesSgprAt(SgprIndexUnusedMin = -1);
824     usesVgprAt(VgprIndexUnusedMin = -1);
825   }
826 
827   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
828     switch (RegKind) {
829       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
830       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
831       default: break;
832     }
833   }
834 };
835 
836 class AMDGPUAsmParser : public MCTargetAsmParser {
837   MCAsmParser &Parser;
838 
839   // Number of extra operands parsed after the first optional operand.
840   // This may be necessary to skip hardcoded mandatory operands.
841   static const unsigned MAX_OPR_LOOKAHEAD = 8;
842 
843   unsigned ForcedEncodingSize = 0;
844   bool ForcedDPP = false;
845   bool ForcedSDWA = false;
846   KernelScopeInfo KernelScope;
847 
848   /// @name Auto-generated Match Functions
849   /// {
850 
851 #define GET_ASSEMBLER_HEADER
852 #include "AMDGPUGenAsmMatcher.inc"
853 
854   /// }
855 
856 private:
857   bool ParseAsAbsoluteExpression(uint32_t &Ret);
858   bool OutOfRangeError(SMRange Range);
859   /// Calculate VGPR/SGPR blocks required for given target, reserved
860   /// registers, and user-specified NextFreeXGPR values.
861   ///
862   /// \param Features [in] Target features, used for bug corrections.
863   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
864   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
865   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
866   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
867   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
868   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
869   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
870   /// \param VGPRBlocks [out] Result VGPR block count.
871   /// \param SGPRBlocks [out] Result SGPR block count.
872   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
873                           bool FlatScrUsed, bool XNACKUsed,
874                           unsigned NextFreeVGPR, SMRange VGPRRange,
875                           unsigned NextFreeSGPR, SMRange SGPRRange,
876                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
877   bool ParseDirectiveAMDGCNTarget();
878   bool ParseDirectiveAMDHSAKernel();
879   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
880   bool ParseDirectiveHSACodeObjectVersion();
881   bool ParseDirectiveHSACodeObjectISA();
882   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
883   bool ParseDirectiveAMDKernelCodeT();
884   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
885   bool ParseDirectiveAMDGPUHsaKernel();
886 
887   bool ParseDirectiveISAVersion();
888   bool ParseDirectiveHSAMetadata();
889   bool ParseDirectivePALMetadata();
890 
891   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
892                              RegisterKind RegKind, unsigned Reg1,
893                              unsigned RegNum);
894   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
895                            unsigned& RegNum, unsigned& RegWidth,
896                            unsigned *DwordRegIndex);
897   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
898   void initializeGprCountSymbol(RegisterKind RegKind);
899   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
900                              unsigned RegWidth);
901   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
902                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
903   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
904                  bool IsGdsHardcoded);
905 
906 public:
907   enum AMDGPUMatchResultTy {
908     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
909   };
910 
911   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
912 
913   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
914                const MCInstrInfo &MII,
915                const MCTargetOptions &Options)
916       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
917     MCAsmParserExtension::Initialize(Parser);
918 
919     if (getFeatureBits().none()) {
920       // Set default features.
921       copySTI().ToggleFeature("SOUTHERN_ISLANDS");
922     }
923 
924     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
925 
926     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
931       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
932       MCContext &Ctx = getContext();
933       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
934         MCSymbol *Sym =
935             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
936         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
937         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
938         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
939         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
940         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
941       } else {
942         MCSymbol *Sym =
943             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
944         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
945         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
946         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
947         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
948         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
949       }
950       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
951         initializeGprCountSymbol(IS_VGPR);
952         initializeGprCountSymbol(IS_SGPR);
953       } else
954         KernelScope.initialize(getContext());
955     }
956   }
957 
958   bool hasXNACK() const {
959     return AMDGPU::hasXNACK(getSTI());
960   }
961 
962   bool hasMIMG_R128() const {
963     return AMDGPU::hasMIMG_R128(getSTI());
964   }
965 
966   bool hasPackedD16() const {
967     return AMDGPU::hasPackedD16(getSTI());
968   }
969 
970   bool isSI() const {
971     return AMDGPU::isSI(getSTI());
972   }
973 
974   bool isCI() const {
975     return AMDGPU::isCI(getSTI());
976   }
977 
978   bool isVI() const {
979     return AMDGPU::isVI(getSTI());
980   }
981 
982   bool isGFX9() const {
983     return AMDGPU::isGFX9(getSTI());
984   }
985 
986   bool hasInv2PiInlineImm() const {
987     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
988   }
989 
990   bool hasFlatOffsets() const {
991     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
992   }
993 
994   bool hasSGPR102_SGPR103() const {
995     return !isVI();
996   }
997 
998   bool hasIntClamp() const {
999     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1000   }
1001 
1002   AMDGPUTargetStreamer &getTargetStreamer() {
1003     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1004     return static_cast<AMDGPUTargetStreamer &>(TS);
1005   }
1006 
1007   const MCRegisterInfo *getMRI() const {
1008     // We need this const_cast because for some reason getContext() is not const
1009     // in MCAsmParser.
1010     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1011   }
1012 
1013   const MCInstrInfo *getMII() const {
1014     return &MII;
1015   }
1016 
1017   const FeatureBitset &getFeatureBits() const {
1018     return getSTI().getFeatureBits();
1019   }
1020 
1021   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1022   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1023   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1024 
1025   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1026   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1027   bool isForcedDPP() const { return ForcedDPP; }
1028   bool isForcedSDWA() const { return ForcedSDWA; }
1029   ArrayRef<unsigned> getMatchedVariants() const;
1030 
1031   std::unique_ptr<AMDGPUOperand> parseRegister();
1032   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1033   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1034   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1035                                       unsigned Kind) override;
1036   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1037                                OperandVector &Operands, MCStreamer &Out,
1038                                uint64_t &ErrorInfo,
1039                                bool MatchingInlineAsm) override;
1040   bool ParseDirective(AsmToken DirectiveID) override;
1041   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
1042   StringRef parseMnemonicSuffix(StringRef Name);
1043   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1044                         SMLoc NameLoc, OperandVector &Operands) override;
1045   //bool ProcessInstruction(MCInst &Inst);
1046 
1047   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1048 
1049   OperandMatchResultTy
1050   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1051                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1052                      bool (*ConvertResult)(int64_t &) = nullptr);
1053 
1054   OperandMatchResultTy parseOperandArrayWithPrefix(
1055     const char *Prefix,
1056     OperandVector &Operands,
1057     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1058     bool (*ConvertResult)(int64_t&) = nullptr);
1059 
1060   OperandMatchResultTy
1061   parseNamedBit(const char *Name, OperandVector &Operands,
1062                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1063   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1064                                              StringRef &Value);
1065 
1066   bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
1067   OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
1068   OperandMatchResultTy parseReg(OperandVector &Operands);
1069   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
1070   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1071   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1072   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1073   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1074   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1075   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1076 
1077   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1078   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1079   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1080   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1081 
1082   bool parseCnt(int64_t &IntVal);
1083   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1084   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1085 
1086 private:
1087   struct OperandInfoTy {
1088     int64_t Id;
1089     bool IsSymbolic = false;
1090 
1091     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1092   };
1093 
1094   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1095   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1096 
1097   void errorExpTgt();
1098   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1099 
1100   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1101   bool validateSOPLiteral(const MCInst &Inst) const;
1102   bool validateConstantBusLimitations(const MCInst &Inst);
1103   bool validateEarlyClobberLimitations(const MCInst &Inst);
1104   bool validateIntClampSupported(const MCInst &Inst);
1105   bool validateMIMGAtomicDMask(const MCInst &Inst);
1106   bool validateMIMGGatherDMask(const MCInst &Inst);
1107   bool validateMIMGDataSize(const MCInst &Inst);
1108   bool validateMIMGD16(const MCInst &Inst);
1109   bool validateLdsDirect(const MCInst &Inst);
1110   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1111   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1112   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1113 
1114   bool trySkipId(const StringRef Id);
1115   bool trySkipToken(const AsmToken::TokenKind Kind);
1116   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1117   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1118   bool parseExpr(int64_t &Imm);
1119 
1120 public:
1121   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1122   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1123 
1124   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1125   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1126   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1127   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1128   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1129 
1130   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1131                             const unsigned MinVal,
1132                             const unsigned MaxVal,
1133                             const StringRef ErrMsg);
1134   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1135   bool parseSwizzleOffset(int64_t &Imm);
1136   bool parseSwizzleMacro(int64_t &Imm);
1137   bool parseSwizzleQuadPerm(int64_t &Imm);
1138   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1139   bool parseSwizzleBroadcast(int64_t &Imm);
1140   bool parseSwizzleSwap(int64_t &Imm);
1141   bool parseSwizzleReverse(int64_t &Imm);
1142 
1143   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1144   int64_t parseGPRIdxMacro();
1145 
1146   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1147   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1148   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1149   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1150   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1151 
1152   AMDGPUOperand::Ptr defaultGLC() const;
1153   AMDGPUOperand::Ptr defaultSLC() const;
1154 
1155   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1156   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1157   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1158   AMDGPUOperand::Ptr defaultOffsetU12() const;
1159   AMDGPUOperand::Ptr defaultOffsetS13() const;
1160 
1161   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1162 
1163   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1164                OptionalImmIndexMap &OptionalIdx);
1165   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1166   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1167   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1168 
1169   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1170 
1171   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1172                bool IsAtomic = false);
1173   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1174 
1175   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1176   AMDGPUOperand::Ptr defaultRowMask() const;
1177   AMDGPUOperand::Ptr defaultBankMask() const;
1178   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1179   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1180 
1181   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1182                                     AMDGPUOperand::ImmTy Type);
1183   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1184   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1185   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1186   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1187   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1188   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1189                 uint64_t BasicInstType, bool skipVcc = false);
1190 };
1191 
1192 struct OptionalOperand {
1193   const char *Name;
1194   AMDGPUOperand::ImmTy Type;
1195   bool IsBit;
1196   bool (*ConvertResult)(int64_t&);
1197 };
1198 
1199 } // end anonymous namespace
1200 
// May be called with an integer type of equivalent bitwidth.
1202 static const fltSemantics *getFltSemantics(unsigned Size) {
1203   switch (Size) {
1204   case 4:
1205     return &APFloat::IEEEsingle();
1206   case 8:
1207     return &APFloat::IEEEdouble();
1208   case 2:
1209     return &APFloat::IEEEhalf();
1210   default:
1211     llvm_unreachable("unsupported fp type");
1212   }
1213 }
1214 
1215 static const fltSemantics *getFltSemantics(MVT VT) {
1216   return getFltSemantics(VT.getSizeInBits() / 8);
1217 }
1218 
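// Map an SI source operand type to the floating-point semantics used to
// encode its literals.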
1219 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1220   switch (OperandType) {
1221   case AMDGPU::OPERAND_REG_IMM_INT32:
1222   case AMDGPU::OPERAND_REG_IMM_FP32:
1223   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1224   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1225     return &APFloat::IEEEsingle();
1226   case AMDGPU::OPERAND_REG_IMM_INT64:
1227   case AMDGPU::OPERAND_REG_IMM_FP64:
1228   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1229   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1230     return &APFloat::IEEEdouble();
1231   case AMDGPU::OPERAND_REG_IMM_INT16:
1232   case AMDGPU::OPERAND_REG_IMM_FP16:
1233   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1234   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1235   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1236   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1237     return &APFloat::IEEEhalf();
1238   default:
1239     llvm_unreachable("unsupported fp type");
1240   }
1241 }
1242 
1243 //===----------------------------------------------------------------------===//
1244 // Operand
1245 //===----------------------------------------------------------------------===//
1246 
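// Return true if FPLiteral can be converted to the type VT, allowing precision
// loss but not overflow or underflow. Note that FPLiteral is converted in
// place.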
1247 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1248   bool Lost;
1249 
  // Convert the literal to the target floating-point type.
1251   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1252                                                APFloat::rmNearestTiesToEven,
1253                                                &Lost);
  // We allow precision loss but not overflow or underflow.
1255   if (Status != APFloat::opOK &&
1256       Lost &&
1257       ((Status & APFloat::opOverflow)  != 0 ||
1258        (Status & APFloat::opUnderflow) != 0)) {
1259     return false;
1260   }
1261 
1262   return true;
1263 }
1264 
1265 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1266   if (!isImmTy(ImmTyNone)) {
1267     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1268     return false;
1269   }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on MIPS hosts.
1273 
1274   APInt Literal(64, Imm.Val);
1275 
1276   if (Imm.IsFPImm) { // We got fp literal token
1277     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1278       return AMDGPU::isInlinableLiteral64(Imm.Val,
1279                                           AsmParser->hasInv2PiInlineImm());
1280     }
1281 
1282     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1283     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1284       return false;
1285 
1286     if (type.getScalarSizeInBits() == 16) {
1287       return AMDGPU::isInlinableLiteral16(
1288         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1289         AsmParser->hasInv2PiInlineImm());
1290     }
1291 
1292     // Check if single precision literal is inlinable
1293     return AMDGPU::isInlinableLiteral32(
1294       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1295       AsmParser->hasInv2PiInlineImm());
1296   }
1297 
1298   // We got int literal token.
1299   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1300     return AMDGPU::isInlinableLiteral64(Imm.Val,
1301                                         AsmParser->hasInv2PiInlineImm());
1302   }
1303 
1304   if (type.getScalarSizeInBits() == 16) {
1305     return AMDGPU::isInlinableLiteral16(
1306       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1307       AsmParser->hasInv2PiInlineImm());
1308   }
1309 
1310   return AMDGPU::isInlinableLiteral32(
1311     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1312     AsmParser->hasInv2PiInlineImm());
1313 }
1314 
1315 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1316   // Check that this immediate can be added as literal
1317   if (!isImmTy(ImmTyNone)) {
1318     return false;
1319   }
1320 
1321   if (!Imm.IsFPImm) {
1322     // We got int literal token.
1323 
1324     if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals while preserving the same
      // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
      // ambiguity, disable these cases.
1328       return false;
1329     }
1330 
1331     unsigned Size = type.getSizeInBits();
1332     if (Size == 64)
1333       Size = 32;
1334 
1335     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1336     // types.
1337     return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
1338   }
1339 
1340   // We got fp literal token
1341   if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal will be set to zeroes when it is encoded,
    // but we accept such literals here.
1343     return true;
1344   }
1345 
1346   if (type == MVT::i64) { // Expected 64-bit int operand
1347     // We don't allow fp literals in 64-bit integer instructions. It is
1348     // unclear how we should encode them.
1349     return false;
1350   }
1351 
1352   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1353   return canLosslesslyConvertToFPType(FPLiteral, type);
1354 }
1355 
1356 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1357   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1358 }
1359 
1360 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1361   if (AsmParser->isVI())
1362     return isVReg32();
1363   else if (AsmParser->isGFX9())
1364     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1365   else
1366     return false;
1367 }
1368 
1369 bool AMDGPUOperand::isSDWAFP16Operand() const {
1370   return isSDWAOperand(MVT::f16);
1371 }
1372 
1373 bool AMDGPUOperand::isSDWAFP32Operand() const {
1374   return isSDWAOperand(MVT::f32);
1375 }
1376 
1377 bool AMDGPUOperand::isSDWAInt16Operand() const {
1378   return isSDWAOperand(MVT::i16);
1379 }
1380 
1381 bool AMDGPUOperand::isSDWAInt32Operand() const {
1382   return isSDWAOperand(MVT::i32);
1383 }
1384 
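// Apply the abs/neg modifiers to the raw bit pattern of an FP literal of the
// given size in bytes.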
1385 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1386 {
1387   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1388   assert(Size == 2 || Size == 4 || Size == 8);
1389 
1390   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1391 
1392   if (Imm.Mods.Abs) {
1393     Val &= ~FpSignMask;
1394   }
1395   if (Imm.Mods.Neg) {
1396     Val ^= FpSignMask;
1397   }
1398 
1399   return Val;
1400 }
1401 
1402 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1403   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1404                              Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1408   } else {
1409     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1410     Inst.addOperand(MCOperand::createImm(Imm.Val));
1411   }
1412 }
1413 
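// Encode Val as an operand of the instruction under construction: inline
// constants are emitted as-is, other values are converted to the literal
// encoding expected by the operand type.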
1414 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1415   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1416   auto OpNum = Inst.getNumOperands();
1417   // Check that this operand accepts literals
1418   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1419 
1420   if (ApplyModifiers) {
1421     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1422     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1423     Val = applyInputFPModifiers(Val, Size);
1424   }
1425 
1426   APInt Literal(64, Val);
1427   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1428 
1429   if (Imm.IsFPImm) { // We got fp literal token
1430     switch (OpTy) {
1431     case AMDGPU::OPERAND_REG_IMM_INT64:
1432     case AMDGPU::OPERAND_REG_IMM_FP64:
1433     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1434     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1435       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1436                                        AsmParser->hasInv2PiInlineImm())) {
1437         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1438         return;
1439       }
1440 
1441       // Non-inlineable
1442       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1443         // For fp operands we check if low 32 bits are zeros
1444         if (Literal.getLoBits(32) != 0) {
1445           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1446           "Can't encode literal as exact 64-bit floating-point operand. "
1447           "Low 32-bits will be set to zero");
1448         }
1449 
1450         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1451         return;
1452       }
1453 
1454       // We don't allow fp literals in 64-bit integer instructions. It is
1455       // unclear how we should encode them. This case should be checked earlier
1456       // in predicate methods (isLiteralImm())
1457       llvm_unreachable("fp literal in 64-bit integer instruction.");
1458 
1459     case AMDGPU::OPERAND_REG_IMM_INT32:
1460     case AMDGPU::OPERAND_REG_IMM_FP32:
1461     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1462     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1463     case AMDGPU::OPERAND_REG_IMM_INT16:
1464     case AMDGPU::OPERAND_REG_IMM_FP16:
1465     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1466     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1467     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1468     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1469       bool lost;
1470       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's floating-point type.
1472       FPLiteral.convert(*getOpFltSemantics(OpTy),
1473                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()
1476 
1477       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1478       if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
1479           OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
1480         ImmVal |= (ImmVal << 16);
1481       }
1482 
1483       Inst.addOperand(MCOperand::createImm(ImmVal));
1484       return;
1485     }
1486     default:
1487       llvm_unreachable("invalid operand size");
1488     }
1489 
1490     return;
1491   }
1492 
  // We got int literal token.
1494   // Only sign extend inline immediates.
1495   // FIXME: No errors on truncation
1496   switch (OpTy) {
1497   case AMDGPU::OPERAND_REG_IMM_INT32:
1498   case AMDGPU::OPERAND_REG_IMM_FP32:
1499   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1500   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1501     if (isInt<32>(Val) &&
1502         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1503                                      AsmParser->hasInv2PiInlineImm())) {
1504       Inst.addOperand(MCOperand::createImm(Val));
1505       return;
1506     }
1507 
1508     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1509     return;
1510 
1511   case AMDGPU::OPERAND_REG_IMM_INT64:
1512   case AMDGPU::OPERAND_REG_IMM_FP64:
1513   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1514   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1515     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1516       Inst.addOperand(MCOperand::createImm(Val));
1517       return;
1518     }
1519 
1520     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1521     return;
1522 
1523   case AMDGPU::OPERAND_REG_IMM_INT16:
1524   case AMDGPU::OPERAND_REG_IMM_FP16:
1525   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1526   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1527     if (isInt<16>(Val) &&
1528         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1529                                      AsmParser->hasInv2PiInlineImm())) {
1530       Inst.addOperand(MCOperand::createImm(Val));
1531       return;
1532     }
1533 
1534     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1535     return;
1536 
1537   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1538   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1539     auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
1540     assert(AMDGPU::isInlinableLiteral16(LiteralVal,
1541                                         AsmParser->hasInv2PiInlineImm()));
1542 
1543     uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
1544                       static_cast<uint32_t>(LiteralVal);
1545     Inst.addOperand(MCOperand::createImm(ImmVal));
1546     return;
1547   }
1548   default:
1549     llvm_unreachable("invalid operand size");
1550   }
1551 }
1552 
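// Add a fixed-width "KImm" literal operand. Integer tokens are truncated to
// Bitwidth bits; floating-point tokens are first converted to a Bitwidth-bit
// floating-point value.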
1553 template <unsigned Bitwidth>
1554 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1555   APInt Literal(64, Imm.Val);
1556 
1557   if (!Imm.IsFPImm) {
    // We got an integer literal token.
1559     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1560     return;
1561   }
1562 
1563   bool Lost;
1564   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1565   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1566                     APFloat::rmNearestTiesToEven, &Lost);
1567   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1568 }
1569 
1570 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1571   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1572 }
1573 
1574 //===----------------------------------------------------------------------===//
1575 // AsmParser
1576 //===----------------------------------------------------------------------===//
1577 
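// Map a register kind and width (in 32-bit registers) to the corresponding
// register class ID, or -1 if no such class exists.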
1578 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1579   if (Is == IS_VGPR) {
1580     switch (RegWidth) {
1581       default: return -1;
1582       case 1: return AMDGPU::VGPR_32RegClassID;
1583       case 2: return AMDGPU::VReg_64RegClassID;
1584       case 3: return AMDGPU::VReg_96RegClassID;
1585       case 4: return AMDGPU::VReg_128RegClassID;
1586       case 8: return AMDGPU::VReg_256RegClassID;
1587       case 16: return AMDGPU::VReg_512RegClassID;
1588     }
1589   } else if (Is == IS_TTMP) {
1590     switch (RegWidth) {
1591       default: return -1;
1592       case 1: return AMDGPU::TTMP_32RegClassID;
1593       case 2: return AMDGPU::TTMP_64RegClassID;
1594       case 4: return AMDGPU::TTMP_128RegClassID;
1595       case 8: return AMDGPU::TTMP_256RegClassID;
1596       case 16: return AMDGPU::TTMP_512RegClassID;
1597     }
1598   } else if (Is == IS_SGPR) {
1599     switch (RegWidth) {
1600       default: return -1;
1601       case 1: return AMDGPU::SGPR_32RegClassID;
1602       case 2: return AMDGPU::SGPR_64RegClassID;
1603       case 4: return AMDGPU::SGPR_128RegClassID;
1604       case 8: return AMDGPU::SGPR_256RegClassID;
1605       case 16: return AMDGPU::SGPR_512RegClassID;
1606     }
1607   }
1608   return -1;
1609 }
1610 
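// Translate a special register name (exec, vcc, m0, ...) to its register
// enum value, or 0 if the name is not recognized.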
1611 static unsigned getSpecialRegForName(StringRef RegName) {
1612   return StringSwitch<unsigned>(RegName)
1613     .Case("exec", AMDGPU::EXEC)
1614     .Case("vcc", AMDGPU::VCC)
1615     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1616     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1617     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1618     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1619     .Case("m0", AMDGPU::M0)
1620     .Case("scc", AMDGPU::SCC)
1621     .Case("tba", AMDGPU::TBA)
1622     .Case("tma", AMDGPU::TMA)
1623     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1624     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1625     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1626     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1627     .Case("vcc_lo", AMDGPU::VCC_LO)
1628     .Case("vcc_hi", AMDGPU::VCC_HI)
1629     .Case("exec_lo", AMDGPU::EXEC_LO)
1630     .Case("exec_hi", AMDGPU::EXEC_HI)
1631     .Case("tma_lo", AMDGPU::TMA_LO)
1632     .Case("tma_hi", AMDGPU::TMA_HI)
1633     .Case("tba_lo", AMDGPU::TBA_LO)
1634     .Case("tba_hi", AMDGPU::TBA_HI)
1635     .Default(0);
1636 }
1637 
1638 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1639                                     SMLoc &EndLoc) {
1640   auto R = parseRegister();
1641   if (!R) return true;
1642   assert(R->isReg());
1643   RegNo = R->getReg();
1644   StartLoc = R->getStartLoc();
1645   EndLoc = R->getEndLoc();
1646   return false;
1647 }
1648 
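// Try to extend the register span described by (Reg, RegWidth) with the next
// register Reg1 parsed from a register list. Special register halves are
// combined into their 64-bit counterpart (e.g. exec_lo/exec_hi -> exec);
// VGPR/SGPR/TTMP registers must be consecutive.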
1649 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1650                                             RegisterKind RegKind, unsigned Reg1,
1651                                             unsigned RegNum) {
1652   switch (RegKind) {
1653   case IS_SPECIAL:
1654     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1655       Reg = AMDGPU::EXEC;
1656       RegWidth = 2;
1657       return true;
1658     }
1659     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1660       Reg = AMDGPU::FLAT_SCR;
1661       RegWidth = 2;
1662       return true;
1663     }
1664     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1665       Reg = AMDGPU::XNACK_MASK;
1666       RegWidth = 2;
1667       return true;
1668     }
1669     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1670       Reg = AMDGPU::VCC;
1671       RegWidth = 2;
1672       return true;
1673     }
1674     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1675       Reg = AMDGPU::TBA;
1676       RegWidth = 2;
1677       return true;
1678     }
1679     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1680       Reg = AMDGPU::TMA;
1681       RegWidth = 2;
1682       return true;
1683     }
1684     return false;
1685   case IS_VGPR:
1686   case IS_SGPR:
1687   case IS_TTMP:
1688     if (Reg1 != Reg + RegWidth) {
1689       return false;
1690     }
1691     RegWidth++;
1692     return true;
1693   default:
1694     llvm_unreachable("unexpected register kind");
1695   }
1696 }
1697 
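// Parse a single register reference: a special register name, a plain
// register such as v0 or s5, a range such as s[2:3], or a bracketed list of
// consecutive registers such as [s0,s1,s2,s3].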
1698 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1699                                           unsigned &RegNum, unsigned &RegWidth,
1700                                           unsigned *DwordRegIndex) {
1701   if (DwordRegIndex) { *DwordRegIndex = 0; }
1702   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1703   if (getLexer().is(AsmToken::Identifier)) {
1704     StringRef RegName = Parser.getTok().getString();
1705     if ((Reg = getSpecialRegForName(RegName))) {
1706       Parser.Lex();
1707       RegKind = IS_SPECIAL;
1708     } else {
1709       unsigned RegNumIndex = 0;
1710       if (RegName[0] == 'v') {
1711         RegNumIndex = 1;
1712         RegKind = IS_VGPR;
1713       } else if (RegName[0] == 's') {
1714         RegNumIndex = 1;
1715         RegKind = IS_SGPR;
1716       } else if (RegName.startswith("ttmp")) {
1717         RegNumIndex = strlen("ttmp");
1718         RegKind = IS_TTMP;
1719       } else {
1720         return false;
1721       }
1722       if (RegName.size() > RegNumIndex) {
1723         // Single 32-bit register: vXX.
1724         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1725           return false;
1726         Parser.Lex();
1727         RegWidth = 1;
1728       } else {
1729         // Range of registers: v[XX:YY]. ":YY" is optional.
1730         Parser.Lex();
1731         int64_t RegLo, RegHi;
1732         if (getLexer().isNot(AsmToken::LBrac))
1733           return false;
1734         Parser.Lex();
1735 
1736         if (getParser().parseAbsoluteExpression(RegLo))
1737           return false;
1738 
1739         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1740         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1741           return false;
1742         Parser.Lex();
1743 
1744         if (isRBrace) {
1745           RegHi = RegLo;
1746         } else {
1747           if (getParser().parseAbsoluteExpression(RegHi))
1748             return false;
1749 
1750           if (getLexer().isNot(AsmToken::RBrac))
1751             return false;
1752           Parser.Lex();
1753         }
1754         RegNum = (unsigned) RegLo;
1755         RegWidth = (RegHi - RegLo) + 1;
1756       }
1757     }
1758   } else if (getLexer().is(AsmToken::LBrac)) {
1759     // List of consecutive registers: [s0,s1,s2,s3]
1760     Parser.Lex();
1761     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1762       return false;
1763     if (RegWidth != 1)
1764       return false;
1765     RegisterKind RegKind1;
1766     unsigned Reg1, RegNum1, RegWidth1;
1767     do {
1768       if (getLexer().is(AsmToken::Comma)) {
1769         Parser.Lex();
1770       } else if (getLexer().is(AsmToken::RBrac)) {
1771         Parser.Lex();
1772         break;
1773       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1774         if (RegWidth1 != 1) {
1775           return false;
1776         }
1777         if (RegKind1 != RegKind) {
1778           return false;
1779         }
1780         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1781           return false;
1782         }
1783       } else {
1784         return false;
1785       }
1786     } while (true);
1787   } else {
1788     return false;
1789   }
1790   switch (RegKind) {
1791   case IS_SPECIAL:
1792     RegNum = 0;
1793     RegWidth = 1;
1794     break;
1795   case IS_VGPR:
1796   case IS_SGPR:
1797   case IS_TTMP:
1798   {
1799     unsigned Size = 1;
1800     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned.
      // Max required alignment is 4 dwords.
1802       Size = std::min(RegWidth, 4u);
1803     }
1804     if (RegNum % Size != 0)
1805       return false;
1806     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1807     RegNum = RegNum / Size;
1808     int RCID = getRegClass(RegKind, RegWidth);
1809     if (RCID == -1)
1810       return false;
1811     const MCRegisterClass RC = TRI->getRegClass(RCID);
1812     if (RegNum >= RC.getNumRegs())
1813       return false;
1814     Reg = RC.getRegister(RegNum);
1815     break;
1816   }
1817 
1818   default:
1819     llvm_unreachable("unexpected register kind");
1820   }
1821 
1822   if (!subtargetHasRegister(*TRI, Reg))
1823     return false;
1824   return true;
1825 }
1826 
1827 Optional<StringRef>
1828 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1829   switch (RegKind) {
1830   case IS_VGPR:
1831     return StringRef(".amdgcn.next_free_vgpr");
1832   case IS_SGPR:
1833     return StringRef(".amdgcn.next_free_sgpr");
1834   default:
1835     return None;
1836   }
1837 }
1838 
1839 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1840   auto SymbolName = getGprCountSymbolName(RegKind);
1841   assert(SymbolName && "initializing invalid register kind");
1842   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1843   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1844 }
1845 
1846 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1847                                             unsigned DwordRegIndex,
1848                                             unsigned RegWidth) {
1849   // Symbols are only defined for GCN targets
1850   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
1851     return true;
1852 
1853   auto SymbolName = getGprCountSymbolName(RegKind);
1854   if (!SymbolName)
1855     return true;
1856   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1857 
1858   int64_t NewMax = DwordRegIndex + RegWidth - 1;
1859   int64_t OldCount;
1860 
1861   if (!Sym->isVariable())
1862     return !Error(getParser().getTok().getLoc(),
1863                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
1864   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
1865     return !Error(
1866         getParser().getTok().getLoc(),
1867         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
1868 
1869   if (OldCount <= NewMax)
1870     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
1871 
1872   return true;
1873 }
1874 
1875 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1876   const auto &Tok = Parser.getTok();
1877   SMLoc StartLoc = Tok.getLoc();
1878   SMLoc EndLoc = Tok.getEndLoc();
1879   RegisterKind RegKind;
1880   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1881 
1882   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1883     return nullptr;
1884   }
1885   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1886     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
1887       return nullptr;
1888   } else
1889     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1890   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1891 }
1892 
1893 bool
1894 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1895   if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1896       (getLexer().getKind() == AsmToken::Integer ||
1897        getLexer().getKind() == AsmToken::Real)) {
1898     // This is a workaround for handling operands like these:
1899     //     |1.0|
1900     //     |-1|
1901     // This syntax is not compatible with syntax of standard
1902     // MC expressions (due to the trailing '|').
1903 
1904     SMLoc EndLoc;
1905     const MCExpr *Expr;
1906 
1907     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1908       return true;
1909     }
1910 
1911     return !Expr->evaluateAsAbsolute(Val);
1912   }
1913 
1914   return getParser().parseAbsoluteExpression(Val);
1915 }
1916 
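// Parse an immediate operand: an optional leading '-' followed by an integer
// or floating-point literal.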
1917 OperandMatchResultTy
1918 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1919   // TODO: add syntactic sugar for 1/(2*PI)
1920   bool Minus = false;
1921   if (getLexer().getKind() == AsmToken::Minus) {
1922     const AsmToken NextToken = getLexer().peekTok();
1923     if (!NextToken.is(AsmToken::Integer) &&
1924         !NextToken.is(AsmToken::Real)) {
1925         return MatchOperand_NoMatch;
1926     }
1927     Minus = true;
1928     Parser.Lex();
1929   }
1930 
1931   SMLoc S = Parser.getTok().getLoc();
1932   switch(getLexer().getKind()) {
1933   case AsmToken::Integer: {
1934     int64_t IntVal;
1935     if (parseAbsoluteExpr(IntVal, AbsMod))
1936       return MatchOperand_ParseFail;
1937     if (Minus)
1938       IntVal *= -1;
1939     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1940     return MatchOperand_Success;
1941   }
1942   case AsmToken::Real: {
1943     int64_t IntVal;
1944     if (parseAbsoluteExpr(IntVal, AbsMod))
1945       return MatchOperand_ParseFail;
1946 
1947     APFloat F(BitsToDouble(IntVal));
1948     if (Minus)
1949       F.changeSign();
1950     Operands.push_back(
1951         AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
1952                                  AMDGPUOperand::ImmTyNone, true));
1953     return MatchOperand_Success;
1954   }
1955   default:
1956     return MatchOperand_NoMatch;
1957   }
1958 }
1959 
1960 OperandMatchResultTy
1961 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1962   if (auto R = parseRegister()) {
1963     assert(R->isReg());
1964     R->Reg.IsForcedVOP3 = isForcedVOP3();
1965     Operands.push_back(std::move(R));
1966     return MatchOperand_Success;
1967   }
1968   return MatchOperand_NoMatch;
1969 }
1970 
1971 OperandMatchResultTy
1972 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1973   auto res = parseImm(Operands, AbsMod);
1974   if (res != MatchOperand_NoMatch) {
1975     return res;
1976   }
1977 
1978   return parseReg(Operands);
1979 }
1980 
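// Parse a source operand with optional floating-point input modifiers:
// a leading '-', 'neg(...)', 'abs(...)' or '|...|'.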
1981 OperandMatchResultTy
1982 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
1983                                               bool AllowImm) {
1984   bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
1985 
  if (getLexer().getKind() == AsmToken::Minus) {
1987     const AsmToken NextToken = getLexer().peekTok();
1988 
1989     // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
1990     if (NextToken.is(AsmToken::Minus)) {
1991       Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
1992       return MatchOperand_ParseFail;
1993     }
1994 
    // '-' followed by an integer literal N should be interpreted as integer
    // negation rather than a floating-point NEG modifier applied to N.
    // Besides being counter-intuitive, such use of the floating-point NEG
    // modifier results in different meanings of integer literals used with
    // VOP1/2/C and VOP3, for example:
    //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
    //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
    // Negative fp literals should be handled likewise for uniformity.
2003     if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
2004       Parser.Lex();
2005       Negate = true;
2006     }
2007   }
2008 
2009   if (getLexer().getKind() == AsmToken::Identifier &&
2010       Parser.getTok().getString() == "neg") {
2011     if (Negate) {
2012       Error(Parser.getTok().getLoc(), "expected register or immediate");
2013       return MatchOperand_ParseFail;
2014     }
2015     Parser.Lex();
2016     Negate2 = true;
2017     if (getLexer().isNot(AsmToken::LParen)) {
2018       Error(Parser.getTok().getLoc(), "expected left paren after neg");
2019       return MatchOperand_ParseFail;
2020     }
2021     Parser.Lex();
2022   }
2023 
2024   if (getLexer().getKind() == AsmToken::Identifier &&
2025       Parser.getTok().getString() == "abs") {
2026     Parser.Lex();
2027     Abs2 = true;
2028     if (getLexer().isNot(AsmToken::LParen)) {
2029       Error(Parser.getTok().getLoc(), "expected left paren after abs");
2030       return MatchOperand_ParseFail;
2031     }
2032     Parser.Lex();
2033   }
2034 
2035   if (getLexer().getKind() == AsmToken::Pipe) {
2036     if (Abs2) {
2037       Error(Parser.getTok().getLoc(), "expected register or immediate");
2038       return MatchOperand_ParseFail;
2039     }
2040     Parser.Lex();
2041     Abs = true;
2042   }
2043 
2044   OperandMatchResultTy Res;
2045   if (AllowImm) {
2046     Res = parseRegOrImm(Operands, Abs);
2047   } else {
2048     Res = parseReg(Operands);
2049   }
2050   if (Res != MatchOperand_Success) {
2051     return Res;
2052   }
2053 
2054   AMDGPUOperand::Modifiers Mods;
2055   if (Abs) {
2056     if (getLexer().getKind() != AsmToken::Pipe) {
2057       Error(Parser.getTok().getLoc(), "expected vertical bar");
2058       return MatchOperand_ParseFail;
2059     }
2060     Parser.Lex();
2061     Mods.Abs = true;
2062   }
2063   if (Abs2) {
2064     if (getLexer().isNot(AsmToken::RParen)) {
2065       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2066       return MatchOperand_ParseFail;
2067     }
2068     Parser.Lex();
2069     Mods.Abs = true;
2070   }
2071 
2072   if (Negate) {
2073     Mods.Neg = true;
2074   } else if (Negate2) {
2075     if (getLexer().isNot(AsmToken::RParen)) {
2076       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2077       return MatchOperand_ParseFail;
2078     }
2079     Parser.Lex();
2080     Mods.Neg = true;
2081   }
2082 
2083   if (Mods.hasFPModifiers()) {
2084     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2085     Op.setModifiers(Mods);
2086   }
2087   return MatchOperand_Success;
2088 }
2089 
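// Parse a source operand with an optional integer 'sext(...)' input modifier.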
2090 OperandMatchResultTy
2091 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2092                                                bool AllowImm) {
2093   bool Sext = false;
2094 
2095   if (getLexer().getKind() == AsmToken::Identifier &&
2096       Parser.getTok().getString() == "sext") {
2097     Parser.Lex();
2098     Sext = true;
2099     if (getLexer().isNot(AsmToken::LParen)) {
2100       Error(Parser.getTok().getLoc(), "expected left paren after sext");
2101       return MatchOperand_ParseFail;
2102     }
2103     Parser.Lex();
2104   }
2105 
2106   OperandMatchResultTy Res;
2107   if (AllowImm) {
2108     Res = parseRegOrImm(Operands);
2109   } else {
2110     Res = parseReg(Operands);
2111   }
2112   if (Res != MatchOperand_Success) {
2113     return Res;
2114   }
2115 
2116   AMDGPUOperand::Modifiers Mods;
2117   if (Sext) {
2118     if (getLexer().isNot(AsmToken::RParen)) {
2119       Error(Parser.getTok().getLoc(), "expected closing parentheses");
2120       return MatchOperand_ParseFail;
2121     }
2122     Parser.Lex();
2123     Mods.Sext = true;
2124   }
2125 
2126   if (Mods.hasIntModifiers()) {
2127     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2128     Op.setModifiers(Mods);
2129   }
2130 
2131   return MatchOperand_Success;
2132 }
2133 
2134 OperandMatchResultTy
2135 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2136   return parseRegOrImmWithFPInputMods(Operands, false);
2137 }
2138 
2139 OperandMatchResultTy
2140 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2141   return parseRegOrImmWithIntInputMods(Operands, false);
2142 }
2143 
2144 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2145   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2146   if (Reg) {
2147     Operands.push_back(std::move(Reg));
2148     return MatchOperand_Success;
2149   }
2150 
2151   const AsmToken &Tok = Parser.getTok();
2152   if (Tok.getString() == "off") {
2153     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
2154                                                 AMDGPUOperand::ImmTyOff, false));
2155     Parser.Lex();
2156     return MatchOperand_Success;
2157   }
2158 
2159   return MatchOperand_NoMatch;
2160 }
2161 
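// Reject matches that are inconsistent with a forced encoding (e.g. an
// explicit _e32/_e64, _dpp or _sdwa mnemonic suffix) and enforce a few
// per-opcode operand restrictions.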
2162 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2163   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2164 
2165   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2166       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2167       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2168       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2169     return Match_InvalidOperand;
2170 
2171   if ((TSFlags & SIInstrFlags::VOP3) &&
2172       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2173       getForcedEncodingSize() != 64)
2174     return Match_PreferE32;
2175 
2176   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2177       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD.
2179     auto OpNum =
2180         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2181     const auto &Op = Inst.getOperand(OpNum);
2182     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2183       return Match_InvalidOperand;
2184     }
2185   }
2186 
2187   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2188     // FIXME: Produces error without correct column reported.
2189     auto OpNum =
2190         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2191     const auto &Op = Inst.getOperand(OpNum);
2192     if (Op.getImm() != 0)
2193       return Match_InvalidOperand;
2194   }
2195 
2196   return Match_Success;
2197 }
2198 
// Return the list of asm variants that should be tried when matching.
2200 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2201   if (getForcedEncodingSize() == 32) {
2202     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2203     return makeArrayRef(Variants);
2204   }
2205 
2206   if (isForcedVOP3()) {
2207     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2208     return makeArrayRef(Variants);
2209   }
2210 
2211   if (isForcedSDWA()) {
2212     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2213                                         AMDGPUAsmVariants::SDWA9};
2214     return makeArrayRef(Variants);
2215   }
2216 
2217   if (isForcedDPP()) {
2218     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2219     return makeArrayRef(Variants);
2220   }
2221 
2222   static const unsigned Variants[] = {
2223     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2224     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2225   };
2226 
2227   return makeArrayRef(Variants);
2228 }
2229 
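// Return the first implicit SGPR read (FLAT_SCR, VCC or M0) of the given
// instruction, or AMDGPU::NoRegister if there is none.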
2230 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2231   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2232   const unsigned Num = Desc.getNumImplicitUses();
2233   for (unsigned i = 0; i < Num; ++i) {
2234     unsigned Reg = Desc.ImplicitUses[i];
2235     switch (Reg) {
2236     case AMDGPU::FLAT_SCR:
2237     case AMDGPU::VCC:
2238     case AMDGPU::M0:
2239       return Reg;
2240     default:
2241       break;
2242     }
2243   }
2244   return AMDGPU::NoRegister;
2245 }
2246 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
2251 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2252                                        unsigned OpIdx) const {
2253   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2254 
2255   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2256     return false;
2257   }
2258 
2259   const MCOperand &MO = Inst.getOperand(OpIdx);
2260 
2261   int64_t Val = MO.getImm();
2262   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2263 
2264   switch (OpSize) { // expected operand size
2265   case 8:
2266     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2267   case 4:
2268     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2269   case 2: {
2270     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2271     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2272         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2273       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2274     } else {
2275       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2276     }
2277   }
2278   default:
2279     llvm_unreachable("invalid operand size");
2280   }
2281 }
2282 
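// An operand uses the constant bus if it is an immediate that is not an
// inline constant, an expression, or an SGPR.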
2283 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2284   const MCOperand &MO = Inst.getOperand(OpIdx);
2285   if (MO.isImm()) {
2286     return !isInlineConstant(Inst, OpIdx);
2287   }
2288   return !MO.isReg() ||
2289          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2290 }
2291 
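// VOP1/2/3/3P/C and SDWA instructions may use the constant bus at most once;
// implicit SGPR reads, special imm operands, SGPR sources and literals all
// count towards this limit.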
2292 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2293   const unsigned Opcode = Inst.getOpcode();
2294   const MCInstrDesc &Desc = MII.get(Opcode);
2295   unsigned ConstantBusUseCount = 0;
2296 
2297   if (Desc.TSFlags &
2298       (SIInstrFlags::VOPC |
2299        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2300        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2301        SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc.)
2303     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2304       ++ConstantBusUseCount;
2305     }
2306 
2307     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2308     if (SGPRUsed != AMDGPU::NoRegister) {
2309       ++ConstantBusUseCount;
2310     }
2311 
2312     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2313     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2314     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2315 
2316     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2317 
2318     for (int OpIdx : OpIndices) {
2319       if (OpIdx == -1) break;
2320 
2321       const MCOperand &MO = Inst.getOperand(OpIdx);
2322       if (usesConstantBus(Inst, OpIdx)) {
2323         if (MO.isReg()) {
2324           const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection like these
2326           //   s0, s[0:1]
2327           //   flat_scratch_lo, flat_scratch
2328           //   flat_scratch_lo, flat_scratch_hi
2329           // are theoretically valid but they are disabled anyway.
2330           // Note that this code mimics SIInstrInfo::verifyInstruction
2331           if (Reg != SGPRUsed) {
2332             ++ConstantBusUseCount;
2333           }
2334           SGPRUsed = Reg;
2335         } else { // Expression or a literal
2336           ++ConstantBusUseCount;
2337         }
2338       }
2339     }
2340   }
2341 
2342   return ConstantBusUseCount <= 1;
2343 }
2344 
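// If the vdst operand is marked earlyclobber, the destination register must
// not overlap any source register.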
2345 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2346   const unsigned Opcode = Inst.getOpcode();
2347   const MCInstrDesc &Desc = MII.get(Opcode);
2348 
2349   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2350   if (DstIdx == -1 ||
2351       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2352     return true;
2353   }
2354 
2355   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2356 
2357   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2358   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2359   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2360 
2361   assert(DstIdx != -1);
2362   const MCOperand &Dst = Inst.getOperand(DstIdx);
2363   assert(Dst.isReg());
2364   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2365 
2366   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2367 
2368   for (int SrcIdx : SrcIndices) {
2369     if (SrcIdx == -1) break;
2370     const MCOperand &Src = Inst.getOperand(SrcIdx);
2371     if (Src.isReg()) {
2372       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2373       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2374         return false;
2375       }
2376     }
2377   }
2378 
2379   return true;
2380 }
2381 
2382 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2383 
2384   const unsigned Opc = Inst.getOpcode();
2385   const MCInstrDesc &Desc = MII.get(Opc);
2386 
2387   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2388     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2389     assert(ClampIdx != -1);
2390     return Inst.getOperand(ClampIdx).getImm() == 0;
2391   }
2392 
2393   return true;
2394 }
2395 
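// Check that the width of the MIMG vdata operand matches the number of
// components enabled by dmask (plus one dword if tfe is set), taking packed
// d16 into account.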
2396 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2397 
2398   const unsigned Opc = Inst.getOpcode();
2399   const MCInstrDesc &Desc = MII.get(Opc);
2400 
2401   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2402     return true;
2403 
2404   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2405   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2406   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2407 
2408   assert(VDataIdx != -1);
2409   assert(DMaskIdx != -1);
2410   assert(TFEIdx != -1);
2411 
2412   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  unsigned TFESize = Inst.getOperand(TFEIdx).getImm() ? 1 : 0;
2414   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2415   if (DMask == 0)
2416     DMask = 1;
2417 
2418   unsigned DataSize =
2419     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2420   if (hasPackedD16()) {
2421     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2422     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2423       DataSize = (DataSize + 1) / 2;
2424   }
2425 
2426   return (VDataSize / 4) == DataSize + TFESize;
2427 }
2428 
2429 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2430 
2431   const unsigned Opc = Inst.getOpcode();
2432   const MCInstrDesc &Desc = MII.get(Opc);
2433 
2434   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2435     return true;
2436   if (!Desc.mayLoad() || !Desc.mayStore())
2437     return true; // Not atomic
2438 
2439   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2440   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2441 
2442   // This is an incomplete check because image_atomic_cmpswap
2443   // may only use 0x3 and 0xf while other atomic operations
2444   // may use 0x1 and 0x3. However these limitations are
2445   // verified when we check that dmask matches dst size.
2446   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2447 }
2448 
2449 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2450 
2451   const unsigned Opc = Inst.getOpcode();
2452   const MCInstrDesc &Desc = MII.get(Opc);
2453 
2454   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2455     return true;
2456 
2457   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2458   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2459 
2460   // GATHER4 instructions use dmask in a different fashion compared to
2461   // other MIMG instructions. The only useful DMASK values are
2462   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2463   // (red,red,red,red) etc.) The ISA document doesn't mention
2464   // this.
2465   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2466 }
2467 
2468 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2469 
2470   const unsigned Opc = Inst.getOpcode();
2471   const MCInstrDesc &Desc = MII.get(Opc);
2472 
2473   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2474     return true;
2475 
2476   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2477   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2478     if (isCI() || isSI())
2479       return false;
2480   }
2481 
2482   return true;
2483 }
2484 
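// Return true for "reverse" opcodes whose src0 and src1 operands are swapped
// relative to the base operation (v_subrev_*, v_lshlrev_*, etc.).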
static bool IsRevOpcode(const unsigned Opcode) {
2487   switch (Opcode) {
2488   case AMDGPU::V_SUBREV_F32_e32:
2489   case AMDGPU::V_SUBREV_F32_e64:
2490   case AMDGPU::V_SUBREV_F32_e32_si:
2491   case AMDGPU::V_SUBREV_F32_e32_vi:
2492   case AMDGPU::V_SUBREV_F32_e64_si:
2493   case AMDGPU::V_SUBREV_F32_e64_vi:
2494   case AMDGPU::V_SUBREV_I32_e32:
2495   case AMDGPU::V_SUBREV_I32_e64:
2496   case AMDGPU::V_SUBREV_I32_e32_si:
2497   case AMDGPU::V_SUBREV_I32_e64_si:
2498   case AMDGPU::V_SUBBREV_U32_e32:
2499   case AMDGPU::V_SUBBREV_U32_e64:
2500   case AMDGPU::V_SUBBREV_U32_e32_si:
2501   case AMDGPU::V_SUBBREV_U32_e32_vi:
2502   case AMDGPU::V_SUBBREV_U32_e64_si:
2503   case AMDGPU::V_SUBBREV_U32_e64_vi:
2504   case AMDGPU::V_SUBREV_U32_e32:
2505   case AMDGPU::V_SUBREV_U32_e64:
2506   case AMDGPU::V_SUBREV_U32_e32_gfx9:
2507   case AMDGPU::V_SUBREV_U32_e32_vi:
2508   case AMDGPU::V_SUBREV_U32_e64_gfx9:
2509   case AMDGPU::V_SUBREV_U32_e64_vi:
2510   case AMDGPU::V_SUBREV_F16_e32:
2511   case AMDGPU::V_SUBREV_F16_e64:
2512   case AMDGPU::V_SUBREV_F16_e32_vi:
2513   case AMDGPU::V_SUBREV_F16_e64_vi:
2514   case AMDGPU::V_SUBREV_U16_e32:
2515   case AMDGPU::V_SUBREV_U16_e64:
2516   case AMDGPU::V_SUBREV_U16_e32_vi:
2517   case AMDGPU::V_SUBREV_U16_e64_vi:
2518   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
2519   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
2520   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
2521   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
2522   case AMDGPU::V_LSHLREV_B32_e32_si:
2523   case AMDGPU::V_LSHLREV_B32_e64_si:
2524   case AMDGPU::V_LSHLREV_B16_e32_vi:
2525   case AMDGPU::V_LSHLREV_B16_e64_vi:
2526   case AMDGPU::V_LSHLREV_B32_e32_vi:
2527   case AMDGPU::V_LSHLREV_B32_e64_vi:
2528   case AMDGPU::V_LSHLREV_B64_vi:
2529   case AMDGPU::V_LSHRREV_B32_e32_si:
2530   case AMDGPU::V_LSHRREV_B32_e64_si:
2531   case AMDGPU::V_LSHRREV_B16_e32_vi:
2532   case AMDGPU::V_LSHRREV_B16_e64_vi:
2533   case AMDGPU::V_LSHRREV_B32_e32_vi:
2534   case AMDGPU::V_LSHRREV_B32_e64_vi:
2535   case AMDGPU::V_LSHRREV_B64_vi:
2536   case AMDGPU::V_ASHRREV_I32_e64_si:
2537   case AMDGPU::V_ASHRREV_I32_e32_si:
2538   case AMDGPU::V_ASHRREV_I16_e32_vi:
2539   case AMDGPU::V_ASHRREV_I16_e64_vi:
2540   case AMDGPU::V_ASHRREV_I32_e32_vi:
2541   case AMDGPU::V_ASHRREV_I32_e64_vi:
2542   case AMDGPU::V_ASHRREV_I64_vi:
2543   case AMDGPU::V_PK_LSHLREV_B16_vi:
2544   case AMDGPU::V_PK_LSHRREV_B16_vi:
2545   case AMDGPU::V_PK_ASHRREV_I16_vi:
2546     return true;
2547   default:
2548     return false;
2549   }
2550 }
2551 
2552 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2553 
2554   using namespace SIInstrFlags;
2555   const unsigned Opcode = Inst.getOpcode();
2556   const MCInstrDesc &Desc = MII.get(Opcode);
2557 
2558   // lds_direct register is defined so that it can be used
2559   // with 9-bit operands only. Ignore encodings which do not accept these.
2560   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2561     return true;
2562 
2563   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2564   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2565   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2566 
2567   const int SrcIndices[] = { Src1Idx, Src2Idx };
2568 
2569   // lds_direct cannot be specified as either src1 or src2.
2570   for (int SrcIdx : SrcIndices) {
2571     if (SrcIdx == -1) break;
2572     const MCOperand &Src = Inst.getOperand(SrcIdx);
2573     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2574       return false;
2575     }
2576   }
2577 
2578   if (Src0Idx == -1)
2579     return true;
2580 
2581   const MCOperand &Src = Inst.getOperand(Src0Idx);
2582   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2583     return true;
2584 
2585   // lds_direct is specified as src0. Check additional limitations.
2586   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
2587 }
2588 
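// SOP2 and SOPC instructions may encode at most one unique 32-bit literal.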
2589 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2590   unsigned Opcode = Inst.getOpcode();
2591   const MCInstrDesc &Desc = MII.get(Opcode);
2592   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2593     return true;
2594 
2595   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2596   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2597 
2598   const int OpIndices[] = { Src0Idx, Src1Idx };
2599 
2600   unsigned NumLiterals = 0;
2601   uint32_t LiteralValue;
2602 
2603   for (int OpIdx : OpIndices) {
2604     if (OpIdx == -1) break;
2605 
2606     const MCOperand &MO = Inst.getOperand(OpIdx);
2607     if (MO.isImm() &&
        // Exclude special imm operands (like those used by s_set_gpr_idx_on).
2609         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2610         !isInlineConstant(Inst, OpIdx)) {
2611       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2612       if (NumLiterals == 0 || LiteralValue != Value) {
2613         LiteralValue = Value;
2614         ++NumLiterals;
2615       }
2616     }
2617   }
2618 
2619   return NumLiterals <= 1;
2620 }
2621 
2622 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2623                                           const SMLoc &IDLoc) {
2624   if (!validateLdsDirect(Inst)) {
2625     Error(IDLoc,
2626       "invalid use of lds_direct");
2627     return false;
2628   }
2629   if (!validateSOPLiteral(Inst)) {
2630     Error(IDLoc,
2631       "only one literal operand is allowed");
2632     return false;
2633   }
2634   if (!validateConstantBusLimitations(Inst)) {
2635     Error(IDLoc,
2636       "invalid operand (violates constant bus restrictions)");
2637     return false;
2638   }
2639   if (!validateEarlyClobberLimitations(Inst)) {
2640     Error(IDLoc,
2641       "destination must be different than all sources");
2642     return false;
2643   }
2644   if (!validateIntClampSupported(Inst)) {
2645     Error(IDLoc,
2646       "integer clamping is not supported on this GPU");
2647     return false;
2648   }
2649   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
2650   if (!validateMIMGD16(Inst)) {
2651     Error(IDLoc,
2652       "d16 modifier is not supported on this GPU");
2653     return false;
2654   }
2655   if (!validateMIMGDataSize(Inst)) {
2656     Error(IDLoc,
2657       "image data size does not match dmask and tfe");
2658     return false;
2659   }
2660   if (!validateMIMGAtomicDMask(Inst)) {
2661     Error(IDLoc,
2662       "invalid atomic image dmask");
2663     return false;
2664   }
2665   if (!validateMIMGGatherDMask(Inst)) {
2666     Error(IDLoc,
2667       "invalid image_gather dmask: only one bit must be set");
2668     return false;
2669   }
2670 
2671   return true;
2672 }
2673 
2674 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
2675                                             const FeatureBitset &FBS,
2676                                             unsigned VariantID = 0);
2677 
2678 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2679                                               OperandVector &Operands,
2680                                               MCStreamer &Out,
2681                                               uint64_t &ErrorInfo,
2682                                               bool MatchingInlineAsm) {
2683   MCInst Inst;
2684   unsigned Result = Match_Success;
2685   for (auto Variant : getMatchedVariants()) {
2686     uint64_t EI;
2687     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2688                                   Variant);
    // Match statuses are ordered from least to most specific. The most
    // specific status is used as the result:
    //   Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
    //                      < Match_PreferE32
2692     if ((R == Match_Success) ||
2693         (R == Match_PreferE32) ||
2694         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2695         (R == Match_InvalidOperand && Result != Match_MissingFeature
2696                                    && Result != Match_PreferE32) ||
2697         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2698                                    && Result != Match_MissingFeature
2699                                    && Result != Match_PreferE32)) {
2700       Result = R;
2701       ErrorInfo = EI;
2702     }
2703     if (R == Match_Success)
2704       break;
2705   }
2706 
2707   switch (Result) {
2708   default: break;
2709   case Match_Success:
2710     if (!validateInstruction(Inst, IDLoc)) {
2711       return true;
2712     }
2713     Inst.setLoc(IDLoc);
2714     Out.EmitInstruction(Inst, getSTI());
2715     return false;
2716 
2717   case Match_MissingFeature:
2718     return Error(IDLoc, "instruction not supported on this GPU");
2719 
2720   case Match_MnemonicFail: {
2721     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2722     std::string Suggestion = AMDGPUMnemonicSpellCheck(
2723         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2724     return Error(IDLoc, "invalid instruction" + Suggestion,
2725                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
2726   }
2727 
2728   case Match_InvalidOperand: {
2729     SMLoc ErrorLoc = IDLoc;
2730     if (ErrorInfo != ~0ULL) {
2731       if (ErrorInfo >= Operands.size()) {
2732         return Error(IDLoc, "too few operands for instruction");
2733       }
2734       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2735       if (ErrorLoc == SMLoc())
2736         ErrorLoc = IDLoc;
2737     }
2738     return Error(ErrorLoc, "invalid operand for instruction");
2739   }
2740 
2741   case Match_PreferE32:
2742     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2743                         "should be encoded as e32");
2744   }
2745   llvm_unreachable("Implement any new match types added!");
2746 }
2747 
2748 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2749   int64_t Tmp = -1;
2750   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2751     return true;
2752   }
2753   if (getParser().parseAbsoluteExpression(Tmp)) {
2754     return true;
2755   }
2756   Ret = static_cast<uint32_t>(Tmp);
2757   return false;
2758 }
2759 
2760 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2761                                                uint32_t &Minor) {
2762   if (ParseAsAbsoluteExpression(Major))
2763     return TokError("invalid major version");
2764 
2765   if (getLexer().isNot(AsmToken::Comma))
2766     return TokError("minor version number required, comma expected");
2767   Lex();
2768 
2769   if (ParseAsAbsoluteExpression(Minor))
2770     return TokError("invalid minor version");
2771 
2772   return false;
2773 }
2774 
2775 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2776   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2777     return TokError("directive only supported for amdgcn architecture");
2778 
2779   std::string Target;
2780 
2781   SMLoc TargetStart = getTok().getLoc();
2782   if (getParser().parseEscapedString(Target))
2783     return true;
2784   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2785 
2786   std::string ExpectedTarget;
2787   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2788   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2789 
2790   if (Target != ExpectedTargetOS.str())
2791     return getParser().Error(TargetRange.Start, "target must match options",
2792                              TargetRange);
2793 
2794   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2795   return false;
2796 }
2797 
2798 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
2799   return getParser().Error(Range.Start, "value out of range", Range);
2800 }
2801 
2802 bool AMDGPUAsmParser::calculateGPRBlocks(
2803     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
2804     bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
2805     unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
2806     unsigned &SGPRBlocks) {
2807   // TODO(scott.linder): These calculations are duplicated from
2808   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2809   IsaVersion Version = getIsaVersion(getSTI().getCPU());
2810 
2811   unsigned NumVGPRs = NextFreeVGPR;
2812   unsigned NumSGPRs = NextFreeSGPR;
2813   unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
2814 
2815   if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
2816       NumSGPRs > MaxAddressableNumSGPRs)
2817     return OutOfRangeError(SGPRRange);
2818 
2819   NumSGPRs +=
2820       IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
2821 
2822   if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
2823       NumSGPRs > MaxAddressableNumSGPRs)
2824     return OutOfRangeError(SGPRRange);
2825 
2826   if (Features.test(FeatureSGPRInitBug))
2827     NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
2828 
2829   VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
2830   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
2831 
2832   return false;
2833 }
2834 
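// Parse a .amdhsa_kernel descriptor block, for example (kernel name and
// values are illustrative only):
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
//
// The two .amdhsa_next_free_* directives are mandatory; all other fields
// default to the values of the default kernel descriptor.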
2835 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
2836   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2837     return TokError("directive only supported for amdgcn architecture");
2838 
2839   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
2840     return TokError("directive only supported for amdhsa OS");
2841 
2842   StringRef KernelName;
2843   if (getParser().parseIdentifier(KernelName))
2844     return true;
2845 
2846   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
2847 
2848   StringSet<> Seen;
2849 
2850   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
2851 
2852   SMRange VGPRRange;
2853   uint64_t NextFreeVGPR = 0;
2854   SMRange SGPRRange;
2855   uint64_t NextFreeSGPR = 0;
2856   unsigned UserSGPRCount = 0;
2857   bool ReserveVCC = true;
2858   bool ReserveFlatScr = true;
2859   bool ReserveXNACK = hasXNACK();
2860 
2861   while (true) {
2862     while (getLexer().is(AsmToken::EndOfStatement))
2863       Lex();
2864 
2865     if (getLexer().isNot(AsmToken::Identifier))
2866       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
2867 
2868     StringRef ID = getTok().getIdentifier();
2869     SMRange IDRange = getTok().getLocRange();
2870     Lex();
2871 
2872     if (ID == ".end_amdhsa_kernel")
2873       break;
2874 
2875     if (Seen.find(ID) != Seen.end())
2876       return TokError(".amdhsa_ directives cannot be repeated");
2877     Seen.insert(ID);
2878 
2879     SMLoc ValStart = getTok().getLoc();
2880     int64_t IVal;
2881     if (getParser().parseAbsoluteExpression(IVal))
2882       return true;
2883     SMLoc ValEnd = getTok().getLoc();
2884     SMRange ValRange = SMRange(ValStart, ValEnd);
2885 
2886     if (IVal < 0)
2887       return OutOfRangeError(ValRange);
2888 
2889     uint64_t Val = IVal;
2890 
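// Range-check VALUE against the bit width of ENTRY and set the corresponding
// bits in FIELD.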
2891 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
2892   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
2893     return OutOfRangeError(RANGE);                                             \
2894   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
2895 
2896     if (ID == ".amdhsa_group_segment_fixed_size") {
2897       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
2898         return OutOfRangeError(ValRange);
2899       KD.group_segment_fixed_size = Val;
2900     } else if (ID == ".amdhsa_private_segment_fixed_size") {
2901       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
2902         return OutOfRangeError(ValRange);
2903       KD.private_segment_fixed_size = Val;
2904     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
2905       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2906                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
2907                        Val, ValRange);
2908       UserSGPRCount++;
2909     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
2910       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2911                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
2912                        ValRange);
2913       UserSGPRCount++;
2914     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
2915       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2916                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
2917                        ValRange);
2918       UserSGPRCount++;
2919     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
2920       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2921                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
2922                        Val, ValRange);
2923       UserSGPRCount++;
2924     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
2925       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2926                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
2927                        ValRange);
2928       UserSGPRCount++;
2929     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
2930       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2931                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
2932                        ValRange);
2933       UserSGPRCount++;
2934     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
2935       PARSE_BITS_ENTRY(KD.kernel_code_properties,
2936                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
2937                        Val, ValRange);
2938       UserSGPRCount++;
2939     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
2940       PARSE_BITS_ENTRY(
2941           KD.compute_pgm_rsrc2,
2942           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
2943           ValRange);
2944     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
2945       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2946                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
2947                        ValRange);
2948     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
2949       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2950                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
2951                        ValRange);
2952     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
2953       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2954                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
2955                        ValRange);
2956     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
2957       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2958                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
2959                        ValRange);
2960     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
2961       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2962                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
2963                        ValRange);
2964     } else if (ID == ".amdhsa_next_free_vgpr") {
2965       VGPRRange = ValRange;
2966       NextFreeVGPR = Val;
2967     } else if (ID == ".amdhsa_next_free_sgpr") {
2968       SGPRRange = ValRange;
2969       NextFreeSGPR = Val;
2970     } else if (ID == ".amdhsa_reserve_vcc") {
2971       if (!isUInt<1>(Val))
2972         return OutOfRangeError(ValRange);
2973       ReserveVCC = Val;
2974     } else if (ID == ".amdhsa_reserve_flat_scratch") {
2975       if (IVersion.Major < 7)
2976         return getParser().Error(IDRange.Start, "directive requires gfx7+",
2977                                  IDRange);
2978       if (!isUInt<1>(Val))
2979         return OutOfRangeError(ValRange);
2980       ReserveFlatScr = Val;
2981     } else if (ID == ".amdhsa_reserve_xnack_mask") {
2982       if (IVersion.Major < 8)
2983         return getParser().Error(IDRange.Start, "directive requires gfx8+",
2984                                  IDRange);
2985       if (!isUInt<1>(Val))
2986         return OutOfRangeError(ValRange);
2987       ReserveXNACK = Val;
2988     } else if (ID == ".amdhsa_float_round_mode_32") {
2989       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2990                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
2991     } else if (ID == ".amdhsa_float_round_mode_16_64") {
2992       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2993                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
2994     } else if (ID == ".amdhsa_float_denorm_mode_32") {
2995       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2996                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
2997     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
2998       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2999                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3000                        ValRange);
3001     } else if (ID == ".amdhsa_dx10_clamp") {
3002       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3003                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3004     } else if (ID == ".amdhsa_ieee_mode") {
3005       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3006                        Val, ValRange);
3007     } else if (ID == ".amdhsa_fp16_overflow") {
3008       if (IVersion.Major < 9)
3009         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3010                                  IDRange);
3011       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3012                        ValRange);
3013     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3014       PARSE_BITS_ENTRY(
3015           KD.compute_pgm_rsrc2,
3016           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3017           ValRange);
3018     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3019       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3020                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3021                        Val, ValRange);
3022     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3023       PARSE_BITS_ENTRY(
3024           KD.compute_pgm_rsrc2,
3025           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3026           ValRange);
3027     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3028       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3029                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3030                        Val, ValRange);
3031     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3032       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3033                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3034                        Val, ValRange);
3035     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3036       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3037                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3038                        Val, ValRange);
3039     } else if (ID == ".amdhsa_exception_int_div_zero") {
3040       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3041                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3042                        Val, ValRange);
3043     } else {
3044       return getParser().Error(IDRange.Start,
3045                                "unknown .amdhsa_kernel directive", IDRange);
3046     }
3047 
3048 #undef PARSE_BITS_ENTRY
3049   }
3050 
3051   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3052     return TokError(".amdhsa_next_free_vgpr directive is required");
3053 
3054   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3055     return TokError(".amdhsa_next_free_sgpr directive is required");
3056 
3057   unsigned VGPRBlocks;
3058   unsigned SGPRBlocks;
3059   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3060                          ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
3061                          SGPRRange, VGPRBlocks, SGPRBlocks))
3062     return true;
3063 
3064   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3065           VGPRBlocks))
3066     return OutOfRangeError(VGPRRange);
3067   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3068                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3069 
3070   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3071           SGPRBlocks))
3072     return OutOfRangeError(SGPRRange);
3073   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3074                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3075                   SGPRBlocks);
3076 
3077   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3078     return TokError("too many user SGPRs enabled");
3079   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3080                   UserSGPRCount);
3081 
3082   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3083       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3084       ReserveFlatScr, ReserveXNACK);
3085   return false;
3086 }
3087 
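// Example of the accepted form (illustrative): .hsa_code_object_version 2,1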
3088 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3089   uint32_t Major;
3090   uint32_t Minor;
3091 
3092   if (ParseDirectiveMajorMinor(Major, Minor))
3093     return true;
3094 
3095   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3096   return false;
3097 }
3098 
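// Examples of the accepted forms (illustrative):
//   .hsa_code_object_isa                         (use the targeted GPU's ISA)
//   .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"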
3099 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3100   uint32_t Major;
3101   uint32_t Minor;
3102   uint32_t Stepping;
3103   StringRef VendorName;
3104   StringRef ArchName;
3105 
3106   // If this directive has no arguments, then use the ISA version for the
3107   // targeted GPU.
3108   if (getLexer().is(AsmToken::EndOfStatement)) {
3109     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3110     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3111                                                       ISA.Stepping,
3112                                                       "AMD", "AMDGPU");
3113     return false;
3114   }
3115 
3116   if (ParseDirectiveMajorMinor(Major, Minor))
3117     return true;
3118 
3119   if (getLexer().isNot(AsmToken::Comma))
3120     return TokError("stepping version number required, comma expected");
3121   Lex();
3122 
3123   if (ParseAsAbsoluteExpression(Stepping))
3124     return TokError("invalid stepping version");
3125 
3126   if (getLexer().isNot(AsmToken::Comma))
3127     return TokError("vendor name required, comma expected");
3128   Lex();
3129 
3130   if (getLexer().isNot(AsmToken::String))
3131     return TokError("invalid vendor name");
3132 
3133   VendorName = getLexer().getTok().getStringContents();
3134   Lex();
3135 
3136   if (getLexer().isNot(AsmToken::Comma))
3137     return TokError("arch name required, comma expected");
3138   Lex();
3139 
3140   if (getLexer().isNot(AsmToken::String))
3141     return TokError("invalid arch name");
3142 
3143   ArchName = getLexer().getTok().getStringContents();
3144   Lex();
3145 
3146   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3147                                                     VendorName, ArchName);
3148   return false;
3149 }
3150 
3151 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3152                                                amd_kernel_code_t &Header) {
3153   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3154   // assembly for backwards compatibility.
3155   if (ID == "max_scratch_backing_memory_byte_size") {
3156     Parser.eatToEndOfStatement();
3157     return false;
3158   }
3159 
3160   SmallString<40> ErrStr;
3161   raw_svector_ostream Err(ErrStr);
3162   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3163     return TokError(Err.str());
3164   }
3165   Lex();
3166   return false;
3167 }
3168 
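// Parses a block of the form (field name illustrative):
//   .amd_kernel_code_t
//     kernarg_segment_byte_size = 16
//   .end_amd_kernel_code_t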
3169 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3170   amd_kernel_code_t Header;
3171   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3172 
3173   while (true) {
3174     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3175     // will set the current token to EndOfStatement.
3176     while(getLexer().is(AsmToken::EndOfStatement))
3177       Lex();
3178 
3179     if (getLexer().isNot(AsmToken::Identifier))
3180       return TokError("expected value identifier or .end_amd_kernel_code_t");
3181 
3182     StringRef ID = getLexer().getTok().getIdentifier();
3183     Lex();
3184 
3185     if (ID == ".end_amd_kernel_code_t")
3186       break;
3187 
3188     if (ParseAMDKernelCodeTValue(ID, Header))
3189       return true;
3190   }
3191 
3192   getTargetStreamer().EmitAMDKernelCodeT(Header);
3193 
3194   return false;
3195 }
3196 
3197 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3198   if (getLexer().isNot(AsmToken::Identifier))
3199     return TokError("expected symbol name");
3200 
3201   StringRef KernelName = Parser.getTok().getString();
3202 
3203   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3204                                            ELF::STT_AMDGPU_HSA_KERNEL);
3205   Lex();
3206   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3207     KernelScope.initialize(getContext());
3208   return false;
3209 }
3210 
3211 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3212   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3213     return Error(getParser().getTok().getLoc(),
3214                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3215                  "architectures");
3216   }
3217 
3218   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3219 
3220   std::string ISAVersionStringFromSTI;
3221   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3222   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3223 
3224   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3225     return Error(getParser().getTok().getLoc(),
3226                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3227                  "arguments specified through the command line");
3228   }
3229 
3230   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3231   Lex();
3232 
3233   return false;
3234 }
3235 
3236 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3237   const char *AssemblerDirectiveBegin;
3238   const char *AssemblerDirectiveEnd;
3239   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3240       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3241           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3242                             HSAMD::V3::AssemblerDirectiveEnd)
3243           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3244                             HSAMD::AssemblerDirectiveEnd);
3245 
3246   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3247     return Error(getParser().getTok().getLoc(),
3248                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3249                  "not available on non-amdhsa OSes")).str());
3250   }
3251 
3252   std::string HSAMetadataString;
3253   raw_string_ostream YamlStream(HSAMetadataString);
3254 
3255   getLexer().setSkipSpace(false);
3256 
3257   bool FoundEnd = false;
3258   while (!getLexer().is(AsmToken::Eof)) {
3259     while (getLexer().is(AsmToken::Space)) {
3260       YamlStream << getLexer().getTok().getString();
3261       Lex();
3262     }
3263 
3264     if (getLexer().is(AsmToken::Identifier)) {
3265       StringRef ID = getLexer().getTok().getIdentifier();
3266       if (ID == AssemblerDirectiveEnd) {
3267         Lex();
3268         FoundEnd = true;
3269         break;
3270       }
3271     }
3272 
3273     YamlStream << Parser.parseStringToEndOfStatement()
3274                << getContext().getAsmInfo()->getSeparatorString();
3275 
3276     Parser.eatToEndOfStatement();
3277   }
3278 
3279   getLexer().setSkipSpace(true);
3280 
3281   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3282     return TokError(Twine("expected directive ") +
3283                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3284   }
3285 
3286   YamlStream.flush();
3287 
3288   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3289     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3290       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3291   } else {
3292     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3293       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3294   }
3295 
3296   return false;
3297 }
3298 
3299 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3300   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3301     return Error(getParser().getTok().getLoc(),
3302                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3303                  "not available on non-amdpal OSes")).str());
3304   }
3305 
3306   PALMD::Metadata PALMetadata;
3307   for (;;) {
3308     uint32_t Value;
3309     if (ParseAsAbsoluteExpression(Value)) {
3310       return TokError(Twine("invalid value in ") +
3311                       Twine(PALMD::AssemblerDirective));
3312     }
3313     PALMetadata.push_back(Value);
3314     if (getLexer().isNot(AsmToken::Comma))
3315       break;
3316     Lex();
3317   }
3318   getTargetStreamer().EmitPALMetadata(PALMetadata);
3319   return false;
3320 }
3321 
3322 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3323   StringRef IDVal = DirectiveID.getString();
3324 
3325   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3326     if (IDVal == ".amdgcn_target")
3327       return ParseDirectiveAMDGCNTarget();
3328 
3329     if (IDVal == ".amdhsa_kernel")
3330       return ParseDirectiveAMDHSAKernel();
3331 
3332     // TODO: Restructure/combine with PAL metadata directive.
3333     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3334       return ParseDirectiveHSAMetadata();
3335   } else {
3336     if (IDVal == ".hsa_code_object_version")
3337       return ParseDirectiveHSACodeObjectVersion();
3338 
3339     if (IDVal == ".hsa_code_object_isa")
3340       return ParseDirectiveHSACodeObjectISA();
3341 
3342     if (IDVal == ".amd_kernel_code_t")
3343       return ParseDirectiveAMDKernelCodeT();
3344 
3345     if (IDVal == ".amdgpu_hsa_kernel")
3346       return ParseDirectiveAMDGPUHsaKernel();
3347 
3348     if (IDVal == ".amd_amdgpu_isa")
3349       return ParseDirectiveISAVersion();
3350 
3351     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3352       return ParseDirectiveHSAMetadata();
3353   }
3354 
3355   if (IDVal == PALMD::AssemblerDirective)
3356     return ParseDirectivePALMetadata();
3357 
3358   return true;
3359 }
3360 
3361 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3362                                            unsigned RegNo) const {
3363 
3364   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3365        R.isValid(); ++R) {
3366     if (*R == RegNo)
3367       return isGFX9();
3368   }
3369 
3370   switch (RegNo) {
3371   case AMDGPU::TBA:
3372   case AMDGPU::TBA_LO:
3373   case AMDGPU::TBA_HI:
3374   case AMDGPU::TMA:
3375   case AMDGPU::TMA_LO:
3376   case AMDGPU::TMA_HI:
3377     return !isGFX9();
3378   case AMDGPU::XNACK_MASK:
3379   case AMDGPU::XNACK_MASK_LO:
3380   case AMDGPU::XNACK_MASK_HI:
3381     return !isCI() && !isSI() && hasXNACK();
3382   default:
3383     break;
3384   }
3385 
3386   if (isCI())
3387     return true;
3388 
3389   if (isSI()) {
3390     // No flat_scr
3391     switch (RegNo) {
3392     case AMDGPU::FLAT_SCR:
3393     case AMDGPU::FLAT_SCR_LO:
3394     case AMDGPU::FLAT_SCR_HI:
3395       return false;
3396     default:
3397       return true;
3398     }
3399   }
3400 
3401   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3402   // SI/CI have.
3403   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3404        R.isValid(); ++R) {
3405     if (*R == RegNo)
3406       return false;
3407   }
3408 
3409   return true;
3410 }
3411 
3412 OperandMatchResultTy
3413 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
3414   // Try to parse with a custom parser
3415   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3416 
3417   // If we successfully parsed the operand or if there was an error parsing,
3418   // we are done.
3419   //
3420   // If we are parsing after we reach EndOfStatement then this means we
3421   // are appending default values to the Operands list.  This is only done
3422   // by custom parsers, so we shouldn't continue on to the generic parsing.
3423   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3424       getLexer().is(AsmToken::EndOfStatement))
3425     return ResTy;
3426 
3427   ResTy = parseRegOrImm(Operands);
3428 
3429   if (ResTy == MatchOperand_Success)
3430     return ResTy;
3431 
3432   const auto &Tok = Parser.getTok();
3433   SMLoc S = Tok.getLoc();
3434 
3435   const MCExpr *Expr = nullptr;
3436   if (!Parser.parseExpression(Expr)) {
3437     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3438     return MatchOperand_Success;
3439   }
3440 
3441   // Possibly this is an instruction flag like 'gds'.
3442   if (Tok.getKind() == AsmToken::Identifier) {
3443     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3444     Parser.Lex();
3445     return MatchOperand_Success;
3446   }
3447 
3448   return MatchOperand_NoMatch;
3449 }
3450 
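// Strips a forced-encoding suffix from the mnemonic and records it, e.g.
// (illustrative) v_add_f32_e64 is matched as v_add_f32 with the 64-bit (VOP3)
// encoding forced; _e32, _dpp and _sdwa are handled the same way.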
3451 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3452   // Clear any forced encodings from the previous instruction.
3453   setForcedEncodingSize(0);
3454   setForcedDPP(false);
3455   setForcedSDWA(false);
3456 
3457   if (Name.endswith("_e64")) {
3458     setForcedEncodingSize(64);
3459     return Name.substr(0, Name.size() - 4);
3460   } else if (Name.endswith("_e32")) {
3461     setForcedEncodingSize(32);
3462     return Name.substr(0, Name.size() - 4);
3463   } else if (Name.endswith("_dpp")) {
3464     setForcedDPP(true);
3465     return Name.substr(0, Name.size() - 4);
3466   } else if (Name.endswith("_sdwa")) {
3467     setForcedSDWA(true);
3468     return Name.substr(0, Name.size() - 5);
3469   }
3470   return Name;
3471 }
3472 
3473 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3474                                        StringRef Name,
3475                                        SMLoc NameLoc, OperandVector &Operands) {
3476   // Add the instruction mnemonic
3477   Name = parseMnemonicSuffix(Name);
3478   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3479 
3480   while (!getLexer().is(AsmToken::EndOfStatement)) {
3481     OperandMatchResultTy Res = parseOperand(Operands, Name);
3482 
3483     // Eat the comma or space if there is one.
3484     if (getLexer().is(AsmToken::Comma))
3485       Parser.Lex();
3486 
3487     switch (Res) {
3488       case MatchOperand_Success: break;
3489       case MatchOperand_ParseFail:
3490         Error(getLexer().getLoc(), "failed parsing operand.");
3491         while (!getLexer().is(AsmToken::EndOfStatement)) {
3492           Parser.Lex();
3493         }
3494         return true;
3495       case MatchOperand_NoMatch:
3496         Error(getLexer().getLoc(), "not a valid operand.");
3497         while (!getLexer().is(AsmToken::EndOfStatement)) {
3498           Parser.Lex();
3499         }
3500         return true;
3501     }
3502   }
3503 
3504   return false;
3505 }
3506 
3507 //===----------------------------------------------------------------------===//
3508 // Utility functions
3509 //===----------------------------------------------------------------------===//
3510 
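// Parses an operand of the form <prefix>:<integer>, e.g. (illustrative)
// offset:4095 or offset:-16; a leading minus on the value is accepted.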
3511 OperandMatchResultTy
3512 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
3513   switch(getLexer().getKind()) {
3514     default: return MatchOperand_NoMatch;
3515     case AsmToken::Identifier: {
3516       StringRef Name = Parser.getTok().getString();
3517       if (!Name.equals(Prefix)) {
3518         return MatchOperand_NoMatch;
3519       }
3520 
3521       Parser.Lex();
3522       if (getLexer().isNot(AsmToken::Colon))
3523         return MatchOperand_ParseFail;
3524 
3525       Parser.Lex();
3526 
3527       bool IsMinus = false;
3528       if (getLexer().getKind() == AsmToken::Minus) {
3529         Parser.Lex();
3530         IsMinus = true;
3531       }
3532 
3533       if (getLexer().isNot(AsmToken::Integer))
3534         return MatchOperand_ParseFail;
3535 
3536       if (getParser().parseAbsoluteExpression(Int))
3537         return MatchOperand_ParseFail;
3538 
3539       if (IsMinus)
3540         Int = -Int;
3541       break;
3542     }
3543   }
3544   return MatchOperand_Success;
3545 }
3546 
3547 OperandMatchResultTy
3548 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3549                                     AMDGPUOperand::ImmTy ImmTy,
3550                                     bool (*ConvertResult)(int64_t&)) {
3551   SMLoc S = Parser.getTok().getLoc();
3552   int64_t Value = 0;
3553 
3554   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3555   if (Res != MatchOperand_Success)
3556     return Res;
3557 
3558   if (ConvertResult && !ConvertResult(Value)) {
3559     return MatchOperand_ParseFail;
3560   }
3561 
3562   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3563   return MatchOperand_Success;
3564 }
3565 
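// Parses an operand of the form <prefix>:[v,v,...] with up to four 0/1
// elements, e.g. (illustrative) op_sel:[0,0,1,1]; the bits are packed into a
// single immediate.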
3566 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
3567   const char *Prefix,
3568   OperandVector &Operands,
3569   AMDGPUOperand::ImmTy ImmTy,
3570   bool (*ConvertResult)(int64_t&)) {
3571   StringRef Name = Parser.getTok().getString();
3572   if (!Name.equals(Prefix))
3573     return MatchOperand_NoMatch;
3574 
3575   Parser.Lex();
3576   if (getLexer().isNot(AsmToken::Colon))
3577     return MatchOperand_ParseFail;
3578 
3579   Parser.Lex();
3580   if (getLexer().isNot(AsmToken::LBrac))
3581     return MatchOperand_ParseFail;
3582   Parser.Lex();
3583 
3584   unsigned Val = 0;
3585   SMLoc S = Parser.getTok().getLoc();
3586 
3587   // FIXME: How to verify the number of elements matches the number of src
3588   // operands?
3589   for (int I = 0; I < 4; ++I) {
3590     if (I != 0) {
3591       if (getLexer().is(AsmToken::RBrac))
3592         break;
3593 
3594       if (getLexer().isNot(AsmToken::Comma))
3595         return MatchOperand_ParseFail;
3596       Parser.Lex();
3597     }
3598 
3599     if (getLexer().isNot(AsmToken::Integer))
3600       return MatchOperand_ParseFail;
3601 
3602     int64_t Op;
3603     if (getParser().parseAbsoluteExpression(Op))
3604       return MatchOperand_ParseFail;
3605 
3606     if (Op != 0 && Op != 1)
3607       return MatchOperand_ParseFail;
3608     Val |= (Op << I);
3609   }
3610 
3611   Parser.Lex();
3612   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3613   return MatchOperand_Success;
3614 }
3615 
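// Parses a single-bit modifier by name: the bare name (e.g., illustratively,
// "gds") sets the bit, a "no"-prefixed form clears it, and an absent operand
// at end of statement defaults to 0.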
3616 OperandMatchResultTy
3617 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
3618                                AMDGPUOperand::ImmTy ImmTy) {
3619   int64_t Bit = 0;
3620   SMLoc S = Parser.getTok().getLoc();
3621 
3622   // If we are at the end of the statement, this is a default argument, so
3623   // use the default value.
3624   if (getLexer().isNot(AsmToken::EndOfStatement)) {
3625     switch(getLexer().getKind()) {
3626       case AsmToken::Identifier: {
3627         StringRef Tok = Parser.getTok().getString();
3628         if (Tok == Name) {
3629           if (Tok == "r128" && isGFX9())
3630             Error(S, "r128 modifier is not supported on this GPU");
3631           if (Tok == "a16" && !isGFX9())
3632             Error(S, "a16 modifier is not supported on this GPU");
3633           Bit = 1;
3634           Parser.Lex();
3635         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
3636           Bit = 0;
3637           Parser.Lex();
3638         } else {
3639           return MatchOperand_NoMatch;
3640         }
3641         break;
3642       }
3643       default:
3644         return MatchOperand_NoMatch;
3645     }
3646   }
3647 
3648   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
3649   return MatchOperand_Success;
3650 }
3651 
3652 static void addOptionalImmOperand(
3653   MCInst& Inst, const OperandVector& Operands,
3654   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3655   AMDGPUOperand::ImmTy ImmT,
3656   int64_t Default = 0) {
3657   auto i = OptionalIdx.find(ImmT);
3658   if (i != OptionalIdx.end()) {
3659     unsigned Idx = i->second;
3660     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3661   } else {
3662     Inst.addOperand(MCOperand::createImm(Default));
3663   }
3664 }
3665 
3666 OperandMatchResultTy
3667 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3668   if (getLexer().isNot(AsmToken::Identifier)) {
3669     return MatchOperand_NoMatch;
3670   }
3671   StringRef Tok = Parser.getTok().getString();
3672   if (Tok != Prefix) {
3673     return MatchOperand_NoMatch;
3674   }
3675 
3676   Parser.Lex();
3677   if (getLexer().isNot(AsmToken::Colon)) {
3678     return MatchOperand_ParseFail;
3679   }
3680 
3681   Parser.Lex();
3682   if (getLexer().isNot(AsmToken::Identifier)) {
3683     return MatchOperand_ParseFail;
3684   }
3685 
3686   Value = Parser.getTok().getString();
3687   return MatchOperand_Success;
3688 }
3689 
3690 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
3691 // values to live in a joint format operand in the MCInst encoding.
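// For example (illustrative), dfmt:1 nfmt:7 may appear in either order and is
// packed as Dfmt | (Nfmt << 4).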
3692 OperandMatchResultTy
3693 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
3694   SMLoc S = Parser.getTok().getLoc();
3695   int64_t Dfmt = 0, Nfmt = 0;
3696   // dfmt and nfmt can appear in either order, and each is optional.
3697   bool GotDfmt = false, GotNfmt = false;
3698   while (!GotDfmt || !GotNfmt) {
3699     if (!GotDfmt) {
3700       auto Res = parseIntWithPrefix("dfmt", Dfmt);
3701       if (Res != MatchOperand_NoMatch) {
3702         if (Res != MatchOperand_Success)
3703           return Res;
3704         if (Dfmt >= 16) {
3705           Error(Parser.getTok().getLoc(), "out of range dfmt");
3706           return MatchOperand_ParseFail;
3707         }
3708         GotDfmt = true;
3709         Parser.Lex();
3710         continue;
3711       }
3712     }
3713     if (!GotNfmt) {
3714       auto Res = parseIntWithPrefix("nfmt", Nfmt);
3715       if (Res != MatchOperand_NoMatch) {
3716         if (Res != MatchOperand_Success)
3717           return Res;
3718         if (Nfmt >= 8) {
3719           Error(Parser.getTok().getLoc(), "out of range nfmt");
3720           return MatchOperand_ParseFail;
3721         }
3722         GotNfmt = true;
3723         Parser.Lex();
3724         continue;
3725       }
3726     }
3727     break;
3728   }
3729   if (!GotDfmt && !GotNfmt)
3730     return MatchOperand_NoMatch;
3731   auto Format = Dfmt | Nfmt << 4;
3732   Operands.push_back(
3733       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
3734   return MatchOperand_Success;
3735 }
3736 
3737 //===----------------------------------------------------------------------===//
3738 // ds
3739 //===----------------------------------------------------------------------===//
3740 
3741 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3742                                     const OperandVector &Operands) {
3743   OptionalImmIndexMap OptionalIdx;
3744 
3745   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3746     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3747 
3748     // Add the register arguments
3749     if (Op.isReg()) {
3750       Op.addRegOperands(Inst, 1);
3751       continue;
3752     }
3753 
3754     // Handle optional arguments
3755     OptionalIdx[Op.getImmTy()] = i;
3756   }
3757 
3758   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3759   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3760   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3761 
3762   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3763 }
3764 
3765 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
3766                                 bool IsGdsHardcoded) {
3767   OptionalImmIndexMap OptionalIdx;
3768 
3769   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3770     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3771 
3772     // Add the register arguments
3773     if (Op.isReg()) {
3774       Op.addRegOperands(Inst, 1);
3775       continue;
3776     }
3777 
3778     if (Op.isToken() && Op.getToken() == "gds") {
3779       IsGdsHardcoded = true;
3780       continue;
3781     }
3782 
3783     // Handle optional arguments
3784     OptionalIdx[Op.getImmTy()] = i;
3785   }
3786 
3787   AMDGPUOperand::ImmTy OffsetType =
3788     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
3789      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
3790                                                       AMDGPUOperand::ImmTyOffset;
3791 
3792   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
3793 
3794   if (!IsGdsHardcoded) {
3795     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3796   }
3797   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3798 }
3799 
3800 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
3801   OptionalImmIndexMap OptionalIdx;
3802 
3803   unsigned OperandIdx[4];
3804   unsigned EnMask = 0;
3805   int SrcIdx = 0;
3806 
3807   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3808     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3809 
3810     // Add the register arguments
3811     if (Op.isReg()) {
3812       assert(SrcIdx < 4);
3813       OperandIdx[SrcIdx] = Inst.size();
3814       Op.addRegOperands(Inst, 1);
3815       ++SrcIdx;
3816       continue;
3817     }
3818 
3819     if (Op.isOff()) {
3820       assert(SrcIdx < 4);
3821       OperandIdx[SrcIdx] = Inst.size();
3822       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
3823       ++SrcIdx;
3824       continue;
3825     }
3826 
3827     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
3828       Op.addImmOperands(Inst, 1);
3829       continue;
3830     }
3831 
3832     if (Op.isToken() && Op.getToken() == "done")
3833       continue;
3834 
3835     // Handle optional arguments
3836     OptionalIdx[Op.getImmTy()] = i;
3837   }
3838 
3839   assert(SrcIdx == 4);
3840 
3841   bool Compr = false;
3842   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
3843     Compr = true;
3844     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
3845     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
3846     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
3847   }
3848 
3849   for (auto i = 0; i < SrcIdx; ++i) {
3850     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
3851       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
3852     }
3853   }
3854 
3855   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
3856   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
3857 
3858   Inst.addOperand(MCOperand::createImm(EnMask));
3859 }
3860 
3861 //===----------------------------------------------------------------------===//
3862 // s_waitcnt
3863 //===----------------------------------------------------------------------===//
3864 
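// An s_waitcnt operand is either a raw immediate or a list of named counters,
// e.g. (illustrative):
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(15)
// Each counter is folded into a default all-ones bitmask; the *_sat variants
// clamp an out-of-range value instead of reporting an error.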
3865 static bool
3866 encodeCnt(
3867   const AMDGPU::IsaVersion ISA,
3868   int64_t &IntVal,
3869   int64_t CntVal,
3870   bool Saturate,
3871   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
3872   unsigned (*decode)(const IsaVersion &Version, unsigned))
3873 {
3874   bool Failed = false;
3875 
3876   IntVal = encode(ISA, IntVal, CntVal);
3877   if (CntVal != decode(ISA, IntVal)) {
3878     if (Saturate) {
3879       IntVal = encode(ISA, IntVal, -1);
3880     } else {
3881       Failed = true;
3882     }
3883   }
3884   return Failed;
3885 }
3886 
3887 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
3888   StringRef CntName = Parser.getTok().getString();
3889   int64_t CntVal;
3890 
3891   Parser.Lex();
3892   if (getLexer().isNot(AsmToken::LParen))
3893     return true;
3894 
3895   Parser.Lex();
3896   if (getLexer().isNot(AsmToken::Integer))
3897     return true;
3898 
3899   SMLoc ValLoc = Parser.getTok().getLoc();
3900   if (getParser().parseAbsoluteExpression(CntVal))
3901     return true;
3902 
3903   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3904 
3905   bool Failed = true;
3906   bool Sat = CntName.endswith("_sat");
3907 
3908   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
3909     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
3910   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
3911     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
3912   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
3913     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
3914   }
3915 
3916   if (Failed) {
3917     Error(ValLoc, "value too large for " + CntName);
3918     return true;
3919   }
3920 
3921   if (getLexer().isNot(AsmToken::RParen)) {
3922     return true;
3923   }
3924 
3925   Parser.Lex();
3926   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3927     const AsmToken NextToken = getLexer().peekTok();
3928     if (NextToken.is(AsmToken::Identifier)) {
3929       Parser.Lex();
3930     }
3931   }
3932 
3933   return false;
3934 }
3935 
3936 OperandMatchResultTy
3937 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3938   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3939   int64_t Waitcnt = getWaitcntBitMask(ISA);
3940   SMLoc S = Parser.getTok().getLoc();
3941 
3942   switch(getLexer().getKind()) {
3943     default: return MatchOperand_ParseFail;
3944     case AsmToken::Integer:
3945       // The operand can be an integer value.
3946       if (getParser().parseAbsoluteExpression(Waitcnt))
3947         return MatchOperand_ParseFail;
3948       break;
3949 
3950     case AsmToken::Identifier:
3951       do {
3952         if (parseCnt(Waitcnt))
3953           return MatchOperand_ParseFail;
3954       } while(getLexer().isNot(AsmToken::EndOfStatement));
3955       break;
3956   }
3957   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3958   return MatchOperand_Success;
3959 }
3960 
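// Parses the hwreg construct used by s_getreg_b32/s_setreg_b32, e.g.
// (illustrative):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
// The register may be named symbolically or given as a numeric id; the bit
// offset and width are optional.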
3961 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3962                                           int64_t &Width) {
3963   using namespace llvm::AMDGPU::Hwreg;
3964 
3965   if (Parser.getTok().getString() != "hwreg")
3966     return true;
3967   Parser.Lex();
3968 
3969   if (getLexer().isNot(AsmToken::LParen))
3970     return true;
3971   Parser.Lex();
3972 
3973   if (getLexer().is(AsmToken::Identifier)) {
3974     HwReg.IsSymbolic = true;
3975     HwReg.Id = ID_UNKNOWN_;
3976     const StringRef tok = Parser.getTok().getString();
3977     int Last = ID_SYMBOLIC_LAST_;
3978     if (isSI() || isCI() || isVI())
3979       Last = ID_SYMBOLIC_FIRST_GFX9_;
3980     for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
3981       if (tok == IdSymbolic[i]) {
3982         HwReg.Id = i;
3983         break;
3984       }
3985     }
3986     Parser.Lex();
3987   } else {
3988     HwReg.IsSymbolic = false;
3989     if (getLexer().isNot(AsmToken::Integer))
3990       return true;
3991     if (getParser().parseAbsoluteExpression(HwReg.Id))
3992       return true;
3993   }
3994 
3995   if (getLexer().is(AsmToken::RParen)) {
3996     Parser.Lex();
3997     return false;
3998   }
3999 
4000   // optional params
4001   if (getLexer().isNot(AsmToken::Comma))
4002     return true;
4003   Parser.Lex();
4004 
4005   if (getLexer().isNot(AsmToken::Integer))
4006     return true;
4007   if (getParser().parseAbsoluteExpression(Offset))
4008     return true;
4009 
4010   if (getLexer().isNot(AsmToken::Comma))
4011     return true;
4012   Parser.Lex();
4013 
4014   if (getLexer().isNot(AsmToken::Integer))
4015     return true;
4016   if (getParser().parseAbsoluteExpression(Width))
4017     return true;
4018 
4019   if (getLexer().isNot(AsmToken::RParen))
4020     return true;
4021   Parser.Lex();
4022 
4023   return false;
4024 }
4025 
4026 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4027   using namespace llvm::AMDGPU::Hwreg;
4028 
4029   int64_t Imm16Val = 0;
4030   SMLoc S = Parser.getTok().getLoc();
4031 
4032   switch(getLexer().getKind()) {
4033     default: return MatchOperand_NoMatch;
4034     case AsmToken::Integer:
4035       // The operand can be an integer value.
4036       if (getParser().parseAbsoluteExpression(Imm16Val))
4037         return MatchOperand_NoMatch;
4038       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4039         Error(S, "invalid immediate: only 16-bit values are legal");
4040         // Do not return an error code; create an imm operand anyway and proceed
4041         // to the next operand, if any. That avoids unnecessary error messages.
4042       }
4043       break;
4044 
4045     case AsmToken::Identifier: {
4046         OperandInfoTy HwReg(ID_UNKNOWN_);
4047         int64_t Offset = OFFSET_DEFAULT_;
4048         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
4049         if (parseHwregConstruct(HwReg, Offset, Width))
4050           return MatchOperand_ParseFail;
4051         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
4052           if (HwReg.IsSymbolic)
4053             Error(S, "invalid symbolic name of hardware register");
4054           else
4055             Error(S, "invalid code of hardware register: only 6-bit values are legal");
4056         }
4057         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
4058           Error(S, "invalid bit offset: only 5-bit values are legal");
4059         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
4060           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
4061         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
4062       }
4063       break;
4064   }
4065   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
4066   return MatchOperand_Success;
4067 }
4068 
4069 bool AMDGPUOperand::isSWaitCnt() const {
4070   return isImm();
4071 }
4072 
4073 bool AMDGPUOperand::isHwreg() const {
4074   return isImmTy(ImmTyHwreg);
4075 }
4076 
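// Parses the sendmsg construct, e.g. (illustrative):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// The message id may be symbolic or numeric; GS and SYSMSG messages take an
// operation id, and GS operations other than NOP take an optional stream id.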
4077 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4078   using namespace llvm::AMDGPU::SendMsg;
4079 
4080   if (Parser.getTok().getString() != "sendmsg")
4081     return true;
4082   Parser.Lex();
4083 
4084   if (getLexer().isNot(AsmToken::LParen))
4085     return true;
4086   Parser.Lex();
4087 
4088   if (getLexer().is(AsmToken::Identifier)) {
4089     Msg.IsSymbolic = true;
4090     Msg.Id = ID_UNKNOWN_;
4091     const std::string tok = Parser.getTok().getString();
4092     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4093       switch(i) {
4094         default: continue; // Omit gaps.
4095         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
4096       }
4097       if (tok == IdSymbolic[i]) {
4098         Msg.Id = i;
4099         break;
4100       }
4101     }
4102     Parser.Lex();
4103   } else {
4104     Msg.IsSymbolic = false;
4105     if (getLexer().isNot(AsmToken::Integer))
4106       return true;
4107     if (getParser().parseAbsoluteExpression(Msg.Id))
4108       return true;
4109     if (getLexer().is(AsmToken::Integer))
4110       if (getParser().parseAbsoluteExpression(Msg.Id))
4111         Msg.Id = ID_UNKNOWN_;
4112   }
4113   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4114     return false;
4115 
4116   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4117     if (getLexer().isNot(AsmToken::RParen))
4118       return true;
4119     Parser.Lex();
4120     return false;
4121   }
4122 
4123   if (getLexer().isNot(AsmToken::Comma))
4124     return true;
4125   Parser.Lex();
4126 
4127   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4128   Operation.Id = ID_UNKNOWN_;
4129   if (getLexer().is(AsmToken::Identifier)) {
4130     Operation.IsSymbolic = true;
4131     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4132     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4133     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4134     const StringRef Tok = Parser.getTok().getString();
4135     for (int i = F; i < L; ++i) {
4136       if (Tok == S[i]) {
4137         Operation.Id = i;
4138         break;
4139       }
4140     }
4141     Parser.Lex();
4142   } else {
4143     Operation.IsSymbolic = false;
4144     if (getLexer().isNot(AsmToken::Integer))
4145       return true;
4146     if (getParser().parseAbsoluteExpression(Operation.Id))
4147       return true;
4148   }
4149 
4150   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4151     // Stream id is optional.
4152     if (getLexer().is(AsmToken::RParen)) {
4153       Parser.Lex();
4154       return false;
4155     }
4156 
4157     if (getLexer().isNot(AsmToken::Comma))
4158       return true;
4159     Parser.Lex();
4160 
4161     if (getLexer().isNot(AsmToken::Integer))
4162       return true;
4163     if (getParser().parseAbsoluteExpression(StreamId))
4164       return true;
4165   }
4166 
4167   if (getLexer().isNot(AsmToken::RParen))
4168     return true;
4169   Parser.Lex();
4170   return false;
4171 }
4172 
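// Interpolation operands, e.g. (illustrative):
//   v_interp_p1_f32 v0, v1, attr0.x
// The slot is one of p10, p20 or p0; the attribute is attr<0..63> with a
// .x/.y/.z/.w channel suffix.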
4173 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4174   if (getLexer().getKind() != AsmToken::Identifier)
4175     return MatchOperand_NoMatch;
4176 
4177   StringRef Str = Parser.getTok().getString();
4178   int Slot = StringSwitch<int>(Str)
4179     .Case("p10", 0)
4180     .Case("p20", 1)
4181     .Case("p0", 2)
4182     .Default(-1);
4183 
4184   SMLoc S = Parser.getTok().getLoc();
4185   if (Slot == -1)
4186     return MatchOperand_ParseFail;
4187 
4188   Parser.Lex();
4189   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4190                                               AMDGPUOperand::ImmTyInterpSlot));
4191   return MatchOperand_Success;
4192 }
4193 
4194 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4195   if (getLexer().getKind() != AsmToken::Identifier)
4196     return MatchOperand_NoMatch;
4197 
4198   StringRef Str = Parser.getTok().getString();
4199   if (!Str.startswith("attr"))
4200     return MatchOperand_NoMatch;
4201 
4202   StringRef Chan = Str.take_back(2);
4203   int AttrChan = StringSwitch<int>(Chan)
4204     .Case(".x", 0)
4205     .Case(".y", 1)
4206     .Case(".z", 2)
4207     .Case(".w", 3)
4208     .Default(-1);
4209   if (AttrChan == -1)
4210     return MatchOperand_ParseFail;
4211 
4212   Str = Str.drop_back(2).drop_front(4);
4213 
4214   uint8_t Attr;
4215   if (Str.getAsInteger(10, Attr))
4216     return MatchOperand_ParseFail;
4217 
4218   SMLoc S = Parser.getTok().getLoc();
4219   Parser.Lex();
4220   if (Attr > 63) {
4221     Error(S, "out of bounds attr");
4222     return MatchOperand_Success;
4223   }
4224 
4225   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4226 
4227   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4228                                               AMDGPUOperand::ImmTyInterpAttr));
4229   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4230                                               AMDGPUOperand::ImmTyAttrChan));
4231   return MatchOperand_Success;
4232 }
4233 
4234 void AMDGPUAsmParser::errorExpTgt() {
4235   Error(Parser.getTok().getLoc(), "invalid exp target");
4236 }
4237 
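// Export targets, e.g. (illustrative): exp mrt0 v0, v1, v2, v3 done vm
// Accepted names are mrt0..mrt7, mrtz, null, pos0..pos3 and param0..param31,
// mapped onto the 6-bit target encoding used by the exp instruction.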
4238 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4239                                                       uint8_t &Val) {
4240   if (Str == "null") {
4241     Val = 9;
4242     return MatchOperand_Success;
4243   }
4244 
4245   if (Str.startswith("mrt")) {
4246     Str = Str.drop_front(3);
4247     if (Str == "z") { // == mrtz
4248       Val = 8;
4249       return MatchOperand_Success;
4250     }
4251 
4252     if (Str.getAsInteger(10, Val))
4253       return MatchOperand_ParseFail;
4254 
4255     if (Val > 7)
4256       errorExpTgt();
4257 
4258     return MatchOperand_Success;
4259   }
4260 
4261   if (Str.startswith("pos")) {
4262     Str = Str.drop_front(3);
4263     if (Str.getAsInteger(10, Val))
4264       return MatchOperand_ParseFail;
4265 
4266     if (Val > 3)
4267       errorExpTgt();
4268 
4269     Val += 12;
4270     return MatchOperand_Success;
4271   }
4272 
4273   if (Str.startswith("param")) {
4274     Str = Str.drop_front(5);
4275     if (Str.getAsInteger(10, Val))
4276       return MatchOperand_ParseFail;
4277 
4278     if (Val >= 32)
4279       errorExpTgt();
4280 
4281     Val += 32;
4282     return MatchOperand_Success;
4283   }
4284 
4285   if (Str.startswith("invalid_target_")) {
4286     Str = Str.drop_front(15);
4287     if (Str.getAsInteger(10, Val))
4288       return MatchOperand_ParseFail;
4289 
4290     errorExpTgt();
4291     return MatchOperand_Success;
4292   }
4293 
4294   return MatchOperand_NoMatch;
4295 }
4296 
4297 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4298   uint8_t Val;
4299   StringRef Str = Parser.getTok().getString();
4300 
4301   auto Res = parseExpTgtImpl(Str, Val);
4302   if (Res != MatchOperand_Success)
4303     return Res;
4304 
4305   SMLoc S = Parser.getTok().getLoc();
4306   Parser.Lex();
4307 
4308   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4309                                               AMDGPUOperand::ImmTyExpTgt));
4310   return MatchOperand_Success;
4311 }
4312 
4313 OperandMatchResultTy
4314 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4315   using namespace llvm::AMDGPU::SendMsg;
4316 
4317   int64_t Imm16Val = 0;
4318   SMLoc S = Parser.getTok().getLoc();
4319 
4320   switch(getLexer().getKind()) {
4321   default:
4322     return MatchOperand_NoMatch;
4323   case AsmToken::Integer:
4324     // The operand can be an integer value.
4325     if (getParser().parseAbsoluteExpression(Imm16Val))
4326       return MatchOperand_NoMatch;
4327     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4328       Error(S, "invalid immediate: only 16-bit values are legal");
4329       // Do not return an error code; create an imm operand anyway and proceed
4330       // to the next operand, if any. That avoids unnecessary error messages.
4331     }
4332     break;
4333   case AsmToken::Identifier: {
4334       OperandInfoTy Msg(ID_UNKNOWN_);
4335       OperandInfoTy Operation(OP_UNKNOWN_);
4336       int64_t StreamId = STREAM_ID_DEFAULT_;
4337       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4338         return MatchOperand_ParseFail;
4339       do {
4340         // Validate and encode message ID.
4341         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4342                 || Msg.Id == ID_SYSMSG)) {
4343           if (Msg.IsSymbolic)
4344             Error(S, "invalid/unsupported symbolic name of message");
4345           else
4346             Error(S, "invalid/unsupported code of message");
4347           break;
4348         }
4349         Imm16Val = (Msg.Id << ID_SHIFT_);
4350         // Validate and encode operation ID.
4351         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4352           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4353             if (Operation.IsSymbolic)
4354               Error(S, "invalid symbolic name of GS_OP");
4355             else
4356               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4357             break;
4358           }
4359           if (Operation.Id == OP_GS_NOP
4360               && Msg.Id != ID_GS_DONE) {
4361             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4362             break;
4363           }
4364           Imm16Val |= (Operation.Id << OP_SHIFT_);
4365         }
4366         if (Msg.Id == ID_SYSMSG) {
4367           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4368             if (Operation.IsSymbolic)
4369               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4370             else
4371               Error(S, "invalid/unsupported code of SYSMSG_OP");
4372             break;
4373           }
4374           Imm16Val |= (Operation.Id << OP_SHIFT_);
4375         }
4376         // Validate and encode stream ID.
4377         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4378           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4379             Error(S, "invalid stream id: only 2-bit values are legal");
4380             break;
4381           }
4382           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4383         }
4384       } while (false);
4385     }
4386     break;
4387   }
4388   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4389   return MatchOperand_Success;
4390 }
4391 
4392 bool AMDGPUOperand::isSendMsg() const {
4393   return isImmTy(ImmTySendMsg);
4394 }
4395 
4396 //===----------------------------------------------------------------------===//
4397 // parser helpers
4398 //===----------------------------------------------------------------------===//
4399 
4400 bool
4401 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4402   if (getLexer().getKind() == AsmToken::Identifier &&
4403       Parser.getTok().getString() == Id) {
4404     Parser.Lex();
4405     return true;
4406   }
4407   return false;
4408 }
4409 
4410 bool
4411 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4412   if (getLexer().getKind() == Kind) {
4413     Parser.Lex();
4414     return true;
4415   }
4416   return false;
4417 }
4418 
4419 bool
4420 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4421                            const StringRef ErrMsg) {
4422   if (!trySkipToken(Kind)) {
4423     Error(Parser.getTok().getLoc(), ErrMsg);
4424     return false;
4425   }
4426   return true;
4427 }
4428 
4429 bool
4430 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4431   return !getParser().parseAbsoluteExpression(Imm);
4432 }
4433 
4434 bool
4435 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4436   SMLoc S = Parser.getTok().getLoc();
4437   if (getLexer().getKind() == AsmToken::String) {
4438     Val = Parser.getTok().getStringContents();
4439     Parser.Lex();
4440     return true;
4441   } else {
4442     Error(S, ErrMsg);
4443     return false;
4444   }
4445 }
4446 
4447 //===----------------------------------------------------------------------===//
4448 // swizzle
4449 //===----------------------------------------------------------------------===//
4450 
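// The ds_swizzle_b32 offset is either a raw 16-bit value or a swizzle macro,
// e.g. (illustrative):
//   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pip")
// BROADCAST, SWAP and REVERSE are lowered to BITMASK_PERM encodings below.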
4451 LLVM_READNONE
4452 static unsigned
4453 encodeBitmaskPerm(const unsigned AndMask,
4454                   const unsigned OrMask,
4455                   const unsigned XorMask) {
4456   using namespace llvm::AMDGPU::Swizzle;
4457 
4458   return BITMASK_PERM_ENC |
4459          (AndMask << BITMASK_AND_SHIFT) |
4460          (OrMask  << BITMASK_OR_SHIFT)  |
4461          (XorMask << BITMASK_XOR_SHIFT);
4462 }
4463 
4464 bool
4465 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
4466                                       const unsigned MinVal,
4467                                       const unsigned MaxVal,
4468                                       const StringRef ErrMsg) {
4469   for (unsigned i = 0; i < OpNum; ++i) {
4470     if (!skipToken(AsmToken::Comma, "expected a comma")){
4471       return false;
4472     }
4473     SMLoc ExprLoc = Parser.getTok().getLoc();
4474     if (!parseExpr(Op[i])) {
4475       return false;
4476     }
4477     if (Op[i] < MinVal || Op[i] > MaxVal) {
4478       Error(ExprLoc, ErrMsg);
4479       return false;
4480     }
4481   }
4482 
4483   return true;
4484 }
4485 
4486 bool
4487 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
4488   using namespace llvm::AMDGPU::Swizzle;
4489 
4490   int64_t Lane[LANE_NUM];
4491   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
4492                            "expected a 2-bit lane id")) {
4493     Imm = QUAD_PERM_ENC;
4494     for (unsigned I = 0; I < LANE_NUM; ++I) {
4495       Imm |= Lane[I] << (LANE_SHIFT * I);
4496     }
4497     return true;
4498   }
4499   return false;
4500 }
4501 
4502 bool
4503 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
4504   using namespace llvm::AMDGPU::Swizzle;
4505 
4506   SMLoc S = Parser.getTok().getLoc();
4507   int64_t GroupSize;
4508   int64_t LaneIdx;
4509 
4510   if (!parseSwizzleOperands(1, &GroupSize,
4511                             2, 32,
4512                             "group size must be in the interval [2,32]")) {
4513     return false;
4514   }
4515   if (!isPowerOf2_64(GroupSize)) {
4516     Error(S, "group size must be a power of two");
4517     return false;
4518   }
4519   if (parseSwizzleOperands(1, &LaneIdx,
4520                            0, GroupSize - 1,
4521                            "lane id must be in the interval [0,group size - 1]")) {
4522     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
4523     return true;
4524   }
4525   return false;
4526 }
4527 
4528 bool
4529 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
4530   using namespace llvm::AMDGPU::Swizzle;
4531 
4532   SMLoc S = Parser.getTok().getLoc();
4533   int64_t GroupSize;
4534 
4535   if (!parseSwizzleOperands(1, &GroupSize,
4536       2, 32, "group size must be in the interval [2,32]")) {
4537     return false;
4538   }
4539   if (!isPowerOf2_64(GroupSize)) {
4540     Error(S, "group size must be a power of two");
4541     return false;
4542   }
4543 
4544   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
4545   return true;
4546 }
4547 
4548 bool
4549 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
4550   using namespace llvm::AMDGPU::Swizzle;
4551 
4552   SMLoc S = Parser.getTok().getLoc();
4553   int64_t GroupSize;
4554 
4555   if (!parseSwizzleOperands(1, &GroupSize,
4556       1, 16, "group size must be in the interval [1,16]")) {
4557     return false;
4558   }
4559   if (!isPowerOf2_64(GroupSize)) {
4560     Error(S, "group size must be a power of two");
4561     return false;
4562   }
4563 
4564   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
4565   return true;
4566 }
4567 
4568 bool
4569 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
4570   using namespace llvm::AMDGPU::Swizzle;
4571 
4572   if (!skipToken(AsmToken::Comma, "expected a comma")) {
4573     return false;
4574   }
4575 
4576   StringRef Ctl;
4577   SMLoc StrLoc = Parser.getTok().getLoc();
4578   if (!parseString(Ctl)) {
4579     return false;
4580   }
4581   if (Ctl.size() != BITMASK_WIDTH) {
4582     Error(StrLoc, "expected a 5-character mask");
4583     return false;
4584   }
4585 
4586   unsigned AndMask = 0;
4587   unsigned OrMask = 0;
4588   unsigned XorMask = 0;
4589 
4590   for (size_t i = 0; i < Ctl.size(); ++i) {
4591     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
4592     switch(Ctl[i]) {
4593     default:
4594       Error(StrLoc, "invalid mask");
4595       return false;
4596     case '0':
4597       break;
4598     case '1':
4599       OrMask |= Mask;
4600       break;
4601     case 'p':
4602       AndMask |= Mask;
4603       break;
4604     case 'i':
4605       AndMask |= Mask;
4606       XorMask |= Mask;
4607       break;
4608     }
4609   }
4610 
4611   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
4612   return true;
4613 }
4614 
4615 bool
4616 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
4617 
4618   SMLoc OffsetLoc = Parser.getTok().getLoc();
4619 
4620   if (!parseExpr(Imm)) {
4621     return false;
4622   }
4623   if (!isUInt<16>(Imm)) {
4624     Error(OffsetLoc, "expected a 16-bit offset");
4625     return false;
4626   }
4627   return true;
4628 }
4629 
4630 bool
4631 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
4632   using namespace llvm::AMDGPU::Swizzle;
4633 
4634   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
4635 
4636     SMLoc ModeLoc = Parser.getTok().getLoc();
4637     bool Ok = false;
4638 
4639     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
4640       Ok = parseSwizzleQuadPerm(Imm);
4641     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
4642       Ok = parseSwizzleBitmaskPerm(Imm);
4643     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
4644       Ok = parseSwizzleBroadcast(Imm);
4645     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
4646       Ok = parseSwizzleSwap(Imm);
4647     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
4648       Ok = parseSwizzleReverse(Imm);
4649     } else {
4650       Error(ModeLoc, "expected a swizzle mode");
4651     }
4652 
4653     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
4654   }
4655 
4656   return false;
4657 }
4658 
4659 OperandMatchResultTy
4660 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
4661   SMLoc S = Parser.getTok().getLoc();
4662   int64_t Imm = 0;
4663 
4664   if (trySkipId("offset")) {
4665 
4666     bool Ok = false;
4667     if (skipToken(AsmToken::Colon, "expected a colon")) {
4668       if (trySkipId("swizzle")) {
4669         Ok = parseSwizzleMacro(Imm);
4670       } else {
4671         Ok = parseSwizzleOffset(Imm);
4672       }
4673     }
4674 
4675     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
4676 
4677     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
4678   } else {
4679     // Swizzle "offset" operand is optional.
4680     // If it is omitted, try parsing other optional operands.
4681     return parseOptionalOpr(Operands);
4682   }
4683 }
4684 
4685 bool
4686 AMDGPUOperand::isSwizzle() const {
4687   return isImmTy(ImmTySwizzle);
4688 }
4689 
4690 //===----------------------------------------------------------------------===//
4691 // VGPR Index Mode
4692 //===----------------------------------------------------------------------===//
4693 
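// Parses the gpr_idx macro used by s_set_gpr_idx_on, e.g. (illustrative):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// The operand may also be a raw 4-bit immediate; gpr_idx() with no modes
// means OFF.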
4694 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
4695 
4696   using namespace llvm::AMDGPU::VGPRIndexMode;
4697 
4698   if (trySkipToken(AsmToken::RParen)) {
4699     return OFF;
4700   }
4701 
4702   int64_t Imm = 0;
4703 
4704   while (true) {
4705     unsigned Mode = 0;
4706     SMLoc S = Parser.getTok().getLoc();
4707 
4708     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
4709       if (trySkipId(IdSymbolic[ModeId])) {
4710         Mode = 1 << ModeId;
4711         break;
4712       }
4713     }
4714 
4715     if (Mode == 0) {
4716       Error(S, (Imm == 0)?
4717                "expected a VGPR index mode or a closing parenthesis" :
4718                "expected a VGPR index mode");
4719       break;
4720     }
4721 
4722     if (Imm & Mode) {
4723       Error(S, "duplicate VGPR index mode");
4724       break;
4725     }
4726     Imm |= Mode;
4727 
4728     if (trySkipToken(AsmToken::RParen))
4729       break;
4730     if (!skipToken(AsmToken::Comma,
4731                    "expected a comma or a closing parenthesis"))
4732       break;
4733   }
4734 
4735   return Imm;
4736 }
4737 
4738 OperandMatchResultTy
4739 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
4740 
4741   int64_t Imm = 0;
4742   SMLoc S = Parser.getTok().getLoc();
4743 
4744   if (getLexer().getKind() == AsmToken::Identifier &&
4745       Parser.getTok().getString() == "gpr_idx" &&
4746       getLexer().peekTok().is(AsmToken::LParen)) {
4747 
4748     Parser.Lex();
4749     Parser.Lex();
4750 
    // If the macro fails to parse, parseGPRIdxMacro() reports the error
    // itself; do not return an error code here to avoid excessive error
    // messages.
4753     Imm = parseGPRIdxMacro();
4754 
4755   } else {
4756     if (getParser().parseAbsoluteExpression(Imm))
4757       return MatchOperand_NoMatch;
4758     if (Imm < 0 || !isUInt<4>(Imm)) {
4759       Error(S, "invalid immediate: only 4-bit values are legal");
4760     }
4761   }
4762 
4763   Operands.push_back(
4764       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
4765   return MatchOperand_Success;
4766 }
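
// Both accepted forms of the gpr_idx operand, illustrated (assuming the
// VGPRIndexMode symbolic names SRC0/SRC1/SRC2/DST):
//   s_set_gpr_idx_on s2, gpr_idx(SRC0, DST)   // symbolic macro form
//   s_set_gpr_idx_on s2, 9                    // raw 4-bit immediate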
4767 
4768 bool AMDGPUOperand::isGPRIdxMode() const {
4769   return isImmTy(ImmTyGprIdxMode);
4770 }
4771 
4772 //===----------------------------------------------------------------------===//
4773 // sopp branch targets
4774 //===----------------------------------------------------------------------===//
4775 
4776 OperandMatchResultTy
4777 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
4778   SMLoc S = Parser.getTok().getLoc();
4779 
4780   switch (getLexer().getKind()) {
4781     default: return MatchOperand_ParseFail;
4782     case AsmToken::Integer: {
4783       int64_t Imm;
4784       if (getParser().parseAbsoluteExpression(Imm))
4785         return MatchOperand_ParseFail;
4786       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
4787       return MatchOperand_Success;
4788     }
4789 
4790     case AsmToken::Identifier:
4791       Operands.push_back(AMDGPUOperand::CreateExpr(this,
4792           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
4793                                   Parser.getTok().getString()), getContext()), S));
4794       Parser.Lex();
4795       return MatchOperand_Success;
4796   }
4797 }
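
// Branch targets may be given either as an absolute integer expression or as
// a symbol, e.g. (the label below is hypothetical):
//   s_branch 8
//   s_branch loop_begin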
4798 
4799 //===----------------------------------------------------------------------===//
4800 // mubuf
4801 //===----------------------------------------------------------------------===//
4802 
4803 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
4804   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
4805 }
4806 
4807 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
4808   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
4809 }
4810 
4811 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
4812                                const OperandVector &Operands,
4813                                bool IsAtomic,
4814                                bool IsAtomicReturn,
4815                                bool IsLds) {
4816   bool IsLdsOpcode = IsLds;
4817   bool HasLdsModifier = false;
4818   OptionalImmIndexMap OptionalIdx;
4819   assert(IsAtomicReturn ? IsAtomic : true);
4820 
4821   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4822     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4823 
4824     // Add the register arguments
4825     if (Op.isReg()) {
4826       Op.addRegOperands(Inst, 1);
4827       continue;
4828     }
4829 
4830     // Handle the case where soffset is an immediate
4831     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4832       Op.addImmOperands(Inst, 1);
4833       continue;
4834     }
4835 
    HasLdsModifier |= Op.isLDS();
4837 
4838     // Handle tokens like 'offen' which are sometimes hard-coded into the
4839     // asm string.  There are no MCInst operands for these.
4840     if (Op.isToken()) {
4841       continue;
4842     }
4843     assert(Op.isImm());
4844 
4845     // Handle optional arguments
4846     OptionalIdx[Op.getImmTy()] = i;
4847   }
4848 
  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // optional modifiers, and the llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version of an
  // opcode may be selected even if it has no 'lds' modifier.
4856   if (IsLdsOpcode && !HasLdsModifier) {
4857     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
4858     if (NoLdsOpcode != -1) { // Got lds version - correct it.
4859       Inst.setOpcode(NoLdsOpcode);
4860       IsLdsOpcode = false;
4861     }
4862   }
4863 
4864   // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
4865   if (IsAtomicReturn) {
4866     MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
4867     Inst.insert(I, *I);
4868   }
4869 
4870   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
4871   if (!IsAtomic) { // glc is hard-coded.
4872     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4873   }
4874   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4875 
4876   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
4877     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4878   }
4879 }
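
// A representative mubuf instruction handled by this conversion (illustrative
// syntax only): registers and an immediate soffset are added directly, 'offen'
// is a token with no MCInst operand, and offset/glc/slc/tfe are appended from
// OptionalIdx:
//   buffer_load_dword v5, v0, s[8:11], s3 offen offset:4092 glc slc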
4880 
4881 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
4882   OptionalImmIndexMap OptionalIdx;
4883 
4884   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4885     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4886 
4887     // Add the register arguments
4888     if (Op.isReg()) {
4889       Op.addRegOperands(Inst, 1);
4890       continue;
4891     }
4892 
4893     // Handle the case where soffset is an immediate
4894     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4895       Op.addImmOperands(Inst, 1);
4896       continue;
4897     }
4898 
4899     // Handle tokens like 'offen' which are sometimes hard-coded into the
4900     // asm string.  There are no MCInst operands for these.
4901     if (Op.isToken()) {
4902       continue;
4903     }
4904     assert(Op.isImm());
4905 
4906     // Handle optional arguments
4907     OptionalIdx[Op.getImmTy()] = i;
4908   }
4909 
4910   addOptionalImmOperand(Inst, Operands, OptionalIdx,
4911                         AMDGPUOperand::ImmTyOffset);
4912   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
4913   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4914   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4915   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4916 }
4917 
4918 //===----------------------------------------------------------------------===//
4919 // mimg
4920 //===----------------------------------------------------------------------===//
4921 
4922 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
4923                               bool IsAtomic) {
4924   unsigned I = 1;
4925   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4926   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4927     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4928   }
4929 
4930   if (IsAtomic) {
4931     // Add src, same as dst
4932     assert(Desc.getNumDefs() == 1);
4933     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
4934   }
4935 
4936   OptionalImmIndexMap OptionalIdx;
4937 
4938   for (unsigned E = Operands.size(); I != E; ++I) {
4939     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4940 
4941     // Add the register arguments
4942     if (Op.isReg()) {
4943       Op.addRegOperands(Inst, 1);
4944     } else if (Op.isImmModifier()) {
4945       OptionalIdx[Op.getImmTy()] = I;
4946     } else {
4947       llvm_unreachable("unexpected operand type");
4948     }
4949   }
4950 
4951   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
4952   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
4953   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4954   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4955   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
4956   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4957   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
4958   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
4959   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
4960 }
4961 
4962 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
4963   cvtMIMG(Inst, Operands, true);
4964 }
4965 
4966 //===----------------------------------------------------------------------===//
4967 // smrd
4968 //===----------------------------------------------------------------------===//
4969 
4970 bool AMDGPUOperand::isSMRDOffset8() const {
4971   return isImm() && isUInt<8>(getImm());
4972 }
4973 
4974 bool AMDGPUOperand::isSMRDOffset20() const {
4975   return isImm() && isUInt<20>(getImm());
4976 }
4977 
4978 bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is wider than 8 bits.
4981   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
4982 }
4983 
4984 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
4985   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4986 }
4987 
4988 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
4989   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4990 }
4991 
4992 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
4993   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4994 }
4995 
4996 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
4997   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4998 }
4999 
5000 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5001   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5002 }
5003 
5004 //===----------------------------------------------------------------------===//
5005 // vop3
5006 //===----------------------------------------------------------------------===//
5007 
5008 static bool ConvertOmodMul(int64_t &Mul) {
5009   if (Mul != 1 && Mul != 2 && Mul != 4)
5010     return false;
5011 
5012   Mul >>= 1;
5013   return true;
5014 }
5015 
5016 static bool ConvertOmodDiv(int64_t &Div) {
5017   if (Div == 1) {
5018     Div = 0;
5019     return true;
5020   }
5021 
5022   if (Div == 2) {
5023     Div = 3;
5024     return true;
5025   }
5026 
5027   return false;
5028 }
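
// Taken together, the converters above map the assembly omod syntax onto the
// hardware output-modifier field (0 = none, 1 = *2, 2 = *4, 3 = /2):
//   mul:1 -> 0,  mul:2 -> 1,  mul:4 -> 2,  div:1 -> 0,  div:2 -> 3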
5029 
5030 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5031   if (BoundCtrl == 0) {
5032     BoundCtrl = 1;
5033     return true;
5034   }
5035 
5036   if (BoundCtrl == -1) {
5037     BoundCtrl = 0;
5038     return true;
5039   }
5040 
5041   return false;
5042 }
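
// Note the dpp quirk preserved above: the source-level 'bound_ctrl:0' is
// encoded as a bound_ctrl field value of 1, while an input of -1 maps to 0.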
5043 
5044 // Note: the order in this table matches the order of operands in AsmString.
5045 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5046   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5047   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5048   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5049   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5050   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5051   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5052   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5053   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5054   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5055   {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5056   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5057   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5058   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5059   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5060   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5061   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5062   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5063   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5064   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5065   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5066   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5067   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5068   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5069   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5070   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5071   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5072   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5073   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5074   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5075   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5076   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5077   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5078   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5079   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5080   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5081   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5082   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
5083 };
5084 
5085 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5086   unsigned size = Operands.size();
5087   assert(size > 0);
5088 
5089   OperandMatchResultTy res = parseOptionalOpr(Operands);
5090 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which have
  // a hardcoded 'glc' operand).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits a
  // hardcoded mandatory operand.
5101 
5102   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5103 
5104     // We have parsed the first optional operand.
5105     // Parse as many operands as necessary to skip all mandatory operands.
5106 
5107     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5108       if (res != MatchOperand_Success ||
5109           getLexer().is(AsmToken::EndOfStatement)) break;
5110       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5111       res = parseOptionalOpr(Operands);
5112     }
5113   }
5114 
5115   return res;
5116 }
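
// Example of the situation the lookahead above works around (illustrative;
// exact operand order is assumed): a global atomic with return has a
// hardcoded trailing 'glc' token that follows the optional 'offset':
//   global_atomic_add v1, v[2:3], v4, off offset:16 glc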
5117 
5118 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5119   OperandMatchResultTy res;
5120   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5121     // try to parse any optional operand here
5122     if (Op.IsBit) {
5123       res = parseNamedBit(Op.Name, Operands, Op.Type);
5124     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5125       res = parseOModOperand(Operands);
5126     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5127                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5128                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5129       res = parseSDWASel(Operands, Op.Name, Op.Type);
5130     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5131       res = parseSDWADstUnused(Operands);
5132     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5133                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5134                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5135                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5136       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5137                                         Op.ConvertResult);
5138     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
5139       res = parseDfmtNfmt(Operands);
5140     } else {
5141       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5142     }
5143     if (res != MatchOperand_NoMatch) {
5144       return res;
5145     }
5146   }
5147   return MatchOperand_NoMatch;
5148 }
5149 
5150 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5151   StringRef Name = Parser.getTok().getString();
5152   if (Name == "mul") {
5153     return parseIntWithPrefix("mul", Operands,
5154                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5155   }
5156 
5157   if (Name == "div") {
5158     return parseIntWithPrefix("div", Operands,
5159                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5160   }
5161 
5162   return MatchOperand_NoMatch;
5163 }
5164 
5165 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5166   cvtVOP3P(Inst, Operands);
5167 
5168   int Opc = Inst.getOpcode();
5169 
5170   int SrcNum;
5171   const int Ops[] = { AMDGPU::OpName::src0,
5172                       AMDGPU::OpName::src1,
5173                       AMDGPU::OpName::src2 };
5174   for (SrcNum = 0;
5175        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5176        ++SrcNum);
5177   assert(SrcNum > 0);
5178 
5179   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5180   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5181 
5182   if ((OpSel & (1 << SrcNum)) != 0) {
5183     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5184     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5185     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5186   }
5187 }
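
// In effect, for an instruction with N sources the op_sel bit at position N
// belongs to the destination; since there is no dst_modifiers operand, that
// bit is carried in src0_modifiers as DST_OP_SEL. E.g. with two sources,
// op_sel:[0,0,1] sets only the destination bit.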
5188 
5189 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input modifier.
5191   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand.
5193       && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand is a register class operand.
5195       && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand.
5197       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5198 }
5199 
5200 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5201 {
5202   OptionalImmIndexMap OptionalIdx;
5203   unsigned Opc = Inst.getOpcode();
5204 
5205   unsigned I = 1;
5206   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5207   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5208     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5209   }
5210 
5211   for (unsigned E = Operands.size(); I != E; ++I) {
5212     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5213     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5214       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5215     } else if (Op.isInterpSlot() ||
5216                Op.isInterpAttr() ||
5217                Op.isAttrChan()) {
5218       Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
5219     } else if (Op.isImmModifier()) {
5220       OptionalIdx[Op.getImmTy()] = I;
5221     } else {
5222       llvm_unreachable("unhandled operand type");
5223     }
5224   }
5225 
5226   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5227     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5228   }
5229 
5230   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5231     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5232   }
5233 
5234   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5235     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5236   }
5237 }
5238 
5239 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5240                               OptionalImmIndexMap &OptionalIdx) {
5241   unsigned Opc = Inst.getOpcode();
5242 
5243   unsigned I = 1;
5244   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5245   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5246     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5247   }
5248 
5249   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5250     // This instruction has src modifiers
5251     for (unsigned E = Operands.size(); I != E; ++I) {
5252       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5253       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5254         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5255       } else if (Op.isImmModifier()) {
5256         OptionalIdx[Op.getImmTy()] = I;
5257       } else if (Op.isRegOrImm()) {
5258         Op.addRegOrImmOperands(Inst, 1);
5259       } else {
5260         llvm_unreachable("unhandled operand type");
5261       }
5262     }
5263   } else {
5264     // No src modifiers
5265     for (unsigned E = Operands.size(); I != E; ++I) {
5266       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5267       if (Op.isMod()) {
5268         OptionalIdx[Op.getImmTy()] = I;
5269       } else {
5270         Op.addRegOrImmOperands(Inst, 1);
5271       }
5272     }
5273   }
5274 
5275   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5276     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5277   }
5278 
5279   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5280     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5281   }
5282 
  // Special case for v_mac_{f16, f32} and v_fmac_f32 (gfx906):
  // these have a src2 register operand that is tied to the dst operand.
  // We do not allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
5287   if (Opc == AMDGPU::V_MAC_F32_e64_si ||
5288       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5289       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5290       Opc == AMDGPU::V_FMAC_F32_e64_vi) {
5291     auto it = Inst.begin();
5292     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5293     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5294     ++it;
5295     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5296   }
5297 }
5298 
5299 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5300   OptionalImmIndexMap OptionalIdx;
5301   cvtVOP3(Inst, Operands, OptionalIdx);
5302 }
5303 
5304 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5305                                const OperandVector &Operands) {
5306   OptionalImmIndexMap OptIdx;
5307   const int Opc = Inst.getOpcode();
5308   const MCInstrDesc &Desc = MII.get(Opc);
5309 
5310   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5311 
5312   cvtVOP3(Inst, Operands, OptIdx);
5313 
5314   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5315     assert(!IsPacked);
5316     Inst.addOperand(Inst.getOperand(0));
5317   }
5318 
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
5321 
5322   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5323 
5324   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5325   if (OpSelHiIdx != -1) {
5326     int DefaultVal = IsPacked ? -1 : 0;
5327     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5328                           DefaultVal);
5329   }
5330 
5331   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5332   if (NegLoIdx != -1) {
5333     assert(IsPacked);
5334     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5335     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5336   }
5337 
5338   const int Ops[] = { AMDGPU::OpName::src0,
5339                       AMDGPU::OpName::src1,
5340                       AMDGPU::OpName::src2 };
5341   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5342                          AMDGPU::OpName::src1_modifiers,
5343                          AMDGPU::OpName::src2_modifiers };
5344 
5345   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5346 
5347   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5348   unsigned OpSelHi = 0;
5349   unsigned NegLo = 0;
5350   unsigned NegHi = 0;
5351 
5352   if (OpSelHiIdx != -1) {
5353     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5354   }
5355 
5356   if (NegLoIdx != -1) {
5357     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5358     NegLo = Inst.getOperand(NegLoIdx).getImm();
5359     NegHi = Inst.getOperand(NegHiIdx).getImm();
5360   }
5361 
5362   for (int J = 0; J < 3; ++J) {
5363     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5364     if (OpIdx == -1)
5365       break;
5366 
5367     uint32_t ModVal = 0;
5368 
5369     if ((OpSel & (1 << J)) != 0)
5370       ModVal |= SISrcMods::OP_SEL_0;
5371 
5372     if ((OpSelHi & (1 << J)) != 0)
5373       ModVal |= SISrcMods::OP_SEL_1;
5374 
5375     if ((NegLo & (1 << J)) != 0)
5376       ModVal |= SISrcMods::NEG;
5377 
5378     if ((NegHi & (1 << J)) != 0)
5379       ModVal |= SISrcMods::NEG_HI;
5380 
5381     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5382 
5383     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5384   }
5385 }
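
// Worked example of the bit distribution above: for a three-source packed op
// with op_sel:[1,0,0] op_sel_hi:[1,1,1] neg_lo:[0,1,0], src0_modifiers gains
// OP_SEL_0|OP_SEL_1, src1_modifiers gains OP_SEL_1|NEG, and src2_modifiers
// gains OP_SEL_1.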
5386 
5387 //===----------------------------------------------------------------------===//
5388 // dpp
5389 //===----------------------------------------------------------------------===//
5390 
5391 bool AMDGPUOperand::isDPPCtrl() const {
5392   using namespace AMDGPU::DPP;
5393 
5394   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5395   if (result) {
5396     int64_t Imm = getImm();
5397     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5398            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5399            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5400            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5401            (Imm == DppCtrl::WAVE_SHL1) ||
5402            (Imm == DppCtrl::WAVE_ROL1) ||
5403            (Imm == DppCtrl::WAVE_SHR1) ||
5404            (Imm == DppCtrl::WAVE_ROR1) ||
5405            (Imm == DppCtrl::ROW_MIRROR) ||
5406            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5407            (Imm == DppCtrl::BCAST15) ||
5408            (Imm == DppCtrl::BCAST31);
5409   }
5410   return false;
5411 }
5412 
5413 bool AMDGPUOperand::isS16Imm() const {
5414   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5415 }
5416 
5417 bool AMDGPUOperand::isU16Imm() const {
5418   return isImm() && isUInt<16>(getImm());
5419 }
5420 
5421 OperandMatchResultTy
5422 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
5423   using namespace AMDGPU::DPP;
5424 
5425   SMLoc S = Parser.getTok().getLoc();
5426   StringRef Prefix;
5427   int64_t Int;
5428 
5429   if (getLexer().getKind() == AsmToken::Identifier) {
5430     Prefix = Parser.getTok().getString();
5431   } else {
5432     return MatchOperand_NoMatch;
5433   }
5434 
5435   if (Prefix == "row_mirror") {
5436     Int = DppCtrl::ROW_MIRROR;
5437     Parser.Lex();
5438   } else if (Prefix == "row_half_mirror") {
5439     Int = DppCtrl::ROW_HALF_MIRROR;
5440     Parser.Lex();
5441   } else {
5442     // Check to prevent parseDPPCtrlOps from eating invalid tokens
5443     if (Prefix != "quad_perm"
5444         && Prefix != "row_shl"
5445         && Prefix != "row_shr"
5446         && Prefix != "row_ror"
5447         && Prefix != "wave_shl"
5448         && Prefix != "wave_rol"
5449         && Prefix != "wave_shr"
5450         && Prefix != "wave_ror"
5451         && Prefix != "row_bcast") {
5452       return MatchOperand_NoMatch;
5453     }
5454 
5455     Parser.Lex();
5456     if (getLexer().isNot(AsmToken::Colon))
5457       return MatchOperand_ParseFail;
5458 
5459     if (Prefix == "quad_perm") {
5460       // quad_perm:[%d,%d,%d,%d]
5461       Parser.Lex();
5462       if (getLexer().isNot(AsmToken::LBrac))
5463         return MatchOperand_ParseFail;
5464       Parser.Lex();
5465 
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
5467         return MatchOperand_ParseFail;
5468 
5469       for (int i = 0; i < 3; ++i) {
5470         if (getLexer().isNot(AsmToken::Comma))
5471           return MatchOperand_ParseFail;
5472         Parser.Lex();
5473 
5474         int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
5476           return MatchOperand_ParseFail;
5477         const int shift = i*2 + 2;
5478         Int += (Temp << shift);
5479       }
5480 
5481       if (getLexer().isNot(AsmToken::RBrac))
5482         return MatchOperand_ParseFail;
5483       Parser.Lex();
5484     } else {
5485       // sel:%d
5486       Parser.Lex();
5487       if (getParser().parseAbsoluteExpression(Int))
5488         return MatchOperand_ParseFail;
5489 
5490       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
5491         Int |= DppCtrl::ROW_SHL0;
5492       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
5493         Int |= DppCtrl::ROW_SHR0;
5494       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
5495         Int |= DppCtrl::ROW_ROR0;
5496       } else if (Prefix == "wave_shl" && 1 == Int) {
5497         Int = DppCtrl::WAVE_SHL1;
5498       } else if (Prefix == "wave_rol" && 1 == Int) {
5499         Int = DppCtrl::WAVE_ROL1;
5500       } else if (Prefix == "wave_shr" && 1 == Int) {
5501         Int = DppCtrl::WAVE_SHR1;
5502       } else if (Prefix == "wave_ror" && 1 == Int) {
5503         Int = DppCtrl::WAVE_ROR1;
5504       } else if (Prefix == "row_bcast") {
5505         if (Int == 15) {
5506           Int = DppCtrl::BCAST15;
5507         } else if (Int == 31) {
5508           Int = DppCtrl::BCAST31;
5509         } else {
5510           return MatchOperand_ParseFail;
5511         }
5512       } else {
5513         return MatchOperand_ParseFail;
5514       }
5515     }
5516   }
5517 
5518   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
5519   return MatchOperand_Success;
5520 }
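
// dpp_ctrl forms recognized by the parser above:
//   quad_perm:[0,1,2,3]          row_mirror        row_half_mirror
//   row_shl:1 .. row_shl:15      row_shr:1 .. row_shr:15
//   row_ror:1 .. row_ror:15      row_bcast:15      row_bcast:31
//   wave_shl:1   wave_rol:1      wave_shr:1        wave_ror:1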
5521 
5522 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
5523   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
5524 }
5525 
5526 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
5527   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
5528 }
5529 
5530 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
5531   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
5532 }
5533 
5534 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
5535   OptionalImmIndexMap OptionalIdx;
5536 
5537   unsigned I = 1;
5538   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5539   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5540     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5541   }
5542 
5543   for (unsigned E = Operands.size(); I != E; ++I) {
5544     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
5545                                             MCOI::TIED_TO);
5546     if (TiedTo != -1) {
5547       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or 'src2' operand for MAC instructions.
5549       Inst.addOperand(Inst.getOperand(TiedTo));
5550     }
5551     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5552     // Add the register arguments
    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5558       Op.addRegWithFPInputModsOperands(Inst, 2);
5559     } else if (Op.isDPPCtrl()) {
5560       Op.addImmOperands(Inst, 1);
5561     } else if (Op.isImm()) {
5562       // Handle optional arguments
5563       OptionalIdx[Op.getImmTy()] = I;
5564     } else {
5565       llvm_unreachable("Invalid operand type");
5566     }
5567   }
5568 
5569   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
5570   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
5571   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
5572 }
5573 
5574 //===----------------------------------------------------------------------===//
5575 // sdwa
5576 //===----------------------------------------------------------------------===//
5577 
5578 OperandMatchResultTy
5579 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
5580                               AMDGPUOperand::ImmTy Type) {
5581   using namespace llvm::AMDGPU::SDWA;
5582 
5583   SMLoc S = Parser.getTok().getLoc();
5584   StringRef Value;
5585   OperandMatchResultTy res;
5586 
5587   res = parseStringWithPrefix(Prefix, Value);
5588   if (res != MatchOperand_Success) {
5589     return res;
5590   }
5591 
5592   int64_t Int;
5593   Int = StringSwitch<int64_t>(Value)
5594         .Case("BYTE_0", SdwaSel::BYTE_0)
5595         .Case("BYTE_1", SdwaSel::BYTE_1)
5596         .Case("BYTE_2", SdwaSel::BYTE_2)
5597         .Case("BYTE_3", SdwaSel::BYTE_3)
5598         .Case("WORD_0", SdwaSel::WORD_0)
5599         .Case("WORD_1", SdwaSel::WORD_1)
5600         .Case("DWORD", SdwaSel::DWORD)
5601         .Default(0xffffffff);
5602   Parser.Lex(); // eat last token
5603 
5604   if (Int == 0xffffffff) {
5605     return MatchOperand_ParseFail;
5606   }
5607 
5608   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
5609   return MatchOperand_Success;
5610 }
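
// Select values accepted above, e.g.:
//   dst_sel:DWORD   src0_sel:BYTE_0   src1_sel:WORD_1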
5611 
5612 OperandMatchResultTy
5613 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
5614   using namespace llvm::AMDGPU::SDWA;
5615 
5616   SMLoc S = Parser.getTok().getLoc();
5617   StringRef Value;
5618   OperandMatchResultTy res;
5619 
5620   res = parseStringWithPrefix("dst_unused", Value);
5621   if (res != MatchOperand_Success) {
5622     return res;
5623   }
5624 
5625   int64_t Int;
5626   Int = StringSwitch<int64_t>(Value)
5627         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
5628         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
5629         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
5630         .Default(0xffffffff);
5631   Parser.Lex(); // eat last token
5632 
5633   if (Int == 0xffffffff) {
5634     return MatchOperand_ParseFail;
5635   }
5636 
5637   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
5638   return MatchOperand_Success;
5639 }
5640 
5641 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
5642   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
5643 }
5644 
5645 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
5646   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
5647 }
5648 
5649 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
5650   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
5651 }
5652 
5653 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
5654   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
5655 }
5656 
5657 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
5658                               uint64_t BasicInstType, bool skipVcc) {
5659   using namespace llvm::AMDGPU::SDWA;
5660 
5661   OptionalImmIndexMap OptionalIdx;
5662   bool skippedVcc = false;
5663 
5664   unsigned I = 1;
5665   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5666   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5667     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5668   }
5669 
5670   for (unsigned E = Operands.size(); I != E; ++I) {
5671     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5672     if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
5674       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
5675       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
5676       // Skip VCC only if we didn't skip it on previous iteration.
5677       if (BasicInstType == SIInstrFlags::VOP2 &&
5678           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
5679         skippedVcc = true;
5680         continue;
5681       } else if (BasicInstType == SIInstrFlags::VOPC &&
5682                  Inst.getNumOperands() == 0) {
5683         skippedVcc = true;
5684         continue;
5685       }
5686     }
5687     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5688       Op.addRegOrImmWithInputModsOperands(Inst, 2);
5689     } else if (Op.isImm()) {
5690       // Handle optional arguments
5691       OptionalIdx[Op.getImmTy()] = I;
5692     } else {
5693       llvm_unreachable("Invalid operand type");
5694     }
5695     skippedVcc = false;
5696   }
5697 
5698   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
5699       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi and V_NOP_sdwa_gfx9 have no optional sdwa arguments.
5701     switch (BasicInstType) {
5702     case SIInstrFlags::VOP1:
5703       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5704       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5705         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5706       }
5707       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5708       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5709       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5710       break;
5711 
5712     case SIInstrFlags::VOP2:
5713       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5714       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5715         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5716       }
5717       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5718       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5719       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5720       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5721       break;
5722 
5723     case SIInstrFlags::VOPC:
5724       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5725       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5726       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5727       break;
5728 
5729     default:
5730       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
5731     }
5732   }
5733 
  // Special case for v_mac_{f16, f32}:
  // these have a src2 register operand that is tied to the dst operand.
5736   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
5737       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
5738     auto it = Inst.begin();
5739     std::advance(
5740       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
5741     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5742   }
5743 }
5744 
5745 /// Force static initialization.
5746 extern "C" void LLVMInitializeAMDGPUAsmParser() {
5747   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
5748   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
5749 }
5750 
5751 #define GET_REGISTER_MATCHER
5752 #define GET_MATCHER_IMPLEMENTATION
5753 #define GET_MNEMONIC_SPELL_CHECKER
5754 #include "AMDGPUGenAsmMatcher.inc"
5755 
// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
5758 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
5759                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we are given an immediate operand but
  // expect to get a corresponding token.
5764   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
5765   switch (Kind) {
5766   case MCK_addr64:
5767     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
5768   case MCK_gds:
5769     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
5770   case MCK_lds:
5771     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
5772   case MCK_glc:
5773     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
5774   case MCK_idxen:
5775     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
5776   case MCK_offen:
5777     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
5778   case MCK_SSrcB32:
5779     // When operands have expression values, they will return true for isToken,
5780     // because it is not possible to distinguish between a token and an
5781     // expression at parse time. MatchInstructionImpl() will always try to
5782     // match an operand as a token, when isToken returns true, and when the
5783     // name of the expression is not a valid token, the match will fail,
5784     // so we need to handle it here.
5785     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
5786   case MCK_SSrcF32:
5787     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
5788   case MCK_SoppBrTarget:
5789     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
5790   case MCK_VReg32OrOff:
5791     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
5792   case MCK_InterpSlot:
5793     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
5794   case MCK_Attr:
5795     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
5796   case MCK_AttrChan:
5797     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
5798   default:
5799     return Match_InvalidOperand;
5800   }
5801 }
5802