1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCInstrInfo.h"
34 #include "llvm/MC/MCParser/MCAsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/Support/AMDGPUMetadata.h"
44 #include "llvm/Support/AMDHSAKernelDescriptor.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
62 
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
66 
67 namespace {
68 
69 class AMDGPUAsmParser;
70 
71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
72 
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
76 
77 class AMDGPUOperand : public MCParsedAsmOperand {
78   enum KindTy {
79     Token,
80     Immediate,
81     Register,
82     Expression
83   } Kind;
84 
85   SMLoc StartLoc, EndLoc;
86   const AMDGPUAsmParser *AsmParser;
87 
88 public:
89   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
90     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
91 
92   using Ptr = std::unique_ptr<AMDGPUOperand>;
93 
94   struct Modifiers {
95     bool Abs = false;
96     bool Neg = false;
97     bool Sext = false;
98 
99     bool hasFPModifiers() const { return Abs || Neg; }
100     bool hasIntModifiers() const { return Sext; }
101     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
102 
103     int64_t getFPModifiersOperand() const {
104       int64_t Operand = 0;
105       Operand |= Abs ? SISrcMods::ABS : 0u;
106       Operand |= Neg ? SISrcMods::NEG : 0u;
107       return Operand;
108     }
109 
110     int64_t getIntModifiersOperand() const {
111       int64_t Operand = 0;
112       Operand |= Sext ? SISrcMods::SEXT : 0u;
113       return Operand;
114     }
115 
116     int64_t getModifiersOperand() const {
117       assert(!(hasFPModifiers() && hasIntModifiers())
118            && "fp and int modifiers should not be used simultaneously");
119       if (hasFPModifiers()) {
120         return getFPModifiersOperand();
121       } else if (hasIntModifiers()) {
122         return getIntModifiersOperand();
123       } else {
124         return 0;
125       }
126     }
127 
128     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
129   };
130 
131   enum ImmTy {
132     ImmTyNone,
133     ImmTyGDS,
134     ImmTyLDS,
135     ImmTyOffen,
136     ImmTyIdxen,
137     ImmTyAddr64,
138     ImmTyOffset,
139     ImmTyInstOffset,
140     ImmTyOffset0,
141     ImmTyOffset1,
142     ImmTyGLC,
143     ImmTySLC,
144     ImmTyTFE,
145     ImmTyD16,
146     ImmTyClampSI,
147     ImmTyOModSI,
148     ImmTyDppCtrl,
149     ImmTyDppRowMask,
150     ImmTyDppBankMask,
151     ImmTyDppBoundCtrl,
152     ImmTySdwaDstSel,
153     ImmTySdwaSrc0Sel,
154     ImmTySdwaSrc1Sel,
155     ImmTySdwaDstUnused,
156     ImmTyDMask,
157     ImmTyUNorm,
158     ImmTyDA,
159     ImmTyR128A16,
160     ImmTyLWE,
161     ImmTyExpTgt,
162     ImmTyExpCompr,
163     ImmTyExpVM,
164     ImmTyFORMAT,
165     ImmTyHwreg,
166     ImmTyOff,
167     ImmTySendMsg,
168     ImmTyInterpSlot,
169     ImmTyInterpAttr,
170     ImmTyAttrChan,
171     ImmTyOpSel,
172     ImmTyOpSelHi,
173     ImmTyNegLo,
174     ImmTyNegHi,
175     ImmTySwizzle,
176     ImmTyGprIdxMode,
177     ImmTyEndpgm,
178     ImmTyHigh
179   };
180 
181 private:
182   struct TokOp {
183     const char *Data;
184     unsigned Length;
185   };
186 
187   struct ImmOp {
188     int64_t Val;
189     ImmTy Type;
190     bool IsFPImm;
191     Modifiers Mods;
192   };
193 
194   struct RegOp {
195     unsigned RegNo;
196     Modifiers Mods;
197   };
198 
199   union {
200     TokOp Tok;
201     ImmOp Imm;
202     RegOp Reg;
203     const MCExpr *Expr;
204   };
205 
206 public:
207   bool isToken() const override {
208     if (Kind == Token)
209       return true;
210 
211     if (Kind != Expression || !Expr)
212       return false;
213 
214     // When parsing operands, we can't always tell if something was meant to be
215     // a token, like 'gds', or an expression that references a global variable.
216     // In this case, we assume the string is an expression, and if we need to
217     // interpret is a token, then we treat the symbol name as the token.
218     return isa<MCSymbolRefExpr>(Expr);
219   }
220 
221   bool isImm() const override {
222     return Kind == Immediate;
223   }
224 
225   bool isInlinableImm(MVT type) const;
226   bool isLiteralImm(MVT type) const;
227 
228   bool isRegKind() const {
229     return Kind == Register;
230   }
231 
232   bool isReg() const override {
233     return isRegKind() && !hasModifiers();
234   }
235 
236   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
237     return isRegClass(RCID) || isInlinableImm(type);
238   }
239 
240   bool isRegOrImmWithInt16InputMods() const {
241     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
242   }
243 
244   bool isRegOrImmWithInt32InputMods() const {
245     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
246   }
247 
248   bool isRegOrImmWithInt64InputMods() const {
249     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
250   }
251 
252   bool isRegOrImmWithFP16InputMods() const {
253     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
254   }
255 
256   bool isRegOrImmWithFP32InputMods() const {
257     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
258   }
259 
260   bool isRegOrImmWithFP64InputMods() const {
261     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
262   }
263 
264   bool isVReg() const {
265     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
266            isRegClass(AMDGPU::VReg_64RegClassID) ||
267            isRegClass(AMDGPU::VReg_96RegClassID) ||
268            isRegClass(AMDGPU::VReg_128RegClassID) ||
269            isRegClass(AMDGPU::VReg_256RegClassID) ||
270            isRegClass(AMDGPU::VReg_512RegClassID);
271   }
272 
273   bool isVReg32() const {
274     return isRegClass(AMDGPU::VGPR_32RegClassID);
275   }
276 
277   bool isVReg32OrOff() const {
278     return isOff() || isVReg32();
279   }
280 
281   bool isSDWAOperand(MVT type) const;
282   bool isSDWAFP16Operand() const;
283   bool isSDWAFP32Operand() const;
284   bool isSDWAInt16Operand() const;
285   bool isSDWAInt32Operand() const;
286 
287   bool isImmTy(ImmTy ImmT) const {
288     return isImm() && Imm.Type == ImmT;
289   }
290 
291   bool isImmModifier() const {
292     return isImm() && Imm.Type != ImmTyNone;
293   }
294 
295   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
296   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
297   bool isDMask() const { return isImmTy(ImmTyDMask); }
298   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
299   bool isDA() const { return isImmTy(ImmTyDA); }
300   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
301   bool isLWE() const { return isImmTy(ImmTyLWE); }
302   bool isOff() const { return isImmTy(ImmTyOff); }
303   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
304   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
305   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
306   bool isOffen() const { return isImmTy(ImmTyOffen); }
307   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
308   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
309   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
310   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
311   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
312 
313   bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
314   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
315   bool isGDS() const { return isImmTy(ImmTyGDS); }
316   bool isLDS() const { return isImmTy(ImmTyLDS); }
317   bool isGLC() const { return isImmTy(ImmTyGLC); }
318   bool isSLC() const { return isImmTy(ImmTySLC); }
319   bool isTFE() const { return isImmTy(ImmTyTFE); }
320   bool isD16() const { return isImmTy(ImmTyD16); }
321   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
322   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
323   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
324   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
325   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
326   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
327   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
328   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
329   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
330   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
331   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
332   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
333   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
334   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
335   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
336   bool isHigh() const { return isImmTy(ImmTyHigh); }
337 
338   bool isMod() const {
339     return isClampSI() || isOModSI();
340   }
341 
342   bool isRegOrImm() const {
343     return isReg() || isImm();
344   }
345 
346   bool isRegClass(unsigned RCID) const;
347 
348   bool isInlineValue() const;
349 
350   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
351     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
352   }
353 
354   bool isSCSrcB16() const {
355     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
356   }
357 
358   bool isSCSrcV2B16() const {
359     return isSCSrcB16();
360   }
361 
362   bool isSCSrcB32() const {
363     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
364   }
365 
366   bool isSCSrcB64() const {
367     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
368   }
369 
370   bool isSCSrcF16() const {
371     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
372   }
373 
374   bool isSCSrcV2F16() const {
375     return isSCSrcF16();
376   }
377 
378   bool isSCSrcF32() const {
379     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
380   }
381 
382   bool isSCSrcF64() const {
383     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
384   }
385 
386   bool isSSrcB32() const {
387     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
388   }
389 
390   bool isSSrcB16() const {
391     return isSCSrcB16() || isLiteralImm(MVT::i16);
392   }
393 
394   bool isSSrcV2B16() const {
395     llvm_unreachable("cannot happen");
396     return isSSrcB16();
397   }
398 
399   bool isSSrcB64() const {
400     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
401     // See isVSrc64().
402     return isSCSrcB64() || isLiteralImm(MVT::i64);
403   }
404 
405   bool isSSrcF32() const {
406     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
407   }
408 
409   bool isSSrcF64() const {
410     return isSCSrcB64() || isLiteralImm(MVT::f64);
411   }
412 
413   bool isSSrcF16() const {
414     return isSCSrcB16() || isLiteralImm(MVT::f16);
415   }
416 
417   bool isSSrcV2F16() const {
418     llvm_unreachable("cannot happen");
419     return isSSrcF16();
420   }
421 
422   bool isSSrcOrLdsB32() const {
423     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
424            isLiteralImm(MVT::i32) || isExpr();
425   }
426 
427   bool isVCSrcB32() const {
428     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
429   }
430 
431   bool isVCSrcB64() const {
432     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
433   }
434 
435   bool isVCSrcB16() const {
436     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
437   }
438 
439   bool isVCSrcV2B16() const {
440     return isVCSrcB16();
441   }
442 
443   bool isVCSrcF32() const {
444     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
445   }
446 
447   bool isVCSrcF64() const {
448     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
449   }
450 
451   bool isVCSrcF16() const {
452     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
453   }
454 
455   bool isVCSrcV2F16() const {
456     return isVCSrcF16();
457   }
458 
459   bool isVSrcB32() const {
460     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
461   }
462 
463   bool isVSrcB64() const {
464     return isVCSrcF64() || isLiteralImm(MVT::i64);
465   }
466 
467   bool isVSrcB16() const {
468     return isVCSrcF16() || isLiteralImm(MVT::i16);
469   }
470 
471   bool isVSrcV2B16() const {
472     llvm_unreachable("cannot happen");
473     return isVSrcB16();
474   }
475 
476   bool isVSrcF32() const {
477     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
478   }
479 
480   bool isVSrcF64() const {
481     return isVCSrcF64() || isLiteralImm(MVT::f64);
482   }
483 
484   bool isVSrcF16() const {
485     return isVCSrcF16() || isLiteralImm(MVT::f16);
486   }
487 
488   bool isVSrcV2F16() const {
489     llvm_unreachable("cannot happen");
490     return isVSrcF16();
491   }
492 
493   bool isKImmFP32() const {
494     return isLiteralImm(MVT::f32);
495   }
496 
497   bool isKImmFP16() const {
498     return isLiteralImm(MVT::f16);
499   }
500 
501   bool isMem() const override {
502     return false;
503   }
504 
505   bool isExpr() const {
506     return Kind == Expression;
507   }
508 
509   bool isSoppBrTarget() const {
510     return isExpr() || isImm();
511   }
512 
513   bool isSWaitCnt() const;
514   bool isHwreg() const;
515   bool isSendMsg() const;
516   bool isSwizzle() const;
517   bool isSMRDOffset8() const;
518   bool isSMRDOffset20() const;
519   bool isSMRDLiteralOffset() const;
520   bool isDPPCtrl() const;
521   bool isGPRIdxMode() const;
522   bool isS16Imm() const;
523   bool isU16Imm() const;
524   bool isEndpgm() const;
525 
526   StringRef getExpressionAsToken() const {
527     assert(isExpr());
528     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
529     return S->getSymbol().getName();
530   }
531 
532   StringRef getToken() const {
533     assert(isToken());
534 
535     if (Kind == Expression)
536       return getExpressionAsToken();
537 
538     return StringRef(Tok.Data, Tok.Length);
539   }
540 
541   int64_t getImm() const {
542     assert(isImm());
543     return Imm.Val;
544   }
545 
546   ImmTy getImmTy() const {
547     assert(isImm());
548     return Imm.Type;
549   }
550 
551   unsigned getReg() const override {
552     assert(isRegKind());
553     return Reg.RegNo;
554   }
555 
556   SMLoc getStartLoc() const override {
557     return StartLoc;
558   }
559 
560   SMLoc getEndLoc() const override {
561     return EndLoc;
562   }
563 
564   SMRange getLocRange() const {
565     return SMRange(StartLoc, EndLoc);
566   }
567 
568   Modifiers getModifiers() const {
569     assert(isRegKind() || isImmTy(ImmTyNone));
570     return isRegKind() ? Reg.Mods : Imm.Mods;
571   }
572 
573   void setModifiers(Modifiers Mods) {
574     assert(isRegKind() || isImmTy(ImmTyNone));
575     if (isRegKind())
576       Reg.Mods = Mods;
577     else
578       Imm.Mods = Mods;
579   }
580 
581   bool hasModifiers() const {
582     return getModifiers().hasModifiers();
583   }
584 
585   bool hasFPModifiers() const {
586     return getModifiers().hasFPModifiers();
587   }
588 
589   bool hasIntModifiers() const {
590     return getModifiers().hasIntModifiers();
591   }
592 
593   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
594 
595   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
596 
597   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
598 
599   template <unsigned Bitwidth>
600   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
601 
602   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
603     addKImmFPOperands<16>(Inst, N);
604   }
605 
606   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
607     addKImmFPOperands<32>(Inst, N);
608   }
609 
610   void addRegOperands(MCInst &Inst, unsigned N) const;
611 
612   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
613     if (isRegKind())
614       addRegOperands(Inst, N);
615     else if (isExpr())
616       Inst.addOperand(MCOperand::createExpr(Expr));
617     else
618       addImmOperands(Inst, N);
619   }
620 
621   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
622     Modifiers Mods = getModifiers();
623     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
624     if (isRegKind()) {
625       addRegOperands(Inst, N);
626     } else {
627       addImmOperands(Inst, N, false);
628     }
629   }
630 
631   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
632     assert(!hasIntModifiers());
633     addRegOrImmWithInputModsOperands(Inst, N);
634   }
635 
636   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
637     assert(!hasFPModifiers());
638     addRegOrImmWithInputModsOperands(Inst, N);
639   }
640 
641   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
642     Modifiers Mods = getModifiers();
643     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
644     assert(isRegKind());
645     addRegOperands(Inst, N);
646   }
647 
648   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
649     assert(!hasIntModifiers());
650     addRegWithInputModsOperands(Inst, N);
651   }
652 
653   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
654     assert(!hasFPModifiers());
655     addRegWithInputModsOperands(Inst, N);
656   }
657 
658   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
659     if (isImm())
660       addImmOperands(Inst, N);
661     else {
662       assert(isExpr());
663       Inst.addOperand(MCOperand::createExpr(Expr));
664     }
665   }
666 
667   static void printImmTy(raw_ostream& OS, ImmTy Type) {
668     switch (Type) {
669     case ImmTyNone: OS << "None"; break;
670     case ImmTyGDS: OS << "GDS"; break;
671     case ImmTyLDS: OS << "LDS"; break;
672     case ImmTyOffen: OS << "Offen"; break;
673     case ImmTyIdxen: OS << "Idxen"; break;
674     case ImmTyAddr64: OS << "Addr64"; break;
675     case ImmTyOffset: OS << "Offset"; break;
676     case ImmTyInstOffset: OS << "InstOffset"; break;
677     case ImmTyOffset0: OS << "Offset0"; break;
678     case ImmTyOffset1: OS << "Offset1"; break;
679     case ImmTyGLC: OS << "GLC"; break;
680     case ImmTySLC: OS << "SLC"; break;
681     case ImmTyTFE: OS << "TFE"; break;
682     case ImmTyD16: OS << "D16"; break;
683     case ImmTyFORMAT: OS << "FORMAT"; break;
684     case ImmTyClampSI: OS << "ClampSI"; break;
685     case ImmTyOModSI: OS << "OModSI"; break;
686     case ImmTyDppCtrl: OS << "DppCtrl"; break;
687     case ImmTyDppRowMask: OS << "DppRowMask"; break;
688     case ImmTyDppBankMask: OS << "DppBankMask"; break;
689     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
690     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
691     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
692     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
693     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
694     case ImmTyDMask: OS << "DMask"; break;
695     case ImmTyUNorm: OS << "UNorm"; break;
696     case ImmTyDA: OS << "DA"; break;
697     case ImmTyR128A16: OS << "R128A16"; break;
698     case ImmTyLWE: OS << "LWE"; break;
699     case ImmTyOff: OS << "Off"; break;
700     case ImmTyExpTgt: OS << "ExpTgt"; break;
701     case ImmTyExpCompr: OS << "ExpCompr"; break;
702     case ImmTyExpVM: OS << "ExpVM"; break;
703     case ImmTyHwreg: OS << "Hwreg"; break;
704     case ImmTySendMsg: OS << "SendMsg"; break;
705     case ImmTyInterpSlot: OS << "InterpSlot"; break;
706     case ImmTyInterpAttr: OS << "InterpAttr"; break;
707     case ImmTyAttrChan: OS << "AttrChan"; break;
708     case ImmTyOpSel: OS << "OpSel"; break;
709     case ImmTyOpSelHi: OS << "OpSelHi"; break;
710     case ImmTyNegLo: OS << "NegLo"; break;
711     case ImmTyNegHi: OS << "NegHi"; break;
712     case ImmTySwizzle: OS << "Swizzle"; break;
713     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
714     case ImmTyHigh: OS << "High"; break;
715     case ImmTyEndpgm:
716       OS << "Endpgm";
717       break;
718     }
719   }
720 
721   void print(raw_ostream &OS) const override {
722     switch (Kind) {
723     case Register:
724       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
725       break;
726     case Immediate:
727       OS << '<' << getImm();
728       if (getImmTy() != ImmTyNone) {
729         OS << " type: "; printImmTy(OS, getImmTy());
730       }
731       OS << " mods: " << Imm.Mods << '>';
732       break;
733     case Token:
734       OS << '\'' << getToken() << '\'';
735       break;
736     case Expression:
737       OS << "<expr " << *Expr << '>';
738       break;
739     }
740   }
741 
742   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
743                                       int64_t Val, SMLoc Loc,
744                                       ImmTy Type = ImmTyNone,
745                                       bool IsFPImm = false) {
746     auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
747     Op->Imm.Val = Val;
748     Op->Imm.IsFPImm = IsFPImm;
749     Op->Imm.Type = Type;
750     Op->Imm.Mods = Modifiers();
751     Op->StartLoc = Loc;
752     Op->EndLoc = Loc;
753     return Op;
754   }
755 
756   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
757                                         StringRef Str, SMLoc Loc,
758                                         bool HasExplicitEncodingSize = true) {
759     auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
760     Res->Tok.Data = Str.data();
761     Res->Tok.Length = Str.size();
762     Res->StartLoc = Loc;
763     Res->EndLoc = Loc;
764     return Res;
765   }
766 
767   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
768                                       unsigned RegNo, SMLoc S,
769                                       SMLoc E) {
770     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
771     Op->Reg.RegNo = RegNo;
772     Op->Reg.Mods = Modifiers();
773     Op->StartLoc = S;
774     Op->EndLoc = E;
775     return Op;
776   }
777 
778   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
779                                        const class MCExpr *Expr, SMLoc S) {
780     auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
781     Op->Expr = Expr;
782     Op->StartLoc = S;
783     Op->EndLoc = S;
784     return Op;
785   }
786 };
787 
788 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
789   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
790   return OS;
791 }
792 
793 //===----------------------------------------------------------------------===//
794 // AsmParser
795 //===----------------------------------------------------------------------===//
796 
797 // Holds info related to the current kernel, e.g. count of SGPRs used.
798 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
799 // .amdgpu_hsa_kernel or at EOF.
800 class KernelScopeInfo {
801   int SgprIndexUnusedMin = -1;
802   int VgprIndexUnusedMin = -1;
803   MCContext *Ctx = nullptr;
804 
805   void usesSgprAt(int i) {
806     if (i >= SgprIndexUnusedMin) {
807       SgprIndexUnusedMin = ++i;
808       if (Ctx) {
809         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
810         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
811       }
812     }
813   }
814 
815   void usesVgprAt(int i) {
816     if (i >= VgprIndexUnusedMin) {
817       VgprIndexUnusedMin = ++i;
818       if (Ctx) {
819         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
820         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
821       }
822     }
823   }
824 
825 public:
826   KernelScopeInfo() = default;
827 
828   void initialize(MCContext &Context) {
829     Ctx = &Context;
830     usesSgprAt(SgprIndexUnusedMin = -1);
831     usesVgprAt(VgprIndexUnusedMin = -1);
832   }
833 
834   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
835     switch (RegKind) {
836       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
837       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
838       default: break;
839     }
840   }
841 };
842 
843 class AMDGPUAsmParser : public MCTargetAsmParser {
844   MCAsmParser &Parser;
845 
846   // Number of extra operands parsed after the first optional operand.
847   // This may be necessary to skip hardcoded mandatory operands.
848   static const unsigned MAX_OPR_LOOKAHEAD = 8;
849 
850   unsigned ForcedEncodingSize = 0;
851   bool ForcedDPP = false;
852   bool ForcedSDWA = false;
853   KernelScopeInfo KernelScope;
854 
855   /// @name Auto-generated Match Functions
856   /// {
857 
858 #define GET_ASSEMBLER_HEADER
859 #include "AMDGPUGenAsmMatcher.inc"
860 
861   /// }
862 
863 private:
864   bool ParseAsAbsoluteExpression(uint32_t &Ret);
865   bool OutOfRangeError(SMRange Range);
866   /// Calculate VGPR/SGPR blocks required for given target, reserved
867   /// registers, and user-specified NextFreeXGPR values.
868   ///
869   /// \param Features [in] Target features, used for bug corrections.
870   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
871   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
872   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
873   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
874   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
875   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
876   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
877   /// \param VGPRBlocks [out] Result VGPR block count.
878   /// \param SGPRBlocks [out] Result SGPR block count.
879   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
880                           bool FlatScrUsed, bool XNACKUsed,
881                           unsigned NextFreeVGPR, SMRange VGPRRange,
882                           unsigned NextFreeSGPR, SMRange SGPRRange,
883                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
884   bool ParseDirectiveAMDGCNTarget();
885   bool ParseDirectiveAMDHSAKernel();
886   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
887   bool ParseDirectiveHSACodeObjectVersion();
888   bool ParseDirectiveHSACodeObjectISA();
889   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
890   bool ParseDirectiveAMDKernelCodeT();
891   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
892   bool ParseDirectiveAMDGPUHsaKernel();
893 
894   bool ParseDirectiveISAVersion();
895   bool ParseDirectiveHSAMetadata();
896   bool ParseDirectivePALMetadataBegin();
897   bool ParseDirectivePALMetadata();
898 
899   /// Common code to parse out a block of text (typically YAML) between start and
900   /// end directives.
901   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
902                            const char *AssemblerDirectiveEnd,
903                            std::string &CollectString);
904 
905   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
906                              RegisterKind RegKind, unsigned Reg1,
907                              unsigned RegNum);
908   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
909                            unsigned& RegNum, unsigned& RegWidth,
910                            unsigned *DwordRegIndex);
911   bool isRegister();
912   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
913   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
914   void initializeGprCountSymbol(RegisterKind RegKind);
915   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
916                              unsigned RegWidth);
917   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
918                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
919   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
920                  bool IsGdsHardcoded);
921 
922 public:
923   enum AMDGPUMatchResultTy {
924     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
925   };
926 
927   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
928 
929   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
930                const MCInstrInfo &MII,
931                const MCTargetOptions &Options)
932       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
933     MCAsmParserExtension::Initialize(Parser);
934 
935     if (getFeatureBits().none()) {
936       // Set default features.
937       copySTI().ToggleFeature("southern-islands");
938     }
939 
940     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
941 
942     {
943       // TODO: make those pre-defined variables read-only.
944       // Currently there is none suitable machinery in the core llvm-mc for this.
945       // MCSymbol::isRedefinable is intended for another purpose, and
946       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
947       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
948       MCContext &Ctx = getContext();
949       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
950         MCSymbol *Sym =
951             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
952         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
953         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
954         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
955         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
956         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
957       } else {
958         MCSymbol *Sym =
959             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
960         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
961         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
962         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
963         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
964         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
965       }
966       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
967         initializeGprCountSymbol(IS_VGPR);
968         initializeGprCountSymbol(IS_SGPR);
969       } else
970         KernelScope.initialize(getContext());
971     }
972   }
973 
974   bool hasXNACK() const {
975     return AMDGPU::hasXNACK(getSTI());
976   }
977 
978   bool hasMIMG_R128() const {
979     return AMDGPU::hasMIMG_R128(getSTI());
980   }
981 
982   bool hasPackedD16() const {
983     return AMDGPU::hasPackedD16(getSTI());
984   }
985 
986   bool isSI() const {
987     return AMDGPU::isSI(getSTI());
988   }
989 
990   bool isCI() const {
991     return AMDGPU::isCI(getSTI());
992   }
993 
994   bool isVI() const {
995     return AMDGPU::isVI(getSTI());
996   }
997 
998   bool isGFX9() const {
999     return AMDGPU::isGFX9(getSTI());
1000   }
1001 
1002   bool isGFX10() const {
1003     return AMDGPU::isGFX10(getSTI());
1004   }
1005 
1006   bool hasInv2PiInlineImm() const {
1007     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1008   }
1009 
1010   bool hasFlatOffsets() const {
1011     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1012   }
1013 
1014   bool hasSGPR102_SGPR103() const {
1015     return !isVI() && !isGFX9();
1016   }
1017 
1018   bool hasSGPR104_SGPR105() const {
1019     return isGFX10();
1020   }
1021 
1022   bool hasIntClamp() const {
1023     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1024   }
1025 
1026   AMDGPUTargetStreamer &getTargetStreamer() {
1027     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1028     return static_cast<AMDGPUTargetStreamer &>(TS);
1029   }
1030 
1031   const MCRegisterInfo *getMRI() const {
1032     // We need this const_cast because for some reason getContext() is not const
1033     // in MCAsmParser.
1034     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1035   }
1036 
1037   const MCInstrInfo *getMII() const {
1038     return &MII;
1039   }
1040 
1041   const FeatureBitset &getFeatureBits() const {
1042     return getSTI().getFeatureBits();
1043   }
1044 
1045   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1046   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1047   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1048 
1049   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1050   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1051   bool isForcedDPP() const { return ForcedDPP; }
1052   bool isForcedSDWA() const { return ForcedSDWA; }
1053   ArrayRef<unsigned> getMatchedVariants() const;
1054 
1055   std::unique_ptr<AMDGPUOperand> parseRegister();
1056   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1057   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1058   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1059                                       unsigned Kind) override;
1060   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1061                                OperandVector &Operands, MCStreamer &Out,
1062                                uint64_t &ErrorInfo,
1063                                bool MatchingInlineAsm) override;
1064   bool ParseDirective(AsmToken DirectiveID) override;
1065   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
1066   StringRef parseMnemonicSuffix(StringRef Name);
1067   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1068                         SMLoc NameLoc, OperandVector &Operands) override;
1069   //bool ProcessInstruction(MCInst &Inst);
1070 
1071   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1072 
1073   OperandMatchResultTy
1074   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1075                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1076                      bool (*ConvertResult)(int64_t &) = nullptr);
1077 
1078   OperandMatchResultTy parseOperandArrayWithPrefix(
1079     const char *Prefix,
1080     OperandVector &Operands,
1081     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1082     bool (*ConvertResult)(int64_t&) = nullptr);
1083 
1084   OperandMatchResultTy
1085   parseNamedBit(const char *Name, OperandVector &Operands,
1086                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1087   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1088                                              StringRef &Value);
1089 
1090   bool parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier = false);
1091   bool parseSP3NegModifier();
1092   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1093   OperandMatchResultTy parseReg(OperandVector &Operands);
1094   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1095   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1096   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1097   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1098   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1099   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1100   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1101 
1102   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1103   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1104   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1105   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1106 
1107   bool parseCnt(int64_t &IntVal);
1108   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1109   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1110 
1111 private:
1112   struct OperandInfoTy {
1113     int64_t Id;
1114     bool IsSymbolic = false;
1115 
1116     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1117   };
1118 
1119   bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1120   bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1121 
1122   void errorExpTgt();
1123   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1124 
1125   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1126   bool validateSOPLiteral(const MCInst &Inst) const;
1127   bool validateConstantBusLimitations(const MCInst &Inst);
1128   bool validateEarlyClobberLimitations(const MCInst &Inst);
1129   bool validateIntClampSupported(const MCInst &Inst);
1130   bool validateMIMGAtomicDMask(const MCInst &Inst);
1131   bool validateMIMGGatherDMask(const MCInst &Inst);
1132   bool validateMIMGDataSize(const MCInst &Inst);
1133   bool validateMIMGD16(const MCInst &Inst);
1134   bool validateLdsDirect(const MCInst &Inst);
1135   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1136   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1137   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1138 
1139   bool isId(const StringRef Id) const;
1140   bool isId(const AsmToken &Token, const StringRef Id) const;
1141   bool isToken(const AsmToken::TokenKind Kind) const;
1142   bool trySkipId(const StringRef Id);
1143   bool trySkipToken(const AsmToken::TokenKind Kind);
1144   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1145   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1146   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1147   AsmToken::TokenKind getTokenKind() const;
1148   bool parseExpr(int64_t &Imm);
1149   StringRef getTokenStr() const;
1150   AsmToken peekToken();
1151   AsmToken getToken() const;
1152   SMLoc getLoc() const;
1153   void lex();
1154 
1155 public:
1156   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1157   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1158 
1159   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1160   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1161   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1162   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1163   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1164 
1165   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1166                             const unsigned MinVal,
1167                             const unsigned MaxVal,
1168                             const StringRef ErrMsg);
1169   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1170   bool parseSwizzleOffset(int64_t &Imm);
1171   bool parseSwizzleMacro(int64_t &Imm);
1172   bool parseSwizzleQuadPerm(int64_t &Imm);
1173   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1174   bool parseSwizzleBroadcast(int64_t &Imm);
1175   bool parseSwizzleSwap(int64_t &Imm);
1176   bool parseSwizzleReverse(int64_t &Imm);
1177 
1178   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1179   int64_t parseGPRIdxMacro();
1180 
1181   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1182   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1183   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1184   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1185   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1186 
1187   AMDGPUOperand::Ptr defaultGLC() const;
1188   AMDGPUOperand::Ptr defaultSLC() const;
1189 
1190   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1191   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1192   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1193   AMDGPUOperand::Ptr defaultOffsetU12() const;
1194   AMDGPUOperand::Ptr defaultOffsetS13() const;
1195 
1196   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1197 
1198   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1199                OptionalImmIndexMap &OptionalIdx);
1200   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1201   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1202   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1203 
1204   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1205 
1206   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1207                bool IsAtomic = false);
1208   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1209 
1210   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1211   AMDGPUOperand::Ptr defaultRowMask() const;
1212   AMDGPUOperand::Ptr defaultBankMask() const;
1213   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1214   void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1215 
1216   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1217                                     AMDGPUOperand::ImmTy Type);
1218   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1219   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1220   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1221   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1222   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1223   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1224                 uint64_t BasicInstType, bool skipVcc = false);
1225 
1226   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1227   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1228 };
1229 
1230 struct OptionalOperand {
1231   const char *Name;
1232   AMDGPUOperand::ImmTy Type;
1233   bool IsBit;
1234   bool (*ConvertResult)(int64_t&);
1235 };
1236 
1237 } // end anonymous namespace
1238 
1239 // May be called with integer type with equivalent bitwidth.
1240 static const fltSemantics *getFltSemantics(unsigned Size) {
1241   switch (Size) {
1242   case 4:
1243     return &APFloat::IEEEsingle();
1244   case 8:
1245     return &APFloat::IEEEdouble();
1246   case 2:
1247     return &APFloat::IEEEhalf();
1248   default:
1249     llvm_unreachable("unsupported fp type");
1250   }
1251 }
1252 
1253 static const fltSemantics *getFltSemantics(MVT VT) {
1254   return getFltSemantics(VT.getSizeInBits() / 8);
1255 }
1256 
1257 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1258   switch (OperandType) {
1259   case AMDGPU::OPERAND_REG_IMM_INT32:
1260   case AMDGPU::OPERAND_REG_IMM_FP32:
1261   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1262   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1263     return &APFloat::IEEEsingle();
1264   case AMDGPU::OPERAND_REG_IMM_INT64:
1265   case AMDGPU::OPERAND_REG_IMM_FP64:
1266   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1267   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1268     return &APFloat::IEEEdouble();
1269   case AMDGPU::OPERAND_REG_IMM_INT16:
1270   case AMDGPU::OPERAND_REG_IMM_FP16:
1271   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1272   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1273   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1274   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1275     return &APFloat::IEEEhalf();
1276   default:
1277     llvm_unreachable("unsupported fp type");
1278   }
1279 }
1280 
1281 //===----------------------------------------------------------------------===//
1282 // Operand
1283 //===----------------------------------------------------------------------===//
1284 
1285 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1286   bool Lost;
1287 
1288   // Convert literal to single precision
1289   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1290                                                APFloat::rmNearestTiesToEven,
1291                                                &Lost);
1292   // We allow precision lost but not overflow or underflow
1293   if (Status != APFloat::opOK &&
1294       Lost &&
1295       ((Status & APFloat::opOverflow)  != 0 ||
1296        (Status & APFloat::opUnderflow) != 0)) {
1297     return false;
1298   }
1299 
1300   return true;
1301 }
1302 
1303 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1304   return isUIntN(Size, Val) || isIntN(Size, Val);
1305 }
1306 
1307 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1308 
1309   // This is a hack to enable named inline values like
1310   // shared_base with both 32-bit and 64-bit operands.
1311   // Note that these values are defined as
1312   // 32-bit operands only.
1313   if (isInlineValue()) {
1314     return true;
1315   }
1316 
1317   if (!isImmTy(ImmTyNone)) {
1318     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1319     return false;
1320   }
1321   // TODO: We should avoid using host float here. It would be better to
1322   // check the float bit values which is what a few other places do.
1323   // We've had bot failures before due to weird NaN support on mips hosts.
1324 
1325   APInt Literal(64, Imm.Val);
1326 
1327   if (Imm.IsFPImm) { // We got fp literal token
1328     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1329       return AMDGPU::isInlinableLiteral64(Imm.Val,
1330                                           AsmParser->hasInv2PiInlineImm());
1331     }
1332 
1333     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1334     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1335       return false;
1336 
1337     if (type.getScalarSizeInBits() == 16) {
1338       return AMDGPU::isInlinableLiteral16(
1339         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1340         AsmParser->hasInv2PiInlineImm());
1341     }
1342 
1343     // Check if single precision literal is inlinable
1344     return AMDGPU::isInlinableLiteral32(
1345       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1346       AsmParser->hasInv2PiInlineImm());
1347   }
1348 
1349   // We got int literal token.
1350   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1351     return AMDGPU::isInlinableLiteral64(Imm.Val,
1352                                         AsmParser->hasInv2PiInlineImm());
1353   }
1354 
1355   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1356     return false;
1357   }
1358 
1359   if (type.getScalarSizeInBits() == 16) {
1360     return AMDGPU::isInlinableLiteral16(
1361       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1362       AsmParser->hasInv2PiInlineImm());
1363   }
1364 
1365   return AMDGPU::isInlinableLiteral32(
1366     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1367     AsmParser->hasInv2PiInlineImm());
1368 }
1369 
1370 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1371   // Check that this immediate can be added as literal
1372   if (!isImmTy(ImmTyNone)) {
1373     return false;
1374   }
1375 
1376   if (!Imm.IsFPImm) {
1377     // We got int literal token.
1378 
1379     if (type == MVT::f64 && hasFPModifiers()) {
1380       // Cannot apply fp modifiers to int literals preserving the same semantics
1381       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1382       // disable these cases.
1383       return false;
1384     }
1385 
1386     unsigned Size = type.getSizeInBits();
1387     if (Size == 64)
1388       Size = 32;
1389 
1390     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1391     // types.
1392     return isSafeTruncation(Imm.Val, Size);
1393   }
1394 
1395   // We got fp literal token
1396   if (type == MVT::f64) { // Expected 64-bit fp operand
1397     // We would set low 64-bits of literal to zeroes but we accept this literals
1398     return true;
1399   }
1400 
1401   if (type == MVT::i64) { // Expected 64-bit int operand
1402     // We don't allow fp literals in 64-bit integer instructions. It is
1403     // unclear how we should encode them.
1404     return false;
1405   }
1406 
1407   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1408   return canLosslesslyConvertToFPType(FPLiteral, type);
1409 }
1410 
1411 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1412   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1413 }
1414 
1415 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1416   if (AsmParser->isVI())
1417     return isVReg32();
1418   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1419     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1420   else
1421     return false;
1422 }
1423 
1424 bool AMDGPUOperand::isSDWAFP16Operand() const {
1425   return isSDWAOperand(MVT::f16);
1426 }
1427 
1428 bool AMDGPUOperand::isSDWAFP32Operand() const {
1429   return isSDWAOperand(MVT::f32);
1430 }
1431 
1432 bool AMDGPUOperand::isSDWAInt16Operand() const {
1433   return isSDWAOperand(MVT::i16);
1434 }
1435 
1436 bool AMDGPUOperand::isSDWAInt32Operand() const {
1437   return isSDWAOperand(MVT::i32);
1438 }
1439 
1440 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1441 {
1442   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1443   assert(Size == 2 || Size == 4 || Size == 8);
1444 
1445   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1446 
1447   if (Imm.Mods.Abs) {
1448     Val &= ~FpSignMask;
1449   }
1450   if (Imm.Mods.Neg) {
1451     Val ^= FpSignMask;
1452   }
1453 
1454   return Val;
1455 }
1456 
1457 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1458   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1459                              Inst.getNumOperands())) {
1460     addLiteralImmOperand(Inst, Imm.Val,
1461                          ApplyModifiers &
1462                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1463   } else {
1464     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1465     Inst.addOperand(MCOperand::createImm(Imm.Val));
1466   }
1467 }
1468 
1469 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1470   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1471   auto OpNum = Inst.getNumOperands();
1472   // Check that this operand accepts literals
1473   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1474 
1475   if (ApplyModifiers) {
1476     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1477     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1478     Val = applyInputFPModifiers(Val, Size);
1479   }
1480 
1481   APInt Literal(64, Val);
1482   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1483 
1484   if (Imm.IsFPImm) { // We got fp literal token
1485     switch (OpTy) {
1486     case AMDGPU::OPERAND_REG_IMM_INT64:
1487     case AMDGPU::OPERAND_REG_IMM_FP64:
1488     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1489     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1490       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1491                                        AsmParser->hasInv2PiInlineImm())) {
1492         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1493         return;
1494       }
1495 
1496       // Non-inlineable
1497       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1498         // For fp operands we check if low 32 bits are zeros
1499         if (Literal.getLoBits(32) != 0) {
1500           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1501           "Can't encode literal as exact 64-bit floating-point operand. "
1502           "Low 32-bits will be set to zero");
1503         }
1504 
1505         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1506         return;
1507       }
1508 
1509       // We don't allow fp literals in 64-bit integer instructions. It is
1510       // unclear how we should encode them. This case should be checked earlier
1511       // in predicate methods (isLiteralImm())
1512       llvm_unreachable("fp literal in 64-bit integer instruction.");
1513 
1514     case AMDGPU::OPERAND_REG_IMM_INT32:
1515     case AMDGPU::OPERAND_REG_IMM_FP32:
1516     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1517     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1518     case AMDGPU::OPERAND_REG_IMM_INT16:
1519     case AMDGPU::OPERAND_REG_IMM_FP16:
1520     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1521     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1522     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1523     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1524       bool lost;
1525       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1526       // Convert literal to single precision
1527       FPLiteral.convert(*getOpFltSemantics(OpTy),
1528                         APFloat::rmNearestTiesToEven, &lost);
1529       // We allow precision lost but not overflow or underflow. This should be
1530       // checked earlier in isLiteralImm()
1531 
1532       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1533       Inst.addOperand(MCOperand::createImm(ImmVal));
1534       return;
1535     }
1536     default:
1537       llvm_unreachable("invalid operand size");
1538     }
1539 
1540     return;
1541   }
1542 
1543   // We got int literal token.
1544   // Only sign extend inline immediates.
1545   switch (OpTy) {
1546   case AMDGPU::OPERAND_REG_IMM_INT32:
1547   case AMDGPU::OPERAND_REG_IMM_FP32:
1548   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1549   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1550     if (isSafeTruncation(Val, 32) &&
1551         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1552                                      AsmParser->hasInv2PiInlineImm())) {
1553       Inst.addOperand(MCOperand::createImm(Val));
1554       return;
1555     }
1556 
1557     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1558     return;
1559 
1560   case AMDGPU::OPERAND_REG_IMM_INT64:
1561   case AMDGPU::OPERAND_REG_IMM_FP64:
1562   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1563   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1564     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1565       Inst.addOperand(MCOperand::createImm(Val));
1566       return;
1567     }
1568 
1569     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1570     return;
1571 
1572   case AMDGPU::OPERAND_REG_IMM_INT16:
1573   case AMDGPU::OPERAND_REG_IMM_FP16:
1574   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1575   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1576     if (isSafeTruncation(Val, 16) &&
1577         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1578                                      AsmParser->hasInv2PiInlineImm())) {
1579       Inst.addOperand(MCOperand::createImm(Val));
1580       return;
1581     }
1582 
1583     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1584     return;
1585 
1586   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1587   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1588     assert(isSafeTruncation(Val, 16));
1589     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1590                                         AsmParser->hasInv2PiInlineImm()));
1591 
1592     Inst.addOperand(MCOperand::createImm(Val));
1593     return;
1594   }
1595   default:
1596     llvm_unreachable("invalid operand size");
1597   }
1598 }
1599 
1600 template <unsigned Bitwidth>
1601 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1602   APInt Literal(64, Imm.Val);
1603 
1604   if (!Imm.IsFPImm) {
1605     // We got int literal token.
1606     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1607     return;
1608   }
1609 
1610   bool Lost;
1611   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1612   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1613                     APFloat::rmNearestTiesToEven, &Lost);
1614   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1615 }
1616 
1617 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1618   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1619 }
1620 
1621 static bool isInlineValue(unsigned Reg) {
1622   switch (Reg) {
1623   case AMDGPU::SRC_SHARED_BASE:
1624   case AMDGPU::SRC_SHARED_LIMIT:
1625   case AMDGPU::SRC_PRIVATE_BASE:
1626   case AMDGPU::SRC_PRIVATE_LIMIT:
1627   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1628     return true;
1629   default:
1630     return false;
1631   }
1632 }
1633 
1634 bool AMDGPUOperand::isInlineValue() const {
1635   return isRegKind() && ::isInlineValue(getReg());
1636 }
1637 
1638 //===----------------------------------------------------------------------===//
1639 // AsmParser
1640 //===----------------------------------------------------------------------===//
1641 
1642 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1643   if (Is == IS_VGPR) {
1644     switch (RegWidth) {
1645       default: return -1;
1646       case 1: return AMDGPU::VGPR_32RegClassID;
1647       case 2: return AMDGPU::VReg_64RegClassID;
1648       case 3: return AMDGPU::VReg_96RegClassID;
1649       case 4: return AMDGPU::VReg_128RegClassID;
1650       case 8: return AMDGPU::VReg_256RegClassID;
1651       case 16: return AMDGPU::VReg_512RegClassID;
1652     }
1653   } else if (Is == IS_TTMP) {
1654     switch (RegWidth) {
1655       default: return -1;
1656       case 1: return AMDGPU::TTMP_32RegClassID;
1657       case 2: return AMDGPU::TTMP_64RegClassID;
1658       case 4: return AMDGPU::TTMP_128RegClassID;
1659       case 8: return AMDGPU::TTMP_256RegClassID;
1660       case 16: return AMDGPU::TTMP_512RegClassID;
1661     }
1662   } else if (Is == IS_SGPR) {
1663     switch (RegWidth) {
1664       default: return -1;
1665       case 1: return AMDGPU::SGPR_32RegClassID;
1666       case 2: return AMDGPU::SGPR_64RegClassID;
1667       case 4: return AMDGPU::SGPR_128RegClassID;
1668       case 8: return AMDGPU::SGPR_256RegClassID;
1669       case 16: return AMDGPU::SGPR_512RegClassID;
1670     }
1671   }
1672   return -1;
1673 }
1674 
1675 static unsigned getSpecialRegForName(StringRef RegName) {
1676   return StringSwitch<unsigned>(RegName)
1677     .Case("exec", AMDGPU::EXEC)
1678     .Case("vcc", AMDGPU::VCC)
1679     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1680     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1681     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1682     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1683     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1684     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1685     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1686     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1687     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1688     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1689     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1690     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1691     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1692     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1693     .Case("m0", AMDGPU::M0)
1694     .Case("scc", AMDGPU::SCC)
1695     .Case("tba", AMDGPU::TBA)
1696     .Case("tma", AMDGPU::TMA)
1697     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1698     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1699     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1700     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1701     .Case("vcc_lo", AMDGPU::VCC_LO)
1702     .Case("vcc_hi", AMDGPU::VCC_HI)
1703     .Case("exec_lo", AMDGPU::EXEC_LO)
1704     .Case("exec_hi", AMDGPU::EXEC_HI)
1705     .Case("tma_lo", AMDGPU::TMA_LO)
1706     .Case("tma_hi", AMDGPU::TMA_HI)
1707     .Case("tba_lo", AMDGPU::TBA_LO)
1708     .Case("tba_hi", AMDGPU::TBA_HI)
1709     .Case("null", AMDGPU::SGPR_NULL)
1710     .Default(0);
1711 }
1712 
1713 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1714                                     SMLoc &EndLoc) {
1715   auto R = parseRegister();
1716   if (!R) return true;
1717   assert(R->isReg());
1718   RegNo = R->getReg();
1719   StartLoc = R->getStartLoc();
1720   EndLoc = R->getEndLoc();
1721   return false;
1722 }
1723 
1724 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1725                                             RegisterKind RegKind, unsigned Reg1,
1726                                             unsigned RegNum) {
1727   switch (RegKind) {
1728   case IS_SPECIAL:
1729     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1730       Reg = AMDGPU::EXEC;
1731       RegWidth = 2;
1732       return true;
1733     }
1734     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1735       Reg = AMDGPU::FLAT_SCR;
1736       RegWidth = 2;
1737       return true;
1738     }
1739     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1740       Reg = AMDGPU::XNACK_MASK;
1741       RegWidth = 2;
1742       return true;
1743     }
1744     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1745       Reg = AMDGPU::VCC;
1746       RegWidth = 2;
1747       return true;
1748     }
1749     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1750       Reg = AMDGPU::TBA;
1751       RegWidth = 2;
1752       return true;
1753     }
1754     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1755       Reg = AMDGPU::TMA;
1756       RegWidth = 2;
1757       return true;
1758     }
1759     return false;
1760   case IS_VGPR:
1761   case IS_SGPR:
1762   case IS_TTMP:
1763     if (Reg1 != Reg + RegWidth) {
1764       return false;
1765     }
1766     RegWidth++;
1767     return true;
1768   default:
1769     llvm_unreachable("unexpected register kind");
1770   }
1771 }
1772 
1773 static const StringRef Registers[] = {
1774   { "v" },
1775   { "s" },
1776   { "ttmp" },
1777 };
1778 
1779 bool
1780 AMDGPUAsmParser::isRegister(const AsmToken &Token,
1781                             const AsmToken &NextToken) const {
1782 
1783   // A list of consecutive registers: [s0,s1,s2,s3]
1784   if (Token.is(AsmToken::LBrac))
1785     return true;
1786 
1787   if (!Token.is(AsmToken::Identifier))
1788     return false;
1789 
1790   // A single register like s0 or a range of registers like s[0:1]
1791 
1792   StringRef RegName = Token.getString();
1793 
1794   for (StringRef Reg : Registers) {
1795     if (RegName.startswith(Reg)) {
1796       if (Reg.size() < RegName.size()) {
1797         unsigned RegNum;
1798         // A single register with an index: rXX
1799         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
1800           return true;
1801       } else {
1802         // A range of registers: r[XX:YY].
1803         if (NextToken.is(AsmToken::LBrac))
1804           return true;
1805       }
1806     }
1807   }
1808 
1809   return getSpecialRegForName(RegName);
1810 }
1811 
1812 bool
1813 AMDGPUAsmParser::isRegister()
1814 {
1815   return isRegister(getToken(), peekToken());
1816 }
1817 
1818 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1819                                           unsigned &RegNum, unsigned &RegWidth,
1820                                           unsigned *DwordRegIndex) {
1821   if (DwordRegIndex) { *DwordRegIndex = 0; }
1822   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1823   if (getLexer().is(AsmToken::Identifier)) {
1824     StringRef RegName = Parser.getTok().getString();
1825     if ((Reg = getSpecialRegForName(RegName))) {
1826       Parser.Lex();
1827       RegKind = IS_SPECIAL;
1828     } else {
1829       unsigned RegNumIndex = 0;
1830       if (RegName[0] == 'v') {
1831         RegNumIndex = 1;
1832         RegKind = IS_VGPR;
1833       } else if (RegName[0] == 's') {
1834         RegNumIndex = 1;
1835         RegKind = IS_SGPR;
1836       } else if (RegName.startswith("ttmp")) {
1837         RegNumIndex = strlen("ttmp");
1838         RegKind = IS_TTMP;
1839       } else {
1840         return false;
1841       }
1842       if (RegName.size() > RegNumIndex) {
1843         // Single 32-bit register: vXX.
1844         if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1845           return false;
1846         Parser.Lex();
1847         RegWidth = 1;
1848       } else {
1849         // Range of registers: v[XX:YY]. ":YY" is optional.
1850         Parser.Lex();
1851         int64_t RegLo, RegHi;
1852         if (getLexer().isNot(AsmToken::LBrac))
1853           return false;
1854         Parser.Lex();
1855 
1856         if (getParser().parseAbsoluteExpression(RegLo))
1857           return false;
1858 
1859         const bool isRBrace = getLexer().is(AsmToken::RBrac);
1860         if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1861           return false;
1862         Parser.Lex();
1863 
1864         if (isRBrace) {
1865           RegHi = RegLo;
1866         } else {
1867           if (getParser().parseAbsoluteExpression(RegHi))
1868             return false;
1869 
1870           if (getLexer().isNot(AsmToken::RBrac))
1871             return false;
1872           Parser.Lex();
1873         }
1874         RegNum = (unsigned) RegLo;
1875         RegWidth = (RegHi - RegLo) + 1;
1876       }
1877     }
1878   } else if (getLexer().is(AsmToken::LBrac)) {
1879     // List of consecutive registers: [s0,s1,s2,s3]
1880     Parser.Lex();
1881     if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1882       return false;
1883     if (RegWidth != 1)
1884       return false;
1885     RegisterKind RegKind1;
1886     unsigned Reg1, RegNum1, RegWidth1;
1887     do {
1888       if (getLexer().is(AsmToken::Comma)) {
1889         Parser.Lex();
1890       } else if (getLexer().is(AsmToken::RBrac)) {
1891         Parser.Lex();
1892         break;
1893       } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1894         if (RegWidth1 != 1) {
1895           return false;
1896         }
1897         if (RegKind1 != RegKind) {
1898           return false;
1899         }
1900         if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1901           return false;
1902         }
1903       } else {
1904         return false;
1905       }
1906     } while (true);
1907   } else {
1908     return false;
1909   }
1910   switch (RegKind) {
1911   case IS_SPECIAL:
1912     RegNum = 0;
1913     RegWidth = 1;
1914     break;
1915   case IS_VGPR:
1916   case IS_SGPR:
1917   case IS_TTMP:
1918   {
1919     unsigned Size = 1;
1920     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1921       // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
1922       Size = std::min(RegWidth, 4u);
1923     }
1924     if (RegNum % Size != 0)
1925       return false;
1926     if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1927     RegNum = RegNum / Size;
1928     int RCID = getRegClass(RegKind, RegWidth);
1929     if (RCID == -1)
1930       return false;
1931     const MCRegisterClass RC = TRI->getRegClass(RCID);
1932     if (RegNum >= RC.getNumRegs())
1933       return false;
1934     Reg = RC.getRegister(RegNum);
1935     break;
1936   }
1937 
1938   default:
1939     llvm_unreachable("unexpected register kind");
1940   }
1941 
1942   if (!subtargetHasRegister(*TRI, Reg))
1943     return false;
1944   return true;
1945 }
1946 
1947 Optional<StringRef>
1948 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1949   switch (RegKind) {
1950   case IS_VGPR:
1951     return StringRef(".amdgcn.next_free_vgpr");
1952   case IS_SGPR:
1953     return StringRef(".amdgcn.next_free_sgpr");
1954   default:
1955     return None;
1956   }
1957 }
1958 
1959 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1960   auto SymbolName = getGprCountSymbolName(RegKind);
1961   assert(SymbolName && "initializing invalid register kind");
1962   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1963   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1964 }
1965 
1966 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1967                                             unsigned DwordRegIndex,
1968                                             unsigned RegWidth) {
1969   // Symbols are only defined for GCN targets
1970   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
1971     return true;
1972 
1973   auto SymbolName = getGprCountSymbolName(RegKind);
1974   if (!SymbolName)
1975     return true;
1976   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1977 
1978   int64_t NewMax = DwordRegIndex + RegWidth - 1;
1979   int64_t OldCount;
1980 
1981   if (!Sym->isVariable())
1982     return !Error(getParser().getTok().getLoc(),
1983                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
1984   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
1985     return !Error(
1986         getParser().getTok().getLoc(),
1987         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
1988 
1989   if (OldCount <= NewMax)
1990     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
1991 
1992   return true;
1993 }
1994 
1995 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1996   const auto &Tok = Parser.getTok();
1997   SMLoc StartLoc = Tok.getLoc();
1998   SMLoc EndLoc = Tok.getEndLoc();
1999   RegisterKind RegKind;
2000   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2001 
2002   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2003     //FIXME: improve error messages (bug 41303).
2004     Error(StartLoc, "not a valid operand.");
2005     return nullptr;
2006   }
2007   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2008     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2009       return nullptr;
2010   } else
2011     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2012   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2013 }
2014 
2015 bool
2016 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier) {
2017   if (HasSP3AbsModifier) {
2018     // This is a workaround for handling expressions
2019     // as arguments of SP3 'abs' modifier, for example:
2020     //     |1.0|
2021     //     |-1|
2022     //     |1+x|
2023     // This syntax is not compatible with syntax of standard
2024     // MC expressions (due to the trailing '|').
2025 
2026     SMLoc EndLoc;
2027     const MCExpr *Expr;
2028     SMLoc StartLoc = getLoc();
2029 
2030     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
2031       return true;
2032     }
2033 
2034     if (!Expr->evaluateAsAbsolute(Val))
2035       return Error(StartLoc, "expected absolute expression");
2036 
2037     return false;
2038   }
2039 
2040   return getParser().parseAbsoluteExpression(Val);
2041 }
2042 
2043 OperandMatchResultTy
2044 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2045   // TODO: add syntactic sugar for 1/(2*PI)
2046 
2047   const auto& Tok = getToken();
2048   const auto& NextTok = peekToken();
2049   bool IsReal = Tok.is(AsmToken::Real);
2050   SMLoc S = Tok.getLoc();
2051   bool Negate = false;
2052 
2053   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2054     lex();
2055     IsReal = true;
2056     Negate = true;
2057   }
2058 
2059   if (IsReal) {
2060     // Floating-point expressions are not supported.
2061     // Can only allow floating-point literals with an
2062     // optional sign.
2063 
2064     StringRef Num = getTokenStr();
2065     lex();
2066 
2067     APFloat RealVal(APFloat::IEEEdouble());
2068     auto roundMode = APFloat::rmNearestTiesToEven;
2069     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2070       return MatchOperand_ParseFail;
2071     }
2072     if (Negate)
2073       RealVal.changeSign();
2074 
2075     Operands.push_back(
2076       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2077                                AMDGPUOperand::ImmTyNone, true));
2078 
2079     return MatchOperand_Success;
2080 
2081     // FIXME: Should enable arbitrary expressions here
2082   } else if (Tok.is(AsmToken::Integer) ||
2083              (Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Integer))){
2084 
2085     int64_t IntVal;
2086     if (parseAbsoluteExpr(IntVal, HasSP3AbsModifier))
2087       return MatchOperand_ParseFail;
2088 
2089     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2090     return MatchOperand_Success;
2091   }
2092 
2093   return MatchOperand_NoMatch;
2094 }
2095 
2096 OperandMatchResultTy
2097 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2098   if (!isRegister())
2099     return MatchOperand_NoMatch;
2100 
2101   if (auto R = parseRegister()) {
2102     assert(R->isReg());
2103     Operands.push_back(std::move(R));
2104     return MatchOperand_Success;
2105   }
2106   return MatchOperand_ParseFail;
2107 }
2108 
2109 OperandMatchResultTy
2110 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2111   auto res = parseReg(Operands);
2112   return (res == MatchOperand_NoMatch)?
2113          parseImm(Operands, HasSP3AbsMod) :
2114          res;
2115 }
2116 
2117 // Check if the current token is an SP3 'neg' modifier.
2118 // Currently this modifier is allowed in the following context:
2119 //
2120 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2121 // 2. Before an 'abs' modifier: -abs(...)
2122 // 3. Before an SP3 'abs' modifier: -|...|
2123 //
2124 // In all other cases "-" is handled as a part
2125 // of an expression that follows the sign.
2126 //
2127 // Note: When "-" is followed by an integer literal,
2128 // this is interpreted as integer negation rather
2129 // than a floating-point NEG modifier applied to N.
2130 // Beside being contr-intuitive, such use of floating-point
2131 // NEG modifier would have resulted in different meaning
2132 // of integer literals used with VOP1/2/C and VOP3,
2133 // for example:
2134 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2135 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2136 // Negative fp literals with preceding "-" are
2137 // handled likewise for unifomtity
2138 //
2139 bool
2140 AMDGPUAsmParser::parseSP3NegModifier() {
2141 
2142   AsmToken NextToken[2];
2143   peekTokens(NextToken);
2144 
2145   if (isToken(AsmToken::Minus) &&
2146       (isRegister(NextToken[0], NextToken[1]) ||
2147        NextToken[0].is(AsmToken::Pipe) ||
2148        isId(NextToken[0], "abs"))) {
2149     lex();
2150     return true;
2151   }
2152 
2153   return false;
2154 }
2155 
2156 OperandMatchResultTy
2157 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2158                                               bool AllowImm) {
2159   bool Neg, SP3Neg;
2160   bool Abs, SP3Abs;
2161   SMLoc Loc;
2162 
2163   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2164   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2165     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2166     return MatchOperand_ParseFail;
2167   }
2168 
2169   SP3Neg = parseSP3NegModifier();
2170 
2171   Loc = getLoc();
2172   Neg = trySkipId("neg");
2173   if (Neg && SP3Neg) {
2174     Error(Loc, "expected register or immediate");
2175     return MatchOperand_ParseFail;
2176   }
2177   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2178     return MatchOperand_ParseFail;
2179 
2180   Abs = trySkipId("abs");
2181   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2182     return MatchOperand_ParseFail;
2183 
2184   Loc = getLoc();
2185   SP3Abs = trySkipToken(AsmToken::Pipe);
2186   if (Abs && SP3Abs) {
2187     Error(Loc, "expected register or immediate");
2188     return MatchOperand_ParseFail;
2189   }
2190 
2191   OperandMatchResultTy Res;
2192   if (AllowImm) {
2193     Res = parseRegOrImm(Operands, SP3Abs);
2194   } else {
2195     Res = parseReg(Operands);
2196   }
2197   if (Res != MatchOperand_Success) {
2198     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2199   }
2200 
2201   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2202     return MatchOperand_ParseFail;
2203   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2204     return MatchOperand_ParseFail;
2205   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2206     return MatchOperand_ParseFail;
2207 
2208   AMDGPUOperand::Modifiers Mods;
2209   Mods.Abs = Abs || SP3Abs;
2210   Mods.Neg = Neg || SP3Neg;
2211 
2212   if (Mods.hasFPModifiers()) {
2213     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2214     Op.setModifiers(Mods);
2215   }
2216   return MatchOperand_Success;
2217 }
2218 
2219 OperandMatchResultTy
2220 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2221                                                bool AllowImm) {
2222   bool Sext = trySkipId("sext");
2223   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2224     return MatchOperand_ParseFail;
2225 
2226   OperandMatchResultTy Res;
2227   if (AllowImm) {
2228     Res = parseRegOrImm(Operands);
2229   } else {
2230     Res = parseReg(Operands);
2231   }
2232   if (Res != MatchOperand_Success) {
2233     return Sext? MatchOperand_ParseFail : Res;
2234   }
2235 
2236   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2237     return MatchOperand_ParseFail;
2238 
2239   AMDGPUOperand::Modifiers Mods;
2240   Mods.Sext = Sext;
2241 
2242   if (Mods.hasIntModifiers()) {
2243     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2244     Op.setModifiers(Mods);
2245   }
2246 
2247   return MatchOperand_Success;
2248 }
2249 
2250 OperandMatchResultTy
2251 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2252   return parseRegOrImmWithFPInputMods(Operands, false);
2253 }
2254 
2255 OperandMatchResultTy
2256 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2257   return parseRegOrImmWithIntInputMods(Operands, false);
2258 }
2259 
2260 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2261   auto Loc = getLoc();
2262   if (trySkipId("off")) {
2263     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2264                                                 AMDGPUOperand::ImmTyOff, false));
2265     return MatchOperand_Success;
2266   }
2267 
2268   if (!isRegister())
2269     return MatchOperand_NoMatch;
2270 
2271   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2272   if (Reg) {
2273     Operands.push_back(std::move(Reg));
2274     return MatchOperand_Success;
2275   }
2276 
2277   return MatchOperand_ParseFail;
2278 
2279 }
2280 
2281 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2282   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2283 
2284   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2285       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2286       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2287       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2288     return Match_InvalidOperand;
2289 
2290   if ((TSFlags & SIInstrFlags::VOP3) &&
2291       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2292       getForcedEncodingSize() != 64)
2293     return Match_PreferE32;
2294 
2295   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2296       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2297     // v_mac_f32/16 allow only dst_sel == DWORD;
2298     auto OpNum =
2299         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2300     const auto &Op = Inst.getOperand(OpNum);
2301     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2302       return Match_InvalidOperand;
2303     }
2304   }
2305 
2306   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2307     // FIXME: Produces error without correct column reported.
2308     auto OpNum =
2309         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2310     const auto &Op = Inst.getOperand(OpNum);
2311     if (Op.getImm() != 0)
2312       return Match_InvalidOperand;
2313   }
2314 
2315   return Match_Success;
2316 }
2317 
2318 // What asm variants we should check
2319 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2320   if (getForcedEncodingSize() == 32) {
2321     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2322     return makeArrayRef(Variants);
2323   }
2324 
2325   if (isForcedVOP3()) {
2326     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2327     return makeArrayRef(Variants);
2328   }
2329 
2330   if (isForcedSDWA()) {
2331     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2332                                         AMDGPUAsmVariants::SDWA9};
2333     return makeArrayRef(Variants);
2334   }
2335 
2336   if (isForcedDPP()) {
2337     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2338     return makeArrayRef(Variants);
2339   }
2340 
2341   static const unsigned Variants[] = {
2342     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2343     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2344   };
2345 
2346   return makeArrayRef(Variants);
2347 }
2348 
2349 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2350   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2351   const unsigned Num = Desc.getNumImplicitUses();
2352   for (unsigned i = 0; i < Num; ++i) {
2353     unsigned Reg = Desc.ImplicitUses[i];
2354     switch (Reg) {
2355     case AMDGPU::FLAT_SCR:
2356     case AMDGPU::VCC:
2357     case AMDGPU::VCC_LO:
2358     case AMDGPU::VCC_HI:
2359     case AMDGPU::M0:
2360     case AMDGPU::SGPR_NULL:
2361       return Reg;
2362     default:
2363       break;
2364     }
2365   }
2366   return AMDGPU::NoRegister;
2367 }
2368 
2369 // NB: This code is correct only when used to check constant
2370 // bus limitations because GFX7 support no f16 inline constants.
2371 // Note that there are no cases when a GFX7 opcode violates
2372 // constant bus limitations due to the use of an f16 constant.
2373 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2374                                        unsigned OpIdx) const {
2375   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2376 
2377   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2378     return false;
2379   }
2380 
2381   const MCOperand &MO = Inst.getOperand(OpIdx);
2382 
2383   int64_t Val = MO.getImm();
2384   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2385 
2386   switch (OpSize) { // expected operand size
2387   case 8:
2388     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2389   case 4:
2390     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2391   case 2: {
2392     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2393     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2394         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2395       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2396     } else {
2397       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2398     }
2399   }
2400   default:
2401     llvm_unreachable("invalid operand size");
2402   }
2403 }
2404 
2405 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2406   const MCOperand &MO = Inst.getOperand(OpIdx);
2407   if (MO.isImm()) {
2408     return !isInlineConstant(Inst, OpIdx);
2409   }
2410   return !MO.isReg() ||
2411          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2412 }
2413 
2414 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2415   const unsigned Opcode = Inst.getOpcode();
2416   const MCInstrDesc &Desc = MII.get(Opcode);
2417   unsigned ConstantBusUseCount = 0;
2418 
2419   if (Desc.TSFlags &
2420       (SIInstrFlags::VOPC |
2421        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2422        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2423        SIInstrFlags::SDWA)) {
2424     // Check special imm operands (used by madmk, etc)
2425     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2426       ++ConstantBusUseCount;
2427     }
2428 
2429     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2430     if (SGPRUsed != AMDGPU::NoRegister) {
2431       ++ConstantBusUseCount;
2432     }
2433 
2434     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2435     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2436     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2437 
2438     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2439 
2440     for (int OpIdx : OpIndices) {
2441       if (OpIdx == -1) break;
2442 
2443       const MCOperand &MO = Inst.getOperand(OpIdx);
2444       if (usesConstantBus(Inst, OpIdx)) {
2445         if (MO.isReg()) {
2446           const unsigned Reg = mc2PseudoReg(MO.getReg());
2447           // Pairs of registers with a partial intersections like these
2448           //   s0, s[0:1]
2449           //   flat_scratch_lo, flat_scratch
2450           //   flat_scratch_lo, flat_scratch_hi
2451           // are theoretically valid but they are disabled anyway.
2452           // Note that this code mimics SIInstrInfo::verifyInstruction
2453           if (Reg != SGPRUsed) {
2454             ++ConstantBusUseCount;
2455           }
2456           SGPRUsed = Reg;
2457         } else { // Expression or a literal
2458           ++ConstantBusUseCount;
2459         }
2460       }
2461     }
2462   }
2463 
2464   return ConstantBusUseCount <= 1;
2465 }
2466 
2467 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2468   const unsigned Opcode = Inst.getOpcode();
2469   const MCInstrDesc &Desc = MII.get(Opcode);
2470 
2471   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2472   if (DstIdx == -1 ||
2473       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2474     return true;
2475   }
2476 
2477   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2478 
2479   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2480   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2481   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2482 
2483   assert(DstIdx != -1);
2484   const MCOperand &Dst = Inst.getOperand(DstIdx);
2485   assert(Dst.isReg());
2486   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2487 
2488   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2489 
2490   for (int SrcIdx : SrcIndices) {
2491     if (SrcIdx == -1) break;
2492     const MCOperand &Src = Inst.getOperand(SrcIdx);
2493     if (Src.isReg()) {
2494       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2495       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2496         return false;
2497       }
2498     }
2499   }
2500 
2501   return true;
2502 }
2503 
2504 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2505 
2506   const unsigned Opc = Inst.getOpcode();
2507   const MCInstrDesc &Desc = MII.get(Opc);
2508 
2509   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2510     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2511     assert(ClampIdx != -1);
2512     return Inst.getOperand(ClampIdx).getImm() == 0;
2513   }
2514 
2515   return true;
2516 }
2517 
2518 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2519 
2520   const unsigned Opc = Inst.getOpcode();
2521   const MCInstrDesc &Desc = MII.get(Opc);
2522 
2523   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2524     return true;
2525 
2526   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2527   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2528   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2529 
2530   assert(VDataIdx != -1);
2531   assert(DMaskIdx != -1);
2532   assert(TFEIdx != -1);
2533 
2534   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2535   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2536   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2537   if (DMask == 0)
2538     DMask = 1;
2539 
2540   unsigned DataSize =
2541     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2542   if (hasPackedD16()) {
2543     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2544     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2545       DataSize = (DataSize + 1) / 2;
2546   }
2547 
2548   return (VDataSize / 4) == DataSize + TFESize;
2549 }
2550 
2551 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2552 
2553   const unsigned Opc = Inst.getOpcode();
2554   const MCInstrDesc &Desc = MII.get(Opc);
2555 
2556   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2557     return true;
2558   if (!Desc.mayLoad() || !Desc.mayStore())
2559     return true; // Not atomic
2560 
2561   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2562   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2563 
2564   // This is an incomplete check because image_atomic_cmpswap
2565   // may only use 0x3 and 0xf while other atomic operations
2566   // may use 0x1 and 0x3. However these limitations are
2567   // verified when we check that dmask matches dst size.
2568   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2569 }
2570 
2571 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2572 
2573   const unsigned Opc = Inst.getOpcode();
2574   const MCInstrDesc &Desc = MII.get(Opc);
2575 
2576   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2577     return true;
2578 
2579   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2580   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2581 
2582   // GATHER4 instructions use dmask in a different fashion compared to
2583   // other MIMG instructions. The only useful DMASK values are
2584   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2585   // (red,red,red,red) etc.) The ISA document doesn't mention
2586   // this.
2587   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2588 }
2589 
2590 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2591 
2592   const unsigned Opc = Inst.getOpcode();
2593   const MCInstrDesc &Desc = MII.get(Opc);
2594 
2595   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2596     return true;
2597 
2598   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2599   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2600     if (isCI() || isSI())
2601       return false;
2602   }
2603 
2604   return true;
2605 }
2606 
2607 static bool IsRevOpcode(const unsigned Opcode)
2608 {
2609   switch (Opcode) {
2610   case AMDGPU::V_SUBREV_F32_e32:
2611   case AMDGPU::V_SUBREV_F32_e64:
2612   case AMDGPU::V_SUBREV_F32_e32_si:
2613   case AMDGPU::V_SUBREV_F32_e32_vi:
2614   case AMDGPU::V_SUBREV_F32_e64_si:
2615   case AMDGPU::V_SUBREV_F32_e64_vi:
2616   case AMDGPU::V_SUBREV_I32_e32:
2617   case AMDGPU::V_SUBREV_I32_e64:
2618   case AMDGPU::V_SUBREV_I32_e32_si:
2619   case AMDGPU::V_SUBREV_I32_e64_si:
2620   case AMDGPU::V_SUBBREV_U32_e32:
2621   case AMDGPU::V_SUBBREV_U32_e64:
2622   case AMDGPU::V_SUBBREV_U32_e32_si:
2623   case AMDGPU::V_SUBBREV_U32_e32_vi:
2624   case AMDGPU::V_SUBBREV_U32_e64_si:
2625   case AMDGPU::V_SUBBREV_U32_e64_vi:
2626   case AMDGPU::V_SUBREV_U32_e32:
2627   case AMDGPU::V_SUBREV_U32_e64:
2628   case AMDGPU::V_SUBREV_U32_e32_gfx9:
2629   case AMDGPU::V_SUBREV_U32_e32_vi:
2630   case AMDGPU::V_SUBREV_U32_e64_gfx9:
2631   case AMDGPU::V_SUBREV_U32_e64_vi:
2632   case AMDGPU::V_SUBREV_F16_e32:
2633   case AMDGPU::V_SUBREV_F16_e64:
2634   case AMDGPU::V_SUBREV_F16_e32_vi:
2635   case AMDGPU::V_SUBREV_F16_e64_vi:
2636   case AMDGPU::V_SUBREV_U16_e32:
2637   case AMDGPU::V_SUBREV_U16_e64:
2638   case AMDGPU::V_SUBREV_U16_e32_vi:
2639   case AMDGPU::V_SUBREV_U16_e64_vi:
2640   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
2641   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
2642   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
2643   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
2644   case AMDGPU::V_LSHLREV_B32_e32_si:
2645   case AMDGPU::V_LSHLREV_B32_e64_si:
2646   case AMDGPU::V_LSHLREV_B16_e32_vi:
2647   case AMDGPU::V_LSHLREV_B16_e64_vi:
2648   case AMDGPU::V_LSHLREV_B32_e32_vi:
2649   case AMDGPU::V_LSHLREV_B32_e64_vi:
2650   case AMDGPU::V_LSHLREV_B64_vi:
2651   case AMDGPU::V_LSHRREV_B32_e32_si:
2652   case AMDGPU::V_LSHRREV_B32_e64_si:
2653   case AMDGPU::V_LSHRREV_B16_e32_vi:
2654   case AMDGPU::V_LSHRREV_B16_e64_vi:
2655   case AMDGPU::V_LSHRREV_B32_e32_vi:
2656   case AMDGPU::V_LSHRREV_B32_e64_vi:
2657   case AMDGPU::V_LSHRREV_B64_vi:
2658   case AMDGPU::V_ASHRREV_I32_e64_si:
2659   case AMDGPU::V_ASHRREV_I32_e32_si:
2660   case AMDGPU::V_ASHRREV_I16_e32_vi:
2661   case AMDGPU::V_ASHRREV_I16_e64_vi:
2662   case AMDGPU::V_ASHRREV_I32_e32_vi:
2663   case AMDGPU::V_ASHRREV_I32_e64_vi:
2664   case AMDGPU::V_ASHRREV_I64_vi:
2665   case AMDGPU::V_PK_LSHLREV_B16_vi:
2666   case AMDGPU::V_PK_LSHRREV_B16_vi:
2667   case AMDGPU::V_PK_ASHRREV_I16_vi:
2668     return true;
2669   default:
2670     return false;
2671   }
2672 }
2673 
2674 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2675 
2676   using namespace SIInstrFlags;
2677   const unsigned Opcode = Inst.getOpcode();
2678   const MCInstrDesc &Desc = MII.get(Opcode);
2679 
2680   // lds_direct register is defined so that it can be used
2681   // with 9-bit operands only. Ignore encodings which do not accept these.
2682   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2683     return true;
2684 
2685   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2686   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2687   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2688 
2689   const int SrcIndices[] = { Src1Idx, Src2Idx };
2690 
2691   // lds_direct cannot be specified as either src1 or src2.
2692   for (int SrcIdx : SrcIndices) {
2693     if (SrcIdx == -1) break;
2694     const MCOperand &Src = Inst.getOperand(SrcIdx);
2695     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2696       return false;
2697     }
2698   }
2699 
2700   if (Src0Idx == -1)
2701     return true;
2702 
2703   const MCOperand &Src = Inst.getOperand(Src0Idx);
2704   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2705     return true;
2706 
2707   // lds_direct is specified as src0. Check additional limitations.
2708   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
2709 }
2710 
2711 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2712   unsigned Opcode = Inst.getOpcode();
2713   const MCInstrDesc &Desc = MII.get(Opcode);
2714   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2715     return true;
2716 
2717   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2718   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2719 
2720   const int OpIndices[] = { Src0Idx, Src1Idx };
2721 
2722   unsigned NumLiterals = 0;
2723   uint32_t LiteralValue;
2724 
2725   for (int OpIdx : OpIndices) {
2726     if (OpIdx == -1) break;
2727 
2728     const MCOperand &MO = Inst.getOperand(OpIdx);
2729     if (MO.isImm() &&
2730         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
2731         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2732         !isInlineConstant(Inst, OpIdx)) {
2733       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2734       if (NumLiterals == 0 || LiteralValue != Value) {
2735         LiteralValue = Value;
2736         ++NumLiterals;
2737       }
2738     }
2739   }
2740 
2741   return NumLiterals <= 1;
2742 }
2743 
2744 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2745                                           const SMLoc &IDLoc) {
2746   if (!validateLdsDirect(Inst)) {
2747     Error(IDLoc,
2748       "invalid use of lds_direct");
2749     return false;
2750   }
2751   if (!validateSOPLiteral(Inst)) {
2752     Error(IDLoc,
2753       "only one literal operand is allowed");
2754     return false;
2755   }
2756   if (!validateConstantBusLimitations(Inst)) {
2757     Error(IDLoc,
2758       "invalid operand (violates constant bus restrictions)");
2759     return false;
2760   }
2761   if (!validateEarlyClobberLimitations(Inst)) {
2762     Error(IDLoc,
2763       "destination must be different than all sources");
2764     return false;
2765   }
2766   if (!validateIntClampSupported(Inst)) {
2767     Error(IDLoc,
2768       "integer clamping is not supported on this GPU");
2769     return false;
2770   }
2771   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
2772   if (!validateMIMGD16(Inst)) {
2773     Error(IDLoc,
2774       "d16 modifier is not supported on this GPU");
2775     return false;
2776   }
2777   if (!validateMIMGDataSize(Inst)) {
2778     Error(IDLoc,
2779       "image data size does not match dmask and tfe");
2780     return false;
2781   }
2782   if (!validateMIMGAtomicDMask(Inst)) {
2783     Error(IDLoc,
2784       "invalid atomic image dmask");
2785     return false;
2786   }
2787   if (!validateMIMGGatherDMask(Inst)) {
2788     Error(IDLoc,
2789       "invalid image_gather dmask: only one bit must be set");
2790     return false;
2791   }
2792 
2793   return true;
2794 }
2795 
2796 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
2797                                             const FeatureBitset &FBS,
2798                                             unsigned VariantID = 0);
2799 
2800 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2801                                               OperandVector &Operands,
2802                                               MCStreamer &Out,
2803                                               uint64_t &ErrorInfo,
2804                                               bool MatchingInlineAsm) {
2805   MCInst Inst;
2806   unsigned Result = Match_Success;
2807   for (auto Variant : getMatchedVariants()) {
2808     uint64_t EI;
2809     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2810                                   Variant);
2811     // We order match statuses from least to most specific. We use most specific
2812     // status as resulting
2813     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2814     if ((R == Match_Success) ||
2815         (R == Match_PreferE32) ||
2816         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2817         (R == Match_InvalidOperand && Result != Match_MissingFeature
2818                                    && Result != Match_PreferE32) ||
2819         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2820                                    && Result != Match_MissingFeature
2821                                    && Result != Match_PreferE32)) {
2822       Result = R;
2823       ErrorInfo = EI;
2824     }
2825     if (R == Match_Success)
2826       break;
2827   }
2828 
2829   switch (Result) {
2830   default: break;
2831   case Match_Success:
2832     if (!validateInstruction(Inst, IDLoc)) {
2833       return true;
2834     }
2835     Inst.setLoc(IDLoc);
2836     Out.EmitInstruction(Inst, getSTI());
2837     return false;
2838 
2839   case Match_MissingFeature:
2840     return Error(IDLoc, "instruction not supported on this GPU");
2841 
2842   case Match_MnemonicFail: {
2843     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2844     std::string Suggestion = AMDGPUMnemonicSpellCheck(
2845         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2846     return Error(IDLoc, "invalid instruction" + Suggestion,
2847                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
2848   }
2849 
2850   case Match_InvalidOperand: {
2851     SMLoc ErrorLoc = IDLoc;
2852     if (ErrorInfo != ~0ULL) {
2853       if (ErrorInfo >= Operands.size()) {
2854         return Error(IDLoc, "too few operands for instruction");
2855       }
2856       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2857       if (ErrorLoc == SMLoc())
2858         ErrorLoc = IDLoc;
2859     }
2860     return Error(ErrorLoc, "invalid operand for instruction");
2861   }
2862 
2863   case Match_PreferE32:
2864     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2865                         "should be encoded as e32");
2866   }
2867   llvm_unreachable("Implement any new match types added!");
2868 }
2869 
2870 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2871   int64_t Tmp = -1;
2872   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2873     return true;
2874   }
2875   if (getParser().parseAbsoluteExpression(Tmp)) {
2876     return true;
2877   }
2878   Ret = static_cast<uint32_t>(Tmp);
2879   return false;
2880 }
2881 
2882 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2883                                                uint32_t &Minor) {
2884   if (ParseAsAbsoluteExpression(Major))
2885     return TokError("invalid major version");
2886 
2887   if (getLexer().isNot(AsmToken::Comma))
2888     return TokError("minor version number required, comma expected");
2889   Lex();
2890 
2891   if (ParseAsAbsoluteExpression(Minor))
2892     return TokError("invalid minor version");
2893 
2894   return false;
2895 }
2896 
2897 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2898   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2899     return TokError("directive only supported for amdgcn architecture");
2900 
2901   std::string Target;
2902 
2903   SMLoc TargetStart = getTok().getLoc();
2904   if (getParser().parseEscapedString(Target))
2905     return true;
2906   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2907 
2908   std::string ExpectedTarget;
2909   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2910   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2911 
2912   if (Target != ExpectedTargetOS.str())
2913     return getParser().Error(TargetRange.Start, "target must match options",
2914                              TargetRange);
2915 
2916   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2917   return false;
2918 }
2919 
2920 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
2921   return getParser().Error(Range.Start, "value out of range", Range);
2922 }
2923 
2924 bool AMDGPUAsmParser::calculateGPRBlocks(
2925     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
2926     bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
2927     unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
2928     unsigned &SGPRBlocks) {
2929   // TODO(scott.linder): These calculations are duplicated from
2930   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2931   IsaVersion Version = getIsaVersion(getSTI().getCPU());
2932 
2933   unsigned NumVGPRs = NextFreeVGPR;
2934   unsigned NumSGPRs = NextFreeSGPR;
2935 
2936   if (Version.Major >= 10)
2937     NumSGPRs = 0;
2938   else {
2939     unsigned MaxAddressableNumSGPRs =
2940         IsaInfo::getAddressableNumSGPRs(&getSTI());
2941 
2942     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
2943         NumSGPRs > MaxAddressableNumSGPRs)
2944       return OutOfRangeError(SGPRRange);
2945 
2946     NumSGPRs +=
2947         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
2948 
2949     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
2950         NumSGPRs > MaxAddressableNumSGPRs)
2951       return OutOfRangeError(SGPRRange);
2952 
2953     if (Features.test(FeatureSGPRInitBug))
2954       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
2955   }
2956 
2957   VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
2958   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
2959 
2960   return false;
2961 }
2962 
2963 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
2964   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2965     return TokError("directive only supported for amdgcn architecture");
2966 
2967   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
2968     return TokError("directive only supported for amdhsa OS");
2969 
2970   StringRef KernelName;
2971   if (getParser().parseIdentifier(KernelName))
2972     return true;
2973 
2974   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
2975 
2976   StringSet<> Seen;
2977 
2978   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
2979 
2980   SMRange VGPRRange;
2981   uint64_t NextFreeVGPR = 0;
2982   SMRange SGPRRange;
2983   uint64_t NextFreeSGPR = 0;
2984   unsigned UserSGPRCount = 0;
2985   bool ReserveVCC = true;
2986   bool ReserveFlatScr = true;
2987   bool ReserveXNACK = hasXNACK();
2988 
2989   while (true) {
2990     while (getLexer().is(AsmToken::EndOfStatement))
2991       Lex();
2992 
2993     if (getLexer().isNot(AsmToken::Identifier))
2994       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
2995 
2996     StringRef ID = getTok().getIdentifier();
2997     SMRange IDRange = getTok().getLocRange();
2998     Lex();
2999 
3000     if (ID == ".end_amdhsa_kernel")
3001       break;
3002 
3003     if (Seen.find(ID) != Seen.end())
3004       return TokError(".amdhsa_ directives cannot be repeated");
3005     Seen.insert(ID);
3006 
3007     SMLoc ValStart = getTok().getLoc();
3008     int64_t IVal;
3009     if (getParser().parseAbsoluteExpression(IVal))
3010       return true;
3011     SMLoc ValEnd = getTok().getLoc();
3012     SMRange ValRange = SMRange(ValStart, ValEnd);
3013 
3014     if (IVal < 0)
3015       return OutOfRangeError(ValRange);
3016 
3017     uint64_t Val = IVal;
3018 
3019 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3020   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3021     return OutOfRangeError(RANGE);                                             \
3022   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3023 
3024     if (ID == ".amdhsa_group_segment_fixed_size") {
3025       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3026         return OutOfRangeError(ValRange);
3027       KD.group_segment_fixed_size = Val;
3028     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3029       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3030         return OutOfRangeError(ValRange);
3031       KD.private_segment_fixed_size = Val;
3032     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3033       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3034                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3035                        Val, ValRange);
3036       UserSGPRCount += 4;
3037     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3038       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3039                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3040                        ValRange);
3041       UserSGPRCount += 2;
3042     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3043       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3044                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3045                        ValRange);
3046       UserSGPRCount += 2;
3047     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3048       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3049                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3050                        Val, ValRange);
3051       UserSGPRCount += 2;
3052     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3053       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3054                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3055                        ValRange);
3056       UserSGPRCount += 2;
3057     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3058       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3059                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3060                        ValRange);
3061       UserSGPRCount += 2;
3062     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3063       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3064                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3065                        Val, ValRange);
3066       UserSGPRCount += 1;
3067     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3068       PARSE_BITS_ENTRY(
3069           KD.compute_pgm_rsrc2,
3070           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3071           ValRange);
3072     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3073       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3074                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3075                        ValRange);
3076     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3077       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3078                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3079                        ValRange);
3080     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3081       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3082                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3083                        ValRange);
3084     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3085       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3086                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3087                        ValRange);
3088     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3089       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3090                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3091                        ValRange);
3092     } else if (ID == ".amdhsa_next_free_vgpr") {
3093       VGPRRange = ValRange;
3094       NextFreeVGPR = Val;
3095     } else if (ID == ".amdhsa_next_free_sgpr") {
3096       SGPRRange = ValRange;
3097       NextFreeSGPR = Val;
3098     } else if (ID == ".amdhsa_reserve_vcc") {
3099       if (!isUInt<1>(Val))
3100         return OutOfRangeError(ValRange);
3101       ReserveVCC = Val;
3102     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3103       if (IVersion.Major < 7)
3104         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3105                                  IDRange);
3106       if (!isUInt<1>(Val))
3107         return OutOfRangeError(ValRange);
3108       ReserveFlatScr = Val;
3109     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3110       if (IVersion.Major < 8)
3111         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3112                                  IDRange);
3113       if (!isUInt<1>(Val))
3114         return OutOfRangeError(ValRange);
3115       ReserveXNACK = Val;
3116     } else if (ID == ".amdhsa_float_round_mode_32") {
3117       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3118                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3119     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3120       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3121                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3122     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3123       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3124                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3125     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3126       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3127                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3128                        ValRange);
3129     } else if (ID == ".amdhsa_dx10_clamp") {
3130       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3131                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3132     } else if (ID == ".amdhsa_ieee_mode") {
3133       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3134                        Val, ValRange);
3135     } else if (ID == ".amdhsa_fp16_overflow") {
3136       if (IVersion.Major < 9)
3137         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3138                                  IDRange);
3139       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3140                        ValRange);
3141     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3142       PARSE_BITS_ENTRY(
3143           KD.compute_pgm_rsrc2,
3144           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3145           ValRange);
3146     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3147       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3148                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3149                        Val, ValRange);
3150     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3151       PARSE_BITS_ENTRY(
3152           KD.compute_pgm_rsrc2,
3153           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3154           ValRange);
3155     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3156       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3157                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3158                        Val, ValRange);
3159     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3160       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3161                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3162                        Val, ValRange);
3163     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3164       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3165                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3166                        Val, ValRange);
3167     } else if (ID == ".amdhsa_exception_int_div_zero") {
3168       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3169                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3170                        Val, ValRange);
3171     } else {
3172       return getParser().Error(IDRange.Start,
3173                                "unknown .amdhsa_kernel directive", IDRange);
3174     }
3175 
3176 #undef PARSE_BITS_ENTRY
3177   }
3178 
3179   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3180     return TokError(".amdhsa_next_free_vgpr directive is required");
3181 
3182   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3183     return TokError(".amdhsa_next_free_sgpr directive is required");
3184 
3185   unsigned VGPRBlocks;
3186   unsigned SGPRBlocks;
3187   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3188                          ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
3189                          SGPRRange, VGPRBlocks, SGPRBlocks))
3190     return true;
3191 
3192   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3193           VGPRBlocks))
3194     return OutOfRangeError(VGPRRange);
3195   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3196                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3197 
3198   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3199           SGPRBlocks))
3200     return OutOfRangeError(SGPRRange);
3201   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3202                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3203                   SGPRBlocks);
3204 
3205   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3206     return TokError("too many user SGPRs enabled");
3207   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3208                   UserSGPRCount);
3209 
3210   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3211       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3212       ReserveFlatScr, ReserveXNACK);
3213   return false;
3214 }
3215 
3216 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3217   uint32_t Major;
3218   uint32_t Minor;
3219 
3220   if (ParseDirectiveMajorMinor(Major, Minor))
3221     return true;
3222 
3223   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3224   return false;
3225 }
3226 
3227 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3228   uint32_t Major;
3229   uint32_t Minor;
3230   uint32_t Stepping;
3231   StringRef VendorName;
3232   StringRef ArchName;
3233 
3234   // If this directive has no arguments, then use the ISA version for the
3235   // targeted GPU.
3236   if (getLexer().is(AsmToken::EndOfStatement)) {
3237     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3238     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3239                                                       ISA.Stepping,
3240                                                       "AMD", "AMDGPU");
3241     return false;
3242   }
3243 
3244   if (ParseDirectiveMajorMinor(Major, Minor))
3245     return true;
3246 
3247   if (getLexer().isNot(AsmToken::Comma))
3248     return TokError("stepping version number required, comma expected");
3249   Lex();
3250 
3251   if (ParseAsAbsoluteExpression(Stepping))
3252     return TokError("invalid stepping version");
3253 
3254   if (getLexer().isNot(AsmToken::Comma))
3255     return TokError("vendor name required, comma expected");
3256   Lex();
3257 
3258   if (getLexer().isNot(AsmToken::String))
3259     return TokError("invalid vendor name");
3260 
3261   VendorName = getLexer().getTok().getStringContents();
3262   Lex();
3263 
3264   if (getLexer().isNot(AsmToken::Comma))
3265     return TokError("arch name required, comma expected");
3266   Lex();
3267 
3268   if (getLexer().isNot(AsmToken::String))
3269     return TokError("invalid arch name");
3270 
3271   ArchName = getLexer().getTok().getStringContents();
3272   Lex();
3273 
3274   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3275                                                     VendorName, ArchName);
3276   return false;
3277 }
3278 
3279 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3280                                                amd_kernel_code_t &Header) {
3281   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3282   // assembly for backwards compatibility.
3283   if (ID == "max_scratch_backing_memory_byte_size") {
3284     Parser.eatToEndOfStatement();
3285     return false;
3286   }
3287 
3288   SmallString<40> ErrStr;
3289   raw_svector_ostream Err(ErrStr);
3290   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3291     return TokError(Err.str());
3292   }
3293   Lex();
3294   return false;
3295 }
3296 
3297 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3298   amd_kernel_code_t Header;
3299   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3300 
3301   while (true) {
3302     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3303     // will set the current token to EndOfStatement.
3304     while(getLexer().is(AsmToken::EndOfStatement))
3305       Lex();
3306 
3307     if (getLexer().isNot(AsmToken::Identifier))
3308       return TokError("expected value identifier or .end_amd_kernel_code_t");
3309 
3310     StringRef ID = getLexer().getTok().getIdentifier();
3311     Lex();
3312 
3313     if (ID == ".end_amd_kernel_code_t")
3314       break;
3315 
3316     if (ParseAMDKernelCodeTValue(ID, Header))
3317       return true;
3318   }
3319 
3320   getTargetStreamer().EmitAMDKernelCodeT(Header);
3321 
3322   return false;
3323 }
3324 
3325 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3326   if (getLexer().isNot(AsmToken::Identifier))
3327     return TokError("expected symbol name");
3328 
3329   StringRef KernelName = Parser.getTok().getString();
3330 
3331   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3332                                            ELF::STT_AMDGPU_HSA_KERNEL);
3333   Lex();
3334   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3335     KernelScope.initialize(getContext());
3336   return false;
3337 }
3338 
3339 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3340   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3341     return Error(getParser().getTok().getLoc(),
3342                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3343                  "architectures");
3344   }
3345 
3346   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3347 
3348   std::string ISAVersionStringFromSTI;
3349   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3350   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3351 
3352   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3353     return Error(getParser().getTok().getLoc(),
3354                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3355                  "arguments specified through the command line");
3356   }
3357 
3358   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3359   Lex();
3360 
3361   return false;
3362 }
3363 
3364 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3365   const char *AssemblerDirectiveBegin;
3366   const char *AssemblerDirectiveEnd;
3367   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3368       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3369           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3370                             HSAMD::V3::AssemblerDirectiveEnd)
3371           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3372                             HSAMD::AssemblerDirectiveEnd);
3373 
3374   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3375     return Error(getParser().getTok().getLoc(),
3376                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3377                  "not available on non-amdhsa OSes")).str());
3378   }
3379 
3380   std::string HSAMetadataString;
3381   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
3382                           HSAMetadataString))
3383     return true;
3384 
3385   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3386     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3387       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3388   } else {
3389     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3390       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3391   }
3392 
3393   return false;
3394 }
3395 
3396 /// Common code to parse out a block of text (typically YAML) between start and
3397 /// end directives.
3398 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
3399                                           const char *AssemblerDirectiveEnd,
3400                                           std::string &CollectString) {
3401 
3402   raw_string_ostream CollectStream(CollectString);
3403 
3404   getLexer().setSkipSpace(false);
3405 
3406   bool FoundEnd = false;
3407   while (!getLexer().is(AsmToken::Eof)) {
3408     while (getLexer().is(AsmToken::Space)) {
3409       CollectStream << getLexer().getTok().getString();
3410       Lex();
3411     }
3412 
3413     if (getLexer().is(AsmToken::Identifier)) {
3414       StringRef ID = getLexer().getTok().getIdentifier();
3415       if (ID == AssemblerDirectiveEnd) {
3416         Lex();
3417         FoundEnd = true;
3418         break;
3419       }
3420     }
3421 
3422     CollectStream << Parser.parseStringToEndOfStatement()
3423                   << getContext().getAsmInfo()->getSeparatorString();
3424 
3425     Parser.eatToEndOfStatement();
3426   }
3427 
3428   getLexer().setSkipSpace(true);
3429 
3430   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3431     return TokError(Twine("expected directive ") +
3432                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
3433   }
3434 
3435   CollectStream.flush();
3436   return false;
3437 }
3438 
3439 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3440 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3441   std::string String;
3442   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3443                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3444     return true;
3445 
3446   auto PALMetadata = getTargetStreamer().getPALMetadata();
3447   if (!PALMetadata->setFromString(String))
3448     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3449   return false;
3450 }
3451 
3452 /// Parse the assembler directive for old linear-format PAL metadata.
3453 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3454   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3455     return Error(getParser().getTok().getLoc(),
3456                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3457                  "not available on non-amdpal OSes")).str());
3458   }
3459 
3460   auto PALMetadata = getTargetStreamer().getPALMetadata();
3461   PALMetadata->setLegacy();
3462   for (;;) {
3463     uint32_t Key, Value;
3464     if (ParseAsAbsoluteExpression(Key)) {
3465       return TokError(Twine("invalid value in ") +
3466                       Twine(PALMD::AssemblerDirective));
3467     }
3468     if (getLexer().isNot(AsmToken::Comma)) {
3469       return TokError(Twine("expected an even number of values in ") +
3470                       Twine(PALMD::AssemblerDirective));
3471     }
3472     Lex();
3473     if (ParseAsAbsoluteExpression(Value)) {
3474       return TokError(Twine("invalid value in ") +
3475                       Twine(PALMD::AssemblerDirective));
3476     }
3477     PALMetadata->setRegister(Key, Value);
3478     if (getLexer().isNot(AsmToken::Comma))
3479       break;
3480     Lex();
3481   }
3482   return false;
3483 }
3484 
3485 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3486   StringRef IDVal = DirectiveID.getString();
3487 
3488   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3489     if (IDVal == ".amdgcn_target")
3490       return ParseDirectiveAMDGCNTarget();
3491 
3492     if (IDVal == ".amdhsa_kernel")
3493       return ParseDirectiveAMDHSAKernel();
3494 
3495     // TODO: Restructure/combine with PAL metadata directive.
3496     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3497       return ParseDirectiveHSAMetadata();
3498   } else {
3499     if (IDVal == ".hsa_code_object_version")
3500       return ParseDirectiveHSACodeObjectVersion();
3501 
3502     if (IDVal == ".hsa_code_object_isa")
3503       return ParseDirectiveHSACodeObjectISA();
3504 
3505     if (IDVal == ".amd_kernel_code_t")
3506       return ParseDirectiveAMDKernelCodeT();
3507 
3508     if (IDVal == ".amdgpu_hsa_kernel")
3509       return ParseDirectiveAMDGPUHsaKernel();
3510 
3511     if (IDVal == ".amd_amdgpu_isa")
3512       return ParseDirectiveISAVersion();
3513 
3514     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3515       return ParseDirectiveHSAMetadata();
3516   }
3517 
3518   if (IDVal == PALMD::AssemblerDirectiveBegin)
3519     return ParseDirectivePALMetadataBegin();
3520 
3521   if (IDVal == PALMD::AssemblerDirective)
3522     return ParseDirectivePALMetadata();
3523 
3524   return true;
3525 }
3526 
3527 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3528                                            unsigned RegNo) const {
3529 
3530   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3531        R.isValid(); ++R) {
3532     if (*R == RegNo)
3533       return isGFX9() || isGFX10();
3534   }
3535 
3536   // GFX10 has 2 more SGPRs 104 and 105.
3537   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
3538        R.isValid(); ++R) {
3539     if (*R == RegNo)
3540       return hasSGPR104_SGPR105();
3541   }
3542 
3543   switch (RegNo) {
3544   case AMDGPU::TBA:
3545   case AMDGPU::TBA_LO:
3546   case AMDGPU::TBA_HI:
3547   case AMDGPU::TMA:
3548   case AMDGPU::TMA_LO:
3549   case AMDGPU::TMA_HI:
3550     return !isGFX9() && !isGFX10();
3551   case AMDGPU::XNACK_MASK:
3552   case AMDGPU::XNACK_MASK_LO:
3553   case AMDGPU::XNACK_MASK_HI:
3554     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
3555   case AMDGPU::SGPR_NULL:
3556     return isGFX10();
3557   default:
3558     break;
3559   }
3560 
3561   if (isInlineValue(RegNo))
3562     return !isCI() && !isSI() && !isVI();
3563 
3564   if (isCI())
3565     return true;
3566 
3567   if (isSI() || isGFX10()) {
3568     // No flat_scr on SI.
3569     // On GFX10 flat scratch is not a valid register operand and can only be
3570     // accessed with s_setreg/s_getreg.
3571     switch (RegNo) {
3572     case AMDGPU::FLAT_SCR:
3573     case AMDGPU::FLAT_SCR_LO:
3574     case AMDGPU::FLAT_SCR_HI:
3575       return false;
3576     default:
3577       return true;
3578     }
3579   }
3580 
3581   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3582   // SI/CI have.
3583   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3584        R.isValid(); ++R) {
3585     if (*R == RegNo)
3586       return hasSGPR102_SGPR103();
3587   }
3588 
3589   return true;
3590 }
3591 
3592 OperandMatchResultTy
3593 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
3594   // Try to parse with a custom parser
3595   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3596 
3597   // If we successfully parsed the operand or if there as an error parsing,
3598   // we are done.
3599   //
3600   // If we are parsing after we reach EndOfStatement then this means we
3601   // are appending default values to the Operands list.  This is only done
3602   // by custom parser, so we shouldn't continue on to the generic parsing.
3603   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3604       getLexer().is(AsmToken::EndOfStatement))
3605     return ResTy;
3606 
3607   ResTy = parseRegOrImm(Operands);
3608 
3609   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
3610     return ResTy;
3611 
3612   const auto &Tok = Parser.getTok();
3613   SMLoc S = Tok.getLoc();
3614 
3615   const MCExpr *Expr = nullptr;
3616   if (!Parser.parseExpression(Expr)) {
3617     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3618     return MatchOperand_Success;
3619   }
3620 
3621   // Possibly this is an instruction flag like 'gds'.
3622   if (Tok.getKind() == AsmToken::Identifier) {
3623     Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3624     Parser.Lex();
3625     return MatchOperand_Success;
3626   }
3627 
3628   return MatchOperand_NoMatch;
3629 }
3630 
3631 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3632   // Clear any forced encodings from the previous instruction.
3633   setForcedEncodingSize(0);
3634   setForcedDPP(false);
3635   setForcedSDWA(false);
3636 
3637   if (Name.endswith("_e64")) {
3638     setForcedEncodingSize(64);
3639     return Name.substr(0, Name.size() - 4);
3640   } else if (Name.endswith("_e32")) {
3641     setForcedEncodingSize(32);
3642     return Name.substr(0, Name.size() - 4);
3643   } else if (Name.endswith("_dpp")) {
3644     setForcedDPP(true);
3645     return Name.substr(0, Name.size() - 4);
3646   } else if (Name.endswith("_sdwa")) {
3647     setForcedSDWA(true);
3648     return Name.substr(0, Name.size() - 5);
3649   }
3650   return Name;
3651 }
3652 
3653 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3654                                        StringRef Name,
3655                                        SMLoc NameLoc, OperandVector &Operands) {
3656   // Add the instruction mnemonic
3657   Name = parseMnemonicSuffix(Name);
3658   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3659 
3660   while (!getLexer().is(AsmToken::EndOfStatement)) {
3661     OperandMatchResultTy Res = parseOperand(Operands, Name);
3662 
3663     // Eat the comma or space if there is one.
3664     if (getLexer().is(AsmToken::Comma))
3665       Parser.Lex();
3666 
3667     switch (Res) {
3668       case MatchOperand_Success: break;
3669       case MatchOperand_ParseFail:
3670         Error(getLexer().getLoc(), "failed parsing operand.");
3671         while (!getLexer().is(AsmToken::EndOfStatement)) {
3672           Parser.Lex();
3673         }
3674         return true;
3675       case MatchOperand_NoMatch:
3676         Error(getLexer().getLoc(), "not a valid operand.");
3677         while (!getLexer().is(AsmToken::EndOfStatement)) {
3678           Parser.Lex();
3679         }
3680         return true;
3681     }
3682   }
3683 
3684   return false;
3685 }
3686 
3687 //===----------------------------------------------------------------------===//
3688 // Utility functions
3689 //===----------------------------------------------------------------------===//
3690 
3691 OperandMatchResultTy
3692 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
3693   switch(getLexer().getKind()) {
3694     default: return MatchOperand_NoMatch;
3695     case AsmToken::Identifier: {
3696       StringRef Name = Parser.getTok().getString();
3697       if (!Name.equals(Prefix)) {
3698         return MatchOperand_NoMatch;
3699       }
3700 
3701       Parser.Lex();
3702       if (getLexer().isNot(AsmToken::Colon))
3703         return MatchOperand_ParseFail;
3704 
3705       Parser.Lex();
3706 
3707       bool IsMinus = false;
3708       if (getLexer().getKind() == AsmToken::Minus) {
3709         Parser.Lex();
3710         IsMinus = true;
3711       }
3712 
3713       if (getLexer().isNot(AsmToken::Integer))
3714         return MatchOperand_ParseFail;
3715 
3716       if (getParser().parseAbsoluteExpression(Int))
3717         return MatchOperand_ParseFail;
3718 
3719       if (IsMinus)
3720         Int = -Int;
3721       break;
3722     }
3723   }
3724   return MatchOperand_Success;
3725 }
3726 
3727 OperandMatchResultTy
3728 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3729                                     AMDGPUOperand::ImmTy ImmTy,
3730                                     bool (*ConvertResult)(int64_t&)) {
3731   SMLoc S = Parser.getTok().getLoc();
3732   int64_t Value = 0;
3733 
3734   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3735   if (Res != MatchOperand_Success)
3736     return Res;
3737 
3738   if (ConvertResult && !ConvertResult(Value)) {
3739     return MatchOperand_ParseFail;
3740   }
3741 
3742   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3743   return MatchOperand_Success;
3744 }
3745 
3746 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
3747   const char *Prefix,
3748   OperandVector &Operands,
3749   AMDGPUOperand::ImmTy ImmTy,
3750   bool (*ConvertResult)(int64_t&)) {
3751   StringRef Name = Parser.getTok().getString();
3752   if (!Name.equals(Prefix))
3753     return MatchOperand_NoMatch;
3754 
3755   Parser.Lex();
3756   if (getLexer().isNot(AsmToken::Colon))
3757     return MatchOperand_ParseFail;
3758 
3759   Parser.Lex();
3760   if (getLexer().isNot(AsmToken::LBrac))
3761     return MatchOperand_ParseFail;
3762   Parser.Lex();
3763 
3764   unsigned Val = 0;
3765   SMLoc S = Parser.getTok().getLoc();
3766 
3767   // FIXME: How to verify the number of elements matches the number of src
3768   // operands?
3769   for (int I = 0; I < 4; ++I) {
3770     if (I != 0) {
3771       if (getLexer().is(AsmToken::RBrac))
3772         break;
3773 
3774       if (getLexer().isNot(AsmToken::Comma))
3775         return MatchOperand_ParseFail;
3776       Parser.Lex();
3777     }
3778 
3779     if (getLexer().isNot(AsmToken::Integer))
3780       return MatchOperand_ParseFail;
3781 
3782     int64_t Op;
3783     if (getParser().parseAbsoluteExpression(Op))
3784       return MatchOperand_ParseFail;
3785 
3786     if (Op != 0 && Op != 1)
3787       return MatchOperand_ParseFail;
3788     Val |= (Op << I);
3789   }
3790 
3791   Parser.Lex();
3792   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3793   return MatchOperand_Success;
3794 }
3795 
3796 OperandMatchResultTy
3797 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
3798                                AMDGPUOperand::ImmTy ImmTy) {
3799   int64_t Bit = 0;
3800   SMLoc S = Parser.getTok().getLoc();
3801 
3802   // We are at the end of the statement, and this is a default argument, so
3803   // use a default value.
3804   if (getLexer().isNot(AsmToken::EndOfStatement)) {
3805     switch(getLexer().getKind()) {
3806       case AsmToken::Identifier: {
3807         StringRef Tok = Parser.getTok().getString();
3808         if (Tok == Name) {
3809           if (Tok == "r128" && isGFX9())
3810             Error(S, "r128 modifier is not supported on this GPU");
3811           if (Tok == "a16" && !isGFX9())
3812             Error(S, "a16 modifier is not supported on this GPU");
3813           Bit = 1;
3814           Parser.Lex();
3815         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
3816           Bit = 0;
3817           Parser.Lex();
3818         } else {
3819           return MatchOperand_NoMatch;
3820         }
3821         break;
3822       }
3823       default:
3824         return MatchOperand_NoMatch;
3825     }
3826   }
3827 
3828   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
3829   return MatchOperand_Success;
3830 }
3831 
3832 static void addOptionalImmOperand(
3833   MCInst& Inst, const OperandVector& Operands,
3834   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3835   AMDGPUOperand::ImmTy ImmT,
3836   int64_t Default = 0) {
3837   auto i = OptionalIdx.find(ImmT);
3838   if (i != OptionalIdx.end()) {
3839     unsigned Idx = i->second;
3840     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3841   } else {
3842     Inst.addOperand(MCOperand::createImm(Default));
3843   }
3844 }
3845 
3846 OperandMatchResultTy
3847 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3848   if (getLexer().isNot(AsmToken::Identifier)) {
3849     return MatchOperand_NoMatch;
3850   }
3851   StringRef Tok = Parser.getTok().getString();
3852   if (Tok != Prefix) {
3853     return MatchOperand_NoMatch;
3854   }
3855 
3856   Parser.Lex();
3857   if (getLexer().isNot(AsmToken::Colon)) {
3858     return MatchOperand_ParseFail;
3859   }
3860 
3861   Parser.Lex();
3862   if (getLexer().isNot(AsmToken::Identifier)) {
3863     return MatchOperand_ParseFail;
3864   }
3865 
3866   Value = Parser.getTok().getString();
3867   return MatchOperand_Success;
3868 }
3869 
3870 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
3871 // values to live in a joint format operand in the MCInst encoding.
3872 OperandMatchResultTy
3873 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
3874   SMLoc S = Parser.getTok().getLoc();
3875   int64_t Dfmt = 0, Nfmt = 0;
3876   // dfmt and nfmt can appear in either order, and each is optional.
3877   bool GotDfmt = false, GotNfmt = false;
3878   while (!GotDfmt || !GotNfmt) {
3879     if (!GotDfmt) {
3880       auto Res = parseIntWithPrefix("dfmt", Dfmt);
3881       if (Res != MatchOperand_NoMatch) {
3882         if (Res != MatchOperand_Success)
3883           return Res;
3884         if (Dfmt >= 16) {
3885           Error(Parser.getTok().getLoc(), "out of range dfmt");
3886           return MatchOperand_ParseFail;
3887         }
3888         GotDfmt = true;
3889         Parser.Lex();
3890         continue;
3891       }
3892     }
3893     if (!GotNfmt) {
3894       auto Res = parseIntWithPrefix("nfmt", Nfmt);
3895       if (Res != MatchOperand_NoMatch) {
3896         if (Res != MatchOperand_Success)
3897           return Res;
3898         if (Nfmt >= 8) {
3899           Error(Parser.getTok().getLoc(), "out of range nfmt");
3900           return MatchOperand_ParseFail;
3901         }
3902         GotNfmt = true;
3903         Parser.Lex();
3904         continue;
3905       }
3906     }
3907     break;
3908   }
3909   if (!GotDfmt && !GotNfmt)
3910     return MatchOperand_NoMatch;
3911   auto Format = Dfmt | Nfmt << 4;
3912   Operands.push_back(
3913       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
3914   return MatchOperand_Success;
3915 }
3916 
3917 //===----------------------------------------------------------------------===//
3918 // ds
3919 //===----------------------------------------------------------------------===//
3920 
3921 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3922                                     const OperandVector &Operands) {
3923   OptionalImmIndexMap OptionalIdx;
3924 
3925   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3926     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3927 
3928     // Add the register arguments
3929     if (Op.isReg()) {
3930       Op.addRegOperands(Inst, 1);
3931       continue;
3932     }
3933 
3934     // Handle optional arguments
3935     OptionalIdx[Op.getImmTy()] = i;
3936   }
3937 
3938   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3939   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3940   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3941 
3942   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3943 }
3944 
3945 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
3946                                 bool IsGdsHardcoded) {
3947   OptionalImmIndexMap OptionalIdx;
3948 
3949   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3950     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3951 
3952     // Add the register arguments
3953     if (Op.isReg()) {
3954       Op.addRegOperands(Inst, 1);
3955       continue;
3956     }
3957 
3958     if (Op.isToken() && Op.getToken() == "gds") {
3959       IsGdsHardcoded = true;
3960       continue;
3961     }
3962 
3963     // Handle optional arguments
3964     OptionalIdx[Op.getImmTy()] = i;
3965   }
3966 
3967   AMDGPUOperand::ImmTy OffsetType =
3968     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
3969      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
3970                                                       AMDGPUOperand::ImmTyOffset;
3971 
3972   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
3973 
3974   if (!IsGdsHardcoded) {
3975     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3976   }
3977   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3978 }
3979 
3980 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
3981   OptionalImmIndexMap OptionalIdx;
3982 
3983   unsigned OperandIdx[4];
3984   unsigned EnMask = 0;
3985   int SrcIdx = 0;
3986 
3987   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3988     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3989 
3990     // Add the register arguments
3991     if (Op.isReg()) {
3992       assert(SrcIdx < 4);
3993       OperandIdx[SrcIdx] = Inst.size();
3994       Op.addRegOperands(Inst, 1);
3995       ++SrcIdx;
3996       continue;
3997     }
3998 
3999     if (Op.isOff()) {
4000       assert(SrcIdx < 4);
4001       OperandIdx[SrcIdx] = Inst.size();
4002       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4003       ++SrcIdx;
4004       continue;
4005     }
4006 
4007     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4008       Op.addImmOperands(Inst, 1);
4009       continue;
4010     }
4011 
4012     if (Op.isToken() && Op.getToken() == "done")
4013       continue;
4014 
4015     // Handle optional arguments
4016     OptionalIdx[Op.getImmTy()] = i;
4017   }
4018 
4019   assert(SrcIdx == 4);
4020 
4021   bool Compr = false;
4022   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4023     Compr = true;
4024     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4025     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4026     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4027   }
4028 
4029   for (auto i = 0; i < SrcIdx; ++i) {
4030     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4031       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4032     }
4033   }
4034 
4035   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4036   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4037 
4038   Inst.addOperand(MCOperand::createImm(EnMask));
4039 }
4040 
4041 //===----------------------------------------------------------------------===//
4042 // s_waitcnt
4043 //===----------------------------------------------------------------------===//
4044 
4045 static bool
4046 encodeCnt(
4047   const AMDGPU::IsaVersion ISA,
4048   int64_t &IntVal,
4049   int64_t CntVal,
4050   bool Saturate,
4051   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4052   unsigned (*decode)(const IsaVersion &Version, unsigned))
4053 {
4054   bool Failed = false;
4055 
4056   IntVal = encode(ISA, IntVal, CntVal);
4057   if (CntVal != decode(ISA, IntVal)) {
4058     if (Saturate) {
4059       IntVal = encode(ISA, IntVal, -1);
4060     } else {
4061       Failed = true;
4062     }
4063   }
4064   return Failed;
4065 }
4066 
4067 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4068   StringRef CntName = Parser.getTok().getString();
4069   int64_t CntVal;
4070 
4071   Parser.Lex();
4072   if (getLexer().isNot(AsmToken::LParen))
4073     return true;
4074 
4075   Parser.Lex();
4076   if (getLexer().isNot(AsmToken::Integer))
4077     return true;
4078 
4079   SMLoc ValLoc = Parser.getTok().getLoc();
4080   if (getParser().parseAbsoluteExpression(CntVal))
4081     return true;
4082 
4083   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4084 
4085   bool Failed = true;
4086   bool Sat = CntName.endswith("_sat");
4087 
4088   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4089     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4090   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4091     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4092   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4093     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4094   }
4095 
4096   if (Failed) {
4097     Error(ValLoc, "too large value for " + CntName);
4098     return true;
4099   }
4100 
4101   if (getLexer().isNot(AsmToken::RParen)) {
4102     return true;
4103   }
4104 
4105   Parser.Lex();
4106   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
4107     const AsmToken NextToken = getLexer().peekTok();
4108     if (NextToken.is(AsmToken::Identifier)) {
4109       Parser.Lex();
4110     }
4111   }
4112 
4113   return false;
4114 }
4115 
4116 OperandMatchResultTy
4117 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4118   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4119   int64_t Waitcnt = getWaitcntBitMask(ISA);
4120   SMLoc S = Parser.getTok().getLoc();
4121 
4122   switch(getLexer().getKind()) {
4123     default: return MatchOperand_ParseFail;
4124     case AsmToken::Integer:
4125       // The operand can be an integer value.
4126       if (getParser().parseAbsoluteExpression(Waitcnt))
4127         return MatchOperand_ParseFail;
4128       break;
4129 
4130     case AsmToken::Identifier:
4131       do {
4132         if (parseCnt(Waitcnt))
4133           return MatchOperand_ParseFail;
4134       } while(getLexer().isNot(AsmToken::EndOfStatement));
4135       break;
4136   }
4137   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4138   return MatchOperand_Success;
4139 }
4140 
4141 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
4142                                           int64_t &Width) {
4143   using namespace llvm::AMDGPU::Hwreg;
4144 
4145   if (Parser.getTok().getString() != "hwreg")
4146     return true;
4147   Parser.Lex();
4148 
4149   if (getLexer().isNot(AsmToken::LParen))
4150     return true;
4151   Parser.Lex();
4152 
4153   if (getLexer().is(AsmToken::Identifier)) {
4154     HwReg.IsSymbolic = true;
4155     HwReg.Id = ID_UNKNOWN_;
4156     const StringRef tok = Parser.getTok().getString();
4157     int Last = ID_SYMBOLIC_LAST_;
4158     if (isSI() || isCI() || isVI())
4159       Last = ID_SYMBOLIC_FIRST_GFX9_;
4160     else if (isGFX9())
4161       Last = ID_SYMBOLIC_FIRST_GFX10_;
4162     for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
4163       if (tok == IdSymbolic[i]) {
4164         HwReg.Id = i;
4165         break;
4166       }
4167     }
4168     Parser.Lex();
4169   } else {
4170     HwReg.IsSymbolic = false;
4171     if (getLexer().isNot(AsmToken::Integer))
4172       return true;
4173     if (getParser().parseAbsoluteExpression(HwReg.Id))
4174       return true;
4175   }
4176 
4177   if (getLexer().is(AsmToken::RParen)) {
4178     Parser.Lex();
4179     return false;
4180   }
4181 
4182   // optional params
4183   if (getLexer().isNot(AsmToken::Comma))
4184     return true;
4185   Parser.Lex();
4186 
4187   if (getLexer().isNot(AsmToken::Integer))
4188     return true;
4189   if (getParser().parseAbsoluteExpression(Offset))
4190     return true;
4191 
4192   if (getLexer().isNot(AsmToken::Comma))
4193     return true;
4194   Parser.Lex();
4195 
4196   if (getLexer().isNot(AsmToken::Integer))
4197     return true;
4198   if (getParser().parseAbsoluteExpression(Width))
4199     return true;
4200 
4201   if (getLexer().isNot(AsmToken::RParen))
4202     return true;
4203   Parser.Lex();
4204 
4205   return false;
4206 }
4207 
4208 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4209   using namespace llvm::AMDGPU::Hwreg;
4210 
4211   int64_t Imm16Val = 0;
4212   SMLoc S = Parser.getTok().getLoc();
4213 
4214   switch(getLexer().getKind()) {
4215     default: return MatchOperand_NoMatch;
4216     case AsmToken::Integer:
4217       // The operand can be an integer value.
4218       if (getParser().parseAbsoluteExpression(Imm16Val))
4219         return MatchOperand_NoMatch;
4220       if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4221         Error(S, "invalid immediate: only 16-bit values are legal");
4222         // Do not return error code, but create an imm operand anyway and proceed
4223         // to the next operand, if any. That avoids unneccessary error messages.
4224       }
4225       break;
4226 
4227     case AsmToken::Identifier: {
4228         OperandInfoTy HwReg(ID_UNKNOWN_);
4229         int64_t Offset = OFFSET_DEFAULT_;
4230         int64_t Width = WIDTH_M1_DEFAULT_ + 1;
4231         if (parseHwregConstruct(HwReg, Offset, Width))
4232           return MatchOperand_ParseFail;
4233         if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
4234           if (HwReg.IsSymbolic)
4235             Error(S, "invalid symbolic name of hardware register");
4236           else
4237             Error(S, "invalid code of hardware register: only 6-bit values are legal");
4238         }
4239         if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
4240           Error(S, "invalid bit offset: only 5-bit values are legal");
4241         if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
4242           Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
4243         Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
4244       }
4245       break;
4246   }
4247   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
4248   return MatchOperand_Success;
4249 }
4250 
4251 bool AMDGPUOperand::isSWaitCnt() const {
4252   return isImm();
4253 }
4254 
4255 bool AMDGPUOperand::isHwreg() const {
4256   return isImmTy(ImmTyHwreg);
4257 }
4258 
4259 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
4260   using namespace llvm::AMDGPU::SendMsg;
4261 
4262   if (Parser.getTok().getString() != "sendmsg")
4263     return true;
4264   Parser.Lex();
4265 
4266   if (getLexer().isNot(AsmToken::LParen))
4267     return true;
4268   Parser.Lex();
4269 
4270   if (getLexer().is(AsmToken::Identifier)) {
4271     Msg.IsSymbolic = true;
4272     Msg.Id = ID_UNKNOWN_;
4273     const std::string tok = Parser.getTok().getString();
4274     for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
4275       switch(i) {
4276         default: continue; // Omit gaps.
4277         case ID_GS_ALLOC_REQ:
4278           if (isSI() || isCI() || isVI())
4279             continue;
4280           break;
4281         case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:
4282         case ID_SYSMSG: break;
4283       }
4284       if (tok == IdSymbolic[i]) {
4285         Msg.Id = i;
4286         break;
4287       }
4288     }
4289     Parser.Lex();
4290   } else {
4291     Msg.IsSymbolic = false;
4292     if (getLexer().isNot(AsmToken::Integer))
4293       return true;
4294     if (getParser().parseAbsoluteExpression(Msg.Id))
4295       return true;
4296     if (getLexer().is(AsmToken::Integer))
4297       if (getParser().parseAbsoluteExpression(Msg.Id))
4298         Msg.Id = ID_UNKNOWN_;
4299   }
4300   if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
4301     return false;
4302 
4303   if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
4304     if (getLexer().isNot(AsmToken::RParen))
4305       return true;
4306     Parser.Lex();
4307     return false;
4308   }
4309 
4310   if (getLexer().isNot(AsmToken::Comma))
4311     return true;
4312   Parser.Lex();
4313 
4314   assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
4315   Operation.Id = ID_UNKNOWN_;
4316   if (getLexer().is(AsmToken::Identifier)) {
4317     Operation.IsSymbolic = true;
4318     const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
4319     const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
4320     const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
4321     const StringRef Tok = Parser.getTok().getString();
4322     for (int i = F; i < L; ++i) {
4323       if (Tok == S[i]) {
4324         Operation.Id = i;
4325         break;
4326       }
4327     }
4328     Parser.Lex();
4329   } else {
4330     Operation.IsSymbolic = false;
4331     if (getLexer().isNot(AsmToken::Integer))
4332       return true;
4333     if (getParser().parseAbsoluteExpression(Operation.Id))
4334       return true;
4335   }
4336 
4337   if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4338     // Stream id is optional.
4339     if (getLexer().is(AsmToken::RParen)) {
4340       Parser.Lex();
4341       return false;
4342     }
4343 
4344     if (getLexer().isNot(AsmToken::Comma))
4345       return true;
4346     Parser.Lex();
4347 
4348     if (getLexer().isNot(AsmToken::Integer))
4349       return true;
4350     if (getParser().parseAbsoluteExpression(StreamId))
4351       return true;
4352   }
4353 
4354   if (getLexer().isNot(AsmToken::RParen))
4355     return true;
4356   Parser.Lex();
4357   return false;
4358 }
4359 
4360 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4361   if (getLexer().getKind() != AsmToken::Identifier)
4362     return MatchOperand_NoMatch;
4363 
4364   StringRef Str = Parser.getTok().getString();
4365   int Slot = StringSwitch<int>(Str)
4366     .Case("p10", 0)
4367     .Case("p20", 1)
4368     .Case("p0", 2)
4369     .Default(-1);
4370 
4371   SMLoc S = Parser.getTok().getLoc();
4372   if (Slot == -1)
4373     return MatchOperand_ParseFail;
4374 
4375   Parser.Lex();
4376   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4377                                               AMDGPUOperand::ImmTyInterpSlot));
4378   return MatchOperand_Success;
4379 }
4380 
4381 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4382   if (getLexer().getKind() != AsmToken::Identifier)
4383     return MatchOperand_NoMatch;
4384 
4385   StringRef Str = Parser.getTok().getString();
4386   if (!Str.startswith("attr"))
4387     return MatchOperand_NoMatch;
4388 
4389   StringRef Chan = Str.take_back(2);
4390   int AttrChan = StringSwitch<int>(Chan)
4391     .Case(".x", 0)
4392     .Case(".y", 1)
4393     .Case(".z", 2)
4394     .Case(".w", 3)
4395     .Default(-1);
4396   if (AttrChan == -1)
4397     return MatchOperand_ParseFail;
4398 
4399   Str = Str.drop_back(2).drop_front(4);
4400 
4401   uint8_t Attr;
4402   if (Str.getAsInteger(10, Attr))
4403     return MatchOperand_ParseFail;
4404 
4405   SMLoc S = Parser.getTok().getLoc();
4406   Parser.Lex();
4407   if (Attr > 63) {
4408     Error(S, "out of bounds attr");
4409     return MatchOperand_Success;
4410   }
4411 
4412   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4413 
4414   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4415                                               AMDGPUOperand::ImmTyInterpAttr));
4416   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4417                                               AMDGPUOperand::ImmTyAttrChan));
4418   return MatchOperand_Success;
4419 }
4420 
4421 void AMDGPUAsmParser::errorExpTgt() {
4422   Error(Parser.getTok().getLoc(), "invalid exp target");
4423 }
4424 
4425 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4426                                                       uint8_t &Val) {
4427   if (Str == "null") {
4428     Val = 9;
4429     return MatchOperand_Success;
4430   }
4431 
4432   if (Str.startswith("mrt")) {
4433     Str = Str.drop_front(3);
4434     if (Str == "z") { // == mrtz
4435       Val = 8;
4436       return MatchOperand_Success;
4437     }
4438 
4439     if (Str.getAsInteger(10, Val))
4440       return MatchOperand_ParseFail;
4441 
4442     if (Val > 7)
4443       errorExpTgt();
4444 
4445     return MatchOperand_Success;
4446   }
4447 
4448   if (Str.startswith("pos")) {
4449     Str = Str.drop_front(3);
4450     if (Str.getAsInteger(10, Val))
4451       return MatchOperand_ParseFail;
4452 
4453     if (Val > 3)
4454       errorExpTgt();
4455 
4456     Val += 12;
4457     return MatchOperand_Success;
4458   }
4459 
4460   if (Str.startswith("param")) {
4461     Str = Str.drop_front(5);
4462     if (Str.getAsInteger(10, Val))
4463       return MatchOperand_ParseFail;
4464 
4465     if (Val >= 32)
4466       errorExpTgt();
4467 
4468     Val += 32;
4469     return MatchOperand_Success;
4470   }
4471 
4472   if (Str.startswith("invalid_target_")) {
4473     Str = Str.drop_front(15);
4474     if (Str.getAsInteger(10, Val))
4475       return MatchOperand_ParseFail;
4476 
4477     errorExpTgt();
4478     return MatchOperand_Success;
4479   }
4480 
4481   return MatchOperand_NoMatch;
4482 }
4483 
4484 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4485   uint8_t Val;
4486   StringRef Str = Parser.getTok().getString();
4487 
4488   auto Res = parseExpTgtImpl(Str, Val);
4489   if (Res != MatchOperand_Success)
4490     return Res;
4491 
4492   SMLoc S = Parser.getTok().getLoc();
4493   Parser.Lex();
4494 
4495   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4496                                               AMDGPUOperand::ImmTyExpTgt));
4497   return MatchOperand_Success;
4498 }
4499 
4500 OperandMatchResultTy
4501 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4502   using namespace llvm::AMDGPU::SendMsg;
4503 
4504   int64_t Imm16Val = 0;
4505   SMLoc S = Parser.getTok().getLoc();
4506 
4507   switch(getLexer().getKind()) {
4508   default:
4509     return MatchOperand_NoMatch;
4510   case AsmToken::Integer:
4511     // The operand can be an integer value.
4512     if (getParser().parseAbsoluteExpression(Imm16Val))
4513       return MatchOperand_NoMatch;
4514     if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4515       Error(S, "invalid immediate: only 16-bit values are legal");
4516       // Do not return error code, but create an imm operand anyway and proceed
4517       // to the next operand, if any. That avoids unneccessary error messages.
4518     }
4519     break;
4520   case AsmToken::Identifier: {
4521       OperandInfoTy Msg(ID_UNKNOWN_);
4522       OperandInfoTy Operation(OP_UNKNOWN_);
4523       int64_t StreamId = STREAM_ID_DEFAULT_;
4524       if (parseSendMsgConstruct(Msg, Operation, StreamId))
4525         return MatchOperand_ParseFail;
4526       do {
4527         // Validate and encode message ID.
4528         if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4529                 || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI())
4530                 || Msg.Id == ID_SYSMSG)) {
4531           if (Msg.IsSymbolic)
4532             Error(S, "invalid/unsupported symbolic name of message");
4533           else
4534             Error(S, "invalid/unsupported code of message");
4535           break;
4536         }
4537         Imm16Val = (Msg.Id << ID_SHIFT_);
4538         // Validate and encode operation ID.
4539         if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4540           if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4541             if (Operation.IsSymbolic)
4542               Error(S, "invalid symbolic name of GS_OP");
4543             else
4544               Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4545             break;
4546           }
4547           if (Operation.Id == OP_GS_NOP
4548               && Msg.Id != ID_GS_DONE) {
4549             Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4550             break;
4551           }
4552           Imm16Val |= (Operation.Id << OP_SHIFT_);
4553         }
4554         if (Msg.Id == ID_SYSMSG) {
4555           if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4556             if (Operation.IsSymbolic)
4557               Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4558             else
4559               Error(S, "invalid/unsupported code of SYSMSG_OP");
4560             break;
4561           }
4562           Imm16Val |= (Operation.Id << OP_SHIFT_);
4563         }
4564         // Validate and encode stream ID.
4565         if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4566           if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4567             Error(S, "invalid stream id: only 2-bit values are legal");
4568             break;
4569           }
4570           Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4571         }
4572       } while (false);
4573     }
4574     break;
4575   }
4576   Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4577   return MatchOperand_Success;
4578 }
4579 
4580 bool AMDGPUOperand::isSendMsg() const {
4581   return isImmTy(ImmTySendMsg);
4582 }
4583 
4584 //===----------------------------------------------------------------------===//
4585 // parser helpers
4586 //===----------------------------------------------------------------------===//
4587 
4588 bool
4589 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
4590   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
4591 }
4592 
4593 bool
4594 AMDGPUAsmParser::isId(const StringRef Id) const {
4595   return isId(getToken(), Id);
4596 }
4597 
4598 bool
4599 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
4600   return getTokenKind() == Kind;
4601 }
4602 
4603 bool
4604 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4605   if (isId(Id)) {
4606     lex();
4607     return true;
4608   }
4609   return false;
4610 }
4611 
4612 bool
4613 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4614   if (isToken(Kind)) {
4615     lex();
4616     return true;
4617   }
4618   return false;
4619 }
4620 
4621 bool
4622 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4623                            const StringRef ErrMsg) {
4624   if (!trySkipToken(Kind)) {
4625     Error(getLoc(), ErrMsg);
4626     return false;
4627   }
4628   return true;
4629 }
4630 
4631 bool
4632 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4633   return !getParser().parseAbsoluteExpression(Imm);
4634 }
4635 
4636 bool
4637 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4638   if (isToken(AsmToken::String)) {
4639     Val = getToken().getStringContents();
4640     lex();
4641     return true;
4642   } else {
4643     Error(getLoc(), ErrMsg);
4644     return false;
4645   }
4646 }
4647 
4648 AsmToken
4649 AMDGPUAsmParser::getToken() const {
4650   return Parser.getTok();
4651 }
4652 
4653 AsmToken
4654 AMDGPUAsmParser::peekToken() {
4655   return getLexer().peekTok();
4656 }
4657 
4658 void
4659 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
4660   auto TokCount = getLexer().peekTokens(Tokens);
4661 
4662   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
4663     Tokens[Idx] = AsmToken(AsmToken::Error, "");
4664 }
4665 
4666 AsmToken::TokenKind
4667 AMDGPUAsmParser::getTokenKind() const {
4668   return getLexer().getKind();
4669 }
4670 
4671 SMLoc
4672 AMDGPUAsmParser::getLoc() const {
4673   return getToken().getLoc();
4674 }
4675 
4676 StringRef
4677 AMDGPUAsmParser::getTokenStr() const {
4678   return getToken().getString();
4679 }
4680 
4681 void
4682 AMDGPUAsmParser::lex() {
4683   Parser.Lex();
4684 }
4685 
4686 //===----------------------------------------------------------------------===//
4687 // swizzle
4688 //===----------------------------------------------------------------------===//
4689 
4690 LLVM_READNONE
4691 static unsigned
4692 encodeBitmaskPerm(const unsigned AndMask,
4693                   const unsigned OrMask,
4694                   const unsigned XorMask) {
4695   using namespace llvm::AMDGPU::Swizzle;
4696 
4697   return BITMASK_PERM_ENC |
4698          (AndMask << BITMASK_AND_SHIFT) |
4699          (OrMask  << BITMASK_OR_SHIFT)  |
4700          (XorMask << BITMASK_XOR_SHIFT);
4701 }
4702 
4703 bool
4704 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
4705                                       const unsigned MinVal,
4706                                       const unsigned MaxVal,
4707                                       const StringRef ErrMsg) {
4708   for (unsigned i = 0; i < OpNum; ++i) {
4709     if (!skipToken(AsmToken::Comma, "expected a comma")){
4710       return false;
4711     }
4712     SMLoc ExprLoc = Parser.getTok().getLoc();
4713     if (!parseExpr(Op[i])) {
4714       return false;
4715     }
4716     if (Op[i] < MinVal || Op[i] > MaxVal) {
4717       Error(ExprLoc, ErrMsg);
4718       return false;
4719     }
4720   }
4721 
4722   return true;
4723 }
4724 
4725 bool
4726 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
4727   using namespace llvm::AMDGPU::Swizzle;
4728 
4729   int64_t Lane[LANE_NUM];
4730   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
4731                            "expected a 2-bit lane id")) {
4732     Imm = QUAD_PERM_ENC;
4733     for (unsigned I = 0; I < LANE_NUM; ++I) {
4734       Imm |= Lane[I] << (LANE_SHIFT * I);
4735     }
4736     return true;
4737   }
4738   return false;
4739 }
4740 
4741 bool
4742 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
4743   using namespace llvm::AMDGPU::Swizzle;
4744 
4745   SMLoc S = Parser.getTok().getLoc();
4746   int64_t GroupSize;
4747   int64_t LaneIdx;
4748 
4749   if (!parseSwizzleOperands(1, &GroupSize,
4750                             2, 32,
4751                             "group size must be in the interval [2,32]")) {
4752     return false;
4753   }
4754   if (!isPowerOf2_64(GroupSize)) {
4755     Error(S, "group size must be a power of two");
4756     return false;
4757   }
4758   if (parseSwizzleOperands(1, &LaneIdx,
4759                            0, GroupSize - 1,
4760                            "lane id must be in the interval [0,group size - 1]")) {
4761     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
4762     return true;
4763   }
4764   return false;
4765 }
4766 
4767 bool
4768 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
4769   using namespace llvm::AMDGPU::Swizzle;
4770 
4771   SMLoc S = Parser.getTok().getLoc();
4772   int64_t GroupSize;
4773 
4774   if (!parseSwizzleOperands(1, &GroupSize,
4775       2, 32, "group size must be in the interval [2,32]")) {
4776     return false;
4777   }
4778   if (!isPowerOf2_64(GroupSize)) {
4779     Error(S, "group size must be a power of two");
4780     return false;
4781   }
4782 
4783   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
4784   return true;
4785 }
4786 
4787 bool
4788 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
4789   using namespace llvm::AMDGPU::Swizzle;
4790 
4791   SMLoc S = Parser.getTok().getLoc();
4792   int64_t GroupSize;
4793 
4794   if (!parseSwizzleOperands(1, &GroupSize,
4795       1, 16, "group size must be in the interval [1,16]")) {
4796     return false;
4797   }
4798   if (!isPowerOf2_64(GroupSize)) {
4799     Error(S, "group size must be a power of two");
4800     return false;
4801   }
4802 
4803   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
4804   return true;
4805 }
4806 
4807 bool
4808 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
4809   using namespace llvm::AMDGPU::Swizzle;
4810 
4811   if (!skipToken(AsmToken::Comma, "expected a comma")) {
4812     return false;
4813   }
4814 
4815   StringRef Ctl;
4816   SMLoc StrLoc = Parser.getTok().getLoc();
4817   if (!parseString(Ctl)) {
4818     return false;
4819   }
4820   if (Ctl.size() != BITMASK_WIDTH) {
4821     Error(StrLoc, "expected a 5-character mask");
4822     return false;
4823   }
4824 
4825   unsigned AndMask = 0;
4826   unsigned OrMask = 0;
4827   unsigned XorMask = 0;
4828 
4829   for (size_t i = 0; i < Ctl.size(); ++i) {
4830     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
4831     switch(Ctl[i]) {
4832     default:
4833       Error(StrLoc, "invalid mask");
4834       return false;
4835     case '0':
4836       break;
4837     case '1':
4838       OrMask |= Mask;
4839       break;
4840     case 'p':
4841       AndMask |= Mask;
4842       break;
4843     case 'i':
4844       AndMask |= Mask;
4845       XorMask |= Mask;
4846       break;
4847     }
4848   }
4849 
4850   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
4851   return true;
4852 }
4853 
4854 bool
4855 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
4856 
4857   SMLoc OffsetLoc = Parser.getTok().getLoc();
4858 
4859   if (!parseExpr(Imm)) {
4860     return false;
4861   }
4862   if (!isUInt<16>(Imm)) {
4863     Error(OffsetLoc, "expected a 16-bit offset");
4864     return false;
4865   }
4866   return true;
4867 }
4868 
4869 bool
4870 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
4871   using namespace llvm::AMDGPU::Swizzle;
4872 
4873   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
4874 
4875     SMLoc ModeLoc = Parser.getTok().getLoc();
4876     bool Ok = false;
4877 
4878     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
4879       Ok = parseSwizzleQuadPerm(Imm);
4880     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
4881       Ok = parseSwizzleBitmaskPerm(Imm);
4882     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
4883       Ok = parseSwizzleBroadcast(Imm);
4884     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
4885       Ok = parseSwizzleSwap(Imm);
4886     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
4887       Ok = parseSwizzleReverse(Imm);
4888     } else {
4889       Error(ModeLoc, "expected a swizzle mode");
4890     }
4891 
4892     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
4893   }
4894 
4895   return false;
4896 }
4897 
4898 OperandMatchResultTy
4899 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
4900   SMLoc S = Parser.getTok().getLoc();
4901   int64_t Imm = 0;
4902 
4903   if (trySkipId("offset")) {
4904 
4905     bool Ok = false;
4906     if (skipToken(AsmToken::Colon, "expected a colon")) {
4907       if (trySkipId("swizzle")) {
4908         Ok = parseSwizzleMacro(Imm);
4909       } else {
4910         Ok = parseSwizzleOffset(Imm);
4911       }
4912     }
4913 
4914     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
4915 
4916     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
4917   } else {
4918     // Swizzle "offset" operand is optional.
4919     // If it is omitted, try parsing other optional operands.
4920     return parseOptionalOpr(Operands);
4921   }
4922 }
4923 
4924 bool
4925 AMDGPUOperand::isSwizzle() const {
4926   return isImmTy(ImmTySwizzle);
4927 }
4928 
4929 //===----------------------------------------------------------------------===//
4930 // VGPR Index Mode
4931 //===----------------------------------------------------------------------===//
4932 
4933 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
4934 
4935   using namespace llvm::AMDGPU::VGPRIndexMode;
4936 
4937   if (trySkipToken(AsmToken::RParen)) {
4938     return OFF;
4939   }
4940 
4941   int64_t Imm = 0;
4942 
4943   while (true) {
4944     unsigned Mode = 0;
4945     SMLoc S = Parser.getTok().getLoc();
4946 
4947     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
4948       if (trySkipId(IdSymbolic[ModeId])) {
4949         Mode = 1 << ModeId;
4950         break;
4951       }
4952     }
4953 
4954     if (Mode == 0) {
4955       Error(S, (Imm == 0)?
4956                "expected a VGPR index mode or a closing parenthesis" :
4957                "expected a VGPR index mode");
4958       break;
4959     }
4960 
4961     if (Imm & Mode) {
4962       Error(S, "duplicate VGPR index mode");
4963       break;
4964     }
4965     Imm |= Mode;
4966 
4967     if (trySkipToken(AsmToken::RParen))
4968       break;
4969     if (!skipToken(AsmToken::Comma,
4970                    "expected a comma or a closing parenthesis"))
4971       break;
4972   }
4973 
4974   return Imm;
4975 }
4976 
4977 OperandMatchResultTy
4978 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
4979 
4980   int64_t Imm = 0;
4981   SMLoc S = Parser.getTok().getLoc();
4982 
4983   if (getLexer().getKind() == AsmToken::Identifier &&
4984       Parser.getTok().getString() == "gpr_idx" &&
4985       getLexer().peekTok().is(AsmToken::LParen)) {
4986 
4987     Parser.Lex();
4988     Parser.Lex();
4989 
4990     // If parse failed, trigger an error but do not return error code
4991     // to avoid excessive error messages.
4992     Imm = parseGPRIdxMacro();
4993 
4994   } else {
4995     if (getParser().parseAbsoluteExpression(Imm))
4996       return MatchOperand_NoMatch;
4997     if (Imm < 0 || !isUInt<4>(Imm)) {
4998       Error(S, "invalid immediate: only 4-bit values are legal");
4999     }
5000   }
5001 
5002   Operands.push_back(
5003       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5004   return MatchOperand_Success;
5005 }
5006 
5007 bool AMDGPUOperand::isGPRIdxMode() const {
5008   return isImmTy(ImmTyGprIdxMode);
5009 }
5010 
5011 //===----------------------------------------------------------------------===//
5012 // sopp branch targets
5013 //===----------------------------------------------------------------------===//
5014 
5015 OperandMatchResultTy
5016 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5017   SMLoc S = Parser.getTok().getLoc();
5018 
5019   switch (getLexer().getKind()) {
5020     default: return MatchOperand_ParseFail;
5021     case AsmToken::Integer: {
5022       int64_t Imm;
5023       if (getParser().parseAbsoluteExpression(Imm))
5024         return MatchOperand_ParseFail;
5025       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
5026       return MatchOperand_Success;
5027     }
5028 
5029     case AsmToken::Identifier:
5030       Operands.push_back(AMDGPUOperand::CreateExpr(this,
5031           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
5032                                   Parser.getTok().getString()), getContext()), S));
5033       Parser.Lex();
5034       return MatchOperand_Success;
5035   }
5036 }
5037 
5038 //===----------------------------------------------------------------------===//
5039 // mubuf
5040 //===----------------------------------------------------------------------===//
5041 
5042 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5043   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5044 }
5045 
5046 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5047   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5048 }
5049 
5050 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5051                                const OperandVector &Operands,
5052                                bool IsAtomic,
5053                                bool IsAtomicReturn,
5054                                bool IsLds) {
5055   bool IsLdsOpcode = IsLds;
5056   bool HasLdsModifier = false;
5057   OptionalImmIndexMap OptionalIdx;
5058   assert(IsAtomicReturn ? IsAtomic : true);
5059   unsigned FirstOperandIdx = 1;
5060 
5061   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5062     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5063 
5064     // Add the register arguments
5065     if (Op.isReg()) {
5066       Op.addRegOperands(Inst, 1);
5067       // Insert a tied src for atomic return dst.
5068       // This cannot be postponed as subsequent calls to
5069       // addImmOperands rely on correct number of MC operands.
5070       if (IsAtomicReturn && i == FirstOperandIdx)
5071         Op.addRegOperands(Inst, 1);
5072       continue;
5073     }
5074 
5075     // Handle the case where soffset is an immediate
5076     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5077       Op.addImmOperands(Inst, 1);
5078       continue;
5079     }
5080 
5081     HasLdsModifier = Op.isLDS();
5082 
5083     // Handle tokens like 'offen' which are sometimes hard-coded into the
5084     // asm string.  There are no MCInst operands for these.
5085     if (Op.isToken()) {
5086       continue;
5087     }
5088     assert(Op.isImm());
5089 
5090     // Handle optional arguments
5091     OptionalIdx[Op.getImmTy()] = i;
5092   }
5093 
5094   // This is a workaround for an llvm quirk which may result in an
5095   // incorrect instruction selection. Lds and non-lds versions of
5096   // MUBUF instructions are identical except that lds versions
5097   // have mandatory 'lds' modifier. However this modifier follows
5098   // optional modifiers and llvm asm matcher regards this 'lds'
5099   // modifier as an optional one. As a result, an lds version
5100   // of opcode may be selected even if it has no 'lds' modifier.
5101   if (IsLdsOpcode && !HasLdsModifier) {
5102     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5103     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5104       Inst.setOpcode(NoLdsOpcode);
5105       IsLdsOpcode = false;
5106     }
5107   }
5108 
5109   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5110   if (!IsAtomic) { // glc is hard-coded.
5111     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5112   }
5113   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5114 
5115   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5116     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5117   }
5118 }
5119 
5120 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5121   OptionalImmIndexMap OptionalIdx;
5122 
5123   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5124     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5125 
5126     // Add the register arguments
5127     if (Op.isReg()) {
5128       Op.addRegOperands(Inst, 1);
5129       continue;
5130     }
5131 
5132     // Handle the case where soffset is an immediate
5133     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5134       Op.addImmOperands(Inst, 1);
5135       continue;
5136     }
5137 
5138     // Handle tokens like 'offen' which are sometimes hard-coded into the
5139     // asm string.  There are no MCInst operands for these.
5140     if (Op.isToken()) {
5141       continue;
5142     }
5143     assert(Op.isImm());
5144 
5145     // Handle optional arguments
5146     OptionalIdx[Op.getImmTy()] = i;
5147   }
5148 
5149   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5150                         AMDGPUOperand::ImmTyOffset);
5151   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5152   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5153   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5154   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5155 }
5156 
5157 //===----------------------------------------------------------------------===//
5158 // mimg
5159 //===----------------------------------------------------------------------===//
5160 
5161 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5162                               bool IsAtomic) {
5163   unsigned I = 1;
5164   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5165   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5166     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5167   }
5168 
5169   if (IsAtomic) {
5170     // Add src, same as dst
5171     assert(Desc.getNumDefs() == 1);
5172     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5173   }
5174 
5175   OptionalImmIndexMap OptionalIdx;
5176 
5177   for (unsigned E = Operands.size(); I != E; ++I) {
5178     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5179 
5180     // Add the register arguments
5181     if (Op.isReg()) {
5182       Op.addRegOperands(Inst, 1);
5183     } else if (Op.isImmModifier()) {
5184       OptionalIdx[Op.getImmTy()] = I;
5185     } else {
5186       llvm_unreachable("unexpected operand type");
5187     }
5188   }
5189 
5190   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5191   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5192   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5193   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5194   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5195   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5196   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5197   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5198   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5199 }
5200 
5201 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5202   cvtMIMG(Inst, Operands, true);
5203 }
5204 
5205 //===----------------------------------------------------------------------===//
5206 // smrd
5207 //===----------------------------------------------------------------------===//
5208 
5209 bool AMDGPUOperand::isSMRDOffset8() const {
5210   return isImm() && isUInt<8>(getImm());
5211 }
5212 
5213 bool AMDGPUOperand::isSMRDOffset20() const {
5214   return isImm() && isUInt<20>(getImm());
5215 }
5216 
5217 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5218   // 32-bit literals are only supported on CI and we only want to use them
5219   // when the offset is > 8-bits.
5220   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5221 }
5222 
5223 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5224   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5225 }
5226 
5227 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5228   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5229 }
5230 
5231 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5232   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5233 }
5234 
5235 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
5236   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5237 }
5238 
5239 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
5240   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5241 }
5242 
5243 //===----------------------------------------------------------------------===//
5244 // vop3
5245 //===----------------------------------------------------------------------===//
5246 
5247 static bool ConvertOmodMul(int64_t &Mul) {
5248   if (Mul != 1 && Mul != 2 && Mul != 4)
5249     return false;
5250 
5251   Mul >>= 1;
5252   return true;
5253 }
5254 
5255 static bool ConvertOmodDiv(int64_t &Div) {
5256   if (Div == 1) {
5257     Div = 0;
5258     return true;
5259   }
5260 
5261   if (Div == 2) {
5262     Div = 3;
5263     return true;
5264   }
5265 
5266   return false;
5267 }
5268 
5269 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5270   if (BoundCtrl == 0) {
5271     BoundCtrl = 1;
5272     return true;
5273   }
5274 
5275   if (BoundCtrl == -1) {
5276     BoundCtrl = 0;
5277     return true;
5278   }
5279 
5280   return false;
5281 }
5282 
5283 // Note: the order in this table matches the order of operands in AsmString.
5284 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5285   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
5286   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
5287   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
5288   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5289   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5290   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
5291   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
5292   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
5293   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5294   {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5295   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
5296   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
5297   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
5298   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
5299   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
5300   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
5301   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5302   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
5303   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
5304   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5305   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
5306   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
5307   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
5308   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
5309   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5310   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5311   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5312   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5313   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5314   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5315   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5316   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5317   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5318   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5319   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5320   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5321   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
5322 };
5323 
5324 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5325   unsigned size = Operands.size();
5326   assert(size > 0);
5327 
5328   OperandMatchResultTy res = parseOptionalOpr(Operands);
5329 
5330   // This is a hack to enable hardcoded mandatory operands which follow
5331   // optional operands.
5332   //
5333   // Current design assumes that all operands after the first optional operand
5334   // are also optional. However implementation of some instructions violates
5335   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
5336   //
5337   // To alleviate this problem, we have to (implicitly) parse extra operands
5338   // to make sure autogenerated parser of custom operands never hit hardcoded
5339   // mandatory operands.
5340 
5341   if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5342 
5343     // We have parsed the first optional operand.
5344     // Parse as many operands as necessary to skip all mandatory operands.
5345 
5346     for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5347       if (res != MatchOperand_Success ||
5348           getLexer().is(AsmToken::EndOfStatement)) break;
5349       if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5350       res = parseOptionalOpr(Operands);
5351     }
5352   }
5353 
5354   return res;
5355 }
5356 
5357 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5358   OperandMatchResultTy res;
5359   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5360     // try to parse any optional operand here
5361     if (Op.IsBit) {
5362       res = parseNamedBit(Op.Name, Operands, Op.Type);
5363     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5364       res = parseOModOperand(Operands);
5365     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5366                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5367                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5368       res = parseSDWASel(Operands, Op.Name, Op.Type);
5369     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5370       res = parseSDWADstUnused(Operands);
5371     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5372                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5373                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5374                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5375       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5376                                         Op.ConvertResult);
5377     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
5378       res = parseDfmtNfmt(Operands);
5379     } else {
5380       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5381     }
5382     if (res != MatchOperand_NoMatch) {
5383       return res;
5384     }
5385   }
5386   return MatchOperand_NoMatch;
5387 }
5388 
5389 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5390   StringRef Name = Parser.getTok().getString();
5391   if (Name == "mul") {
5392     return parseIntWithPrefix("mul", Operands,
5393                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5394   }
5395 
5396   if (Name == "div") {
5397     return parseIntWithPrefix("div", Operands,
5398                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5399   }
5400 
5401   return MatchOperand_NoMatch;
5402 }
5403 
5404 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
5405   cvtVOP3P(Inst, Operands);
5406 
5407   int Opc = Inst.getOpcode();
5408 
5409   int SrcNum;
5410   const int Ops[] = { AMDGPU::OpName::src0,
5411                       AMDGPU::OpName::src1,
5412                       AMDGPU::OpName::src2 };
5413   for (SrcNum = 0;
5414        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
5415        ++SrcNum);
5416   assert(SrcNum > 0);
5417 
5418   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5419   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5420 
5421   if ((OpSel & (1 << SrcNum)) != 0) {
5422     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
5423     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
5424     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
5425   }
5426 }
5427 
5428 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5429       // 1. This operand is input modifiers
5430   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5431       // 2. This is not last operand
5432       && Desc.NumOperands > (OpNum + 1)
5433       // 3. Next operand is register class
5434       && Desc.OpInfo[OpNum + 1].RegClass != -1
5435       // 4. Next register is not tied to any other operand
5436       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5437 }
5438 
5439 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
5440 {
5441   OptionalImmIndexMap OptionalIdx;
5442   unsigned Opc = Inst.getOpcode();
5443 
5444   unsigned I = 1;
5445   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5446   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5447     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5448   }
5449 
5450   for (unsigned E = Operands.size(); I != E; ++I) {
5451     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5452     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5453       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5454     } else if (Op.isInterpSlot() ||
5455                Op.isInterpAttr() ||
5456                Op.isAttrChan()) {
5457       Inst.addOperand(MCOperand::createImm(Op.getImm()));
5458     } else if (Op.isImmModifier()) {
5459       OptionalIdx[Op.getImmTy()] = I;
5460     } else {
5461       llvm_unreachable("unhandled operand type");
5462     }
5463   }
5464 
5465   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
5466     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
5467   }
5468 
5469   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5470     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5471   }
5472 
5473   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5474     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5475   }
5476 }
5477 
5478 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
5479                               OptionalImmIndexMap &OptionalIdx) {
5480   unsigned Opc = Inst.getOpcode();
5481 
5482   unsigned I = 1;
5483   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5484   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5485     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5486   }
5487 
5488   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
5489     // This instruction has src modifiers
5490     for (unsigned E = Operands.size(); I != E; ++I) {
5491       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5492       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5493         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
5494       } else if (Op.isImmModifier()) {
5495         OptionalIdx[Op.getImmTy()] = I;
5496       } else if (Op.isRegOrImm()) {
5497         Op.addRegOrImmOperands(Inst, 1);
5498       } else {
5499         llvm_unreachable("unhandled operand type");
5500       }
5501     }
5502   } else {
5503     // No src modifiers
5504     for (unsigned E = Operands.size(); I != E; ++I) {
5505       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5506       if (Op.isMod()) {
5507         OptionalIdx[Op.getImmTy()] = I;
5508       } else {
5509         Op.addRegOrImmOperands(Inst, 1);
5510       }
5511     }
5512   }
5513 
5514   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
5515     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
5516   }
5517 
5518   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
5519     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
5520   }
5521 
5522   // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
5523   // it has src2 register operand that is tied to dst operand
5524   // we don't allow modifiers for this operand in assembler so src2_modifiers
5525   // should be 0.
5526   if (Opc == AMDGPU::V_MAC_F32_e64_si ||
5527       Opc == AMDGPU::V_MAC_F32_e64_vi ||
5528       Opc == AMDGPU::V_MAC_F16_e64_vi ||
5529       Opc == AMDGPU::V_FMAC_F32_e64_vi) {
5530     auto it = Inst.begin();
5531     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5532     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5533     ++it;
5534     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5535   }
5536 }
5537 
5538 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5539   OptionalImmIndexMap OptionalIdx;
5540   cvtVOP3(Inst, Operands, OptionalIdx);
5541 }
5542 
5543 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5544                                const OperandVector &Operands) {
5545   OptionalImmIndexMap OptIdx;
5546   const int Opc = Inst.getOpcode();
5547   const MCInstrDesc &Desc = MII.get(Opc);
5548 
5549   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5550 
5551   cvtVOP3(Inst, Operands, OptIdx);
5552 
5553   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5554     assert(!IsPacked);
5555     Inst.addOperand(Inst.getOperand(0));
5556   }
5557 
5558   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
5559   // instruction, and then figure out where to actually put the modifiers
5560 
5561   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5562 
5563   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5564   if (OpSelHiIdx != -1) {
5565     int DefaultVal = IsPacked ? -1 : 0;
5566     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5567                           DefaultVal);
5568   }
5569 
5570   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5571   if (NegLoIdx != -1) {
5572     assert(IsPacked);
5573     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5574     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
5575   }
5576 
5577   const int Ops[] = { AMDGPU::OpName::src0,
5578                       AMDGPU::OpName::src1,
5579                       AMDGPU::OpName::src2 };
5580   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5581                          AMDGPU::OpName::src1_modifiers,
5582                          AMDGPU::OpName::src2_modifiers };
5583 
5584   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5585 
5586   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5587   unsigned OpSelHi = 0;
5588   unsigned NegLo = 0;
5589   unsigned NegHi = 0;
5590 
5591   if (OpSelHiIdx != -1) {
5592     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5593   }
5594 
5595   if (NegLoIdx != -1) {
5596     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5597     NegLo = Inst.getOperand(NegLoIdx).getImm();
5598     NegHi = Inst.getOperand(NegHiIdx).getImm();
5599   }
5600 
5601   for (int J = 0; J < 3; ++J) {
5602     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5603     if (OpIdx == -1)
5604       break;
5605 
5606     uint32_t ModVal = 0;
5607 
5608     if ((OpSel & (1 << J)) != 0)
5609       ModVal |= SISrcMods::OP_SEL_0;
5610 
5611     if ((OpSelHi & (1 << J)) != 0)
5612       ModVal |= SISrcMods::OP_SEL_1;
5613 
5614     if ((NegLo & (1 << J)) != 0)
5615       ModVal |= SISrcMods::NEG;
5616 
5617     if ((NegHi & (1 << J)) != 0)
5618       ModVal |= SISrcMods::NEG_HI;
5619 
5620     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5621 
5622     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5623   }
5624 }
5625 
5626 //===----------------------------------------------------------------------===//
5627 // dpp
5628 //===----------------------------------------------------------------------===//
5629 
5630 bool AMDGPUOperand::isDPPCtrl() const {
5631   using namespace AMDGPU::DPP;
5632 
5633   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5634   if (result) {
5635     int64_t Imm = getImm();
5636     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5637            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5638            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5639            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5640            (Imm == DppCtrl::WAVE_SHL1) ||
5641            (Imm == DppCtrl::WAVE_ROL1) ||
5642            (Imm == DppCtrl::WAVE_SHR1) ||
5643            (Imm == DppCtrl::WAVE_ROR1) ||
5644            (Imm == DppCtrl::ROW_MIRROR) ||
5645            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5646            (Imm == DppCtrl::BCAST15) ||
5647            (Imm == DppCtrl::BCAST31);
5648   }
5649   return false;
5650 }
5651 
5652 bool AMDGPUOperand::isS16Imm() const {
5653   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5654 }
5655 
5656 bool AMDGPUOperand::isU16Imm() const {
5657   return isImm() && isUInt<16>(getImm());
5658 }
5659 
5660 OperandMatchResultTy
5661 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
5662   using namespace AMDGPU::DPP;
5663 
5664   SMLoc S = Parser.getTok().getLoc();
5665   StringRef Prefix;
5666   int64_t Int;
5667 
5668   if (getLexer().getKind() == AsmToken::Identifier) {
5669     Prefix = Parser.getTok().getString();
5670   } else {
5671     return MatchOperand_NoMatch;
5672   }
5673 
5674   if (Prefix == "row_mirror") {
5675     Int = DppCtrl::ROW_MIRROR;
5676     Parser.Lex();
5677   } else if (Prefix == "row_half_mirror") {
5678     Int = DppCtrl::ROW_HALF_MIRROR;
5679     Parser.Lex();
5680   } else {
5681     // Check to prevent parseDPPCtrlOps from eating invalid tokens
5682     if (Prefix != "quad_perm"
5683         && Prefix != "row_shl"
5684         && Prefix != "row_shr"
5685         && Prefix != "row_ror"
5686         && Prefix != "wave_shl"
5687         && Prefix != "wave_rol"
5688         && Prefix != "wave_shr"
5689         && Prefix != "wave_ror"
5690         && Prefix != "row_bcast") {
5691       return MatchOperand_NoMatch;
5692     }
5693 
5694     Parser.Lex();
5695     if (getLexer().isNot(AsmToken::Colon))
5696       return MatchOperand_ParseFail;
5697 
5698     if (Prefix == "quad_perm") {
5699       // quad_perm:[%d,%d,%d,%d]
5700       Parser.Lex();
5701       if (getLexer().isNot(AsmToken::LBrac))
5702         return MatchOperand_ParseFail;
5703       Parser.Lex();
5704 
5705       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
5706         return MatchOperand_ParseFail;
5707 
5708       for (int i = 0; i < 3; ++i) {
5709         if (getLexer().isNot(AsmToken::Comma))
5710           return MatchOperand_ParseFail;
5711         Parser.Lex();
5712 
5713         int64_t Temp;
5714         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
5715           return MatchOperand_ParseFail;
5716         const int shift = i*2 + 2;
5717         Int += (Temp << shift);
5718       }
5719 
5720       if (getLexer().isNot(AsmToken::RBrac))
5721         return MatchOperand_ParseFail;
5722       Parser.Lex();
5723     } else {
5724       // sel:%d
5725       Parser.Lex();
5726       if (getParser().parseAbsoluteExpression(Int))
5727         return MatchOperand_ParseFail;
5728 
5729       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
5730         Int |= DppCtrl::ROW_SHL0;
5731       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
5732         Int |= DppCtrl::ROW_SHR0;
5733       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
5734         Int |= DppCtrl::ROW_ROR0;
5735       } else if (Prefix == "wave_shl" && 1 == Int) {
5736         Int = DppCtrl::WAVE_SHL1;
5737       } else if (Prefix == "wave_rol" && 1 == Int) {
5738         Int = DppCtrl::WAVE_ROL1;
5739       } else if (Prefix == "wave_shr" && 1 == Int) {
5740         Int = DppCtrl::WAVE_SHR1;
5741       } else if (Prefix == "wave_ror" && 1 == Int) {
5742         Int = DppCtrl::WAVE_ROR1;
5743       } else if (Prefix == "row_bcast") {
5744         if (Int == 15) {
5745           Int = DppCtrl::BCAST15;
5746         } else if (Int == 31) {
5747           Int = DppCtrl::BCAST31;
5748         } else {
5749           return MatchOperand_ParseFail;
5750         }
5751       } else {
5752         return MatchOperand_ParseFail;
5753       }
5754     }
5755   }
5756 
5757   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
5758   return MatchOperand_Success;
5759 }
5760 
5761 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
5762   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
5763 }
5764 
5765 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
5766   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
5767 }
5768 
5769 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
5770   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
5771 }
5772 
5773 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
5774   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
5775 }
5776 
5777 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
5778   OptionalImmIndexMap OptionalIdx;
5779 
5780   unsigned I = 1;
5781   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5782   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5783     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5784   }
5785 
5786   for (unsigned E = Operands.size(); I != E; ++I) {
5787     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
5788                                             MCOI::TIED_TO);
5789     if (TiedTo != -1) {
5790       assert((unsigned)TiedTo < Inst.getNumOperands());
5791       // handle tied old or src2 for MAC instructions
5792       Inst.addOperand(Inst.getOperand(TiedTo));
5793     }
5794     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5795     // Add the register arguments
5796     if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
5797       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
5798       // Skip it.
5799       continue;
5800     } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5801       Op.addRegWithFPInputModsOperands(Inst, 2);
5802     } else if (Op.isDPPCtrl()) {
5803       Op.addImmOperands(Inst, 1);
5804     } else if (Op.isImm()) {
5805       // Handle optional arguments
5806       OptionalIdx[Op.getImmTy()] = I;
5807     } else {
5808       llvm_unreachable("Invalid operand type");
5809     }
5810   }
5811 
5812   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
5813   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
5814   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
5815 }
5816 
5817 //===----------------------------------------------------------------------===//
5818 // sdwa
5819 //===----------------------------------------------------------------------===//
5820 
5821 OperandMatchResultTy
5822 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
5823                               AMDGPUOperand::ImmTy Type) {
5824   using namespace llvm::AMDGPU::SDWA;
5825 
5826   SMLoc S = Parser.getTok().getLoc();
5827   StringRef Value;
5828   OperandMatchResultTy res;
5829 
5830   res = parseStringWithPrefix(Prefix, Value);
5831   if (res != MatchOperand_Success) {
5832     return res;
5833   }
5834 
5835   int64_t Int;
5836   Int = StringSwitch<int64_t>(Value)
5837         .Case("BYTE_0", SdwaSel::BYTE_0)
5838         .Case("BYTE_1", SdwaSel::BYTE_1)
5839         .Case("BYTE_2", SdwaSel::BYTE_2)
5840         .Case("BYTE_3", SdwaSel::BYTE_3)
5841         .Case("WORD_0", SdwaSel::WORD_0)
5842         .Case("WORD_1", SdwaSel::WORD_1)
5843         .Case("DWORD", SdwaSel::DWORD)
5844         .Default(0xffffffff);
5845   Parser.Lex(); // eat last token
5846 
5847   if (Int == 0xffffffff) {
5848     return MatchOperand_ParseFail;
5849   }
5850 
5851   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
5852   return MatchOperand_Success;
5853 }
5854 
5855 OperandMatchResultTy
5856 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
5857   using namespace llvm::AMDGPU::SDWA;
5858 
5859   SMLoc S = Parser.getTok().getLoc();
5860   StringRef Value;
5861   OperandMatchResultTy res;
5862 
5863   res = parseStringWithPrefix("dst_unused", Value);
5864   if (res != MatchOperand_Success) {
5865     return res;
5866   }
5867 
5868   int64_t Int;
5869   Int = StringSwitch<int64_t>(Value)
5870         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
5871         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
5872         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
5873         .Default(0xffffffff);
5874   Parser.Lex(); // eat last token
5875 
5876   if (Int == 0xffffffff) {
5877     return MatchOperand_ParseFail;
5878   }
5879 
5880   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
5881   return MatchOperand_Success;
5882 }
5883 
5884 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
5885   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
5886 }
5887 
5888 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
5889   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
5890 }
5891 
5892 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
5893   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
5894 }
5895 
5896 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
5897   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
5898 }
5899 
5900 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
5901                               uint64_t BasicInstType, bool skipVcc) {
5902   using namespace llvm::AMDGPU::SDWA;
5903 
5904   OptionalImmIndexMap OptionalIdx;
5905   bool skippedVcc = false;
5906 
5907   unsigned I = 1;
5908   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5909   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5910     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5911   }
5912 
5913   for (unsigned E = Operands.size(); I != E; ++I) {
5914     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5915     if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) {
5916       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
5917       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
5918       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
5919       // Skip VCC only if we didn't skip it on previous iteration.
5920       if (BasicInstType == SIInstrFlags::VOP2 &&
5921           (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
5922         skippedVcc = true;
5923         continue;
5924       } else if (BasicInstType == SIInstrFlags::VOPC &&
5925                  Inst.getNumOperands() == 0) {
5926         skippedVcc = true;
5927         continue;
5928       }
5929     }
5930     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5931       Op.addRegOrImmWithInputModsOperands(Inst, 2);
5932     } else if (Op.isImm()) {
5933       // Handle optional arguments
5934       OptionalIdx[Op.getImmTy()] = I;
5935     } else {
5936       llvm_unreachable("Invalid operand type");
5937     }
5938     skippedVcc = false;
5939   }
5940 
5941   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
5942       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
5943     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
5944     switch (BasicInstType) {
5945     case SIInstrFlags::VOP1:
5946       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5947       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5948         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5949       }
5950       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5951       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5952       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5953       break;
5954 
5955     case SIInstrFlags::VOP2:
5956       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5957       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5958         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5959       }
5960       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5961       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5962       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5963       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5964       break;
5965 
5966     case SIInstrFlags::VOPC:
5967       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5968       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5969       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5970       break;
5971 
5972     default:
5973       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
5974     }
5975   }
5976 
5977   // special case v_mac_{f16, f32}:
5978   // it has src2 register operand that is tied to dst operand
5979   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
5980       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
5981     auto it = Inst.begin();
5982     std::advance(
5983       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
5984     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5985   }
5986 }
5987 
5988 /// Force static initialization.
5989 extern "C" void LLVMInitializeAMDGPUAsmParser() {
5990   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
5991   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
5992 }
5993 
5994 #define GET_REGISTER_MATCHER
5995 #define GET_MATCHER_IMPLEMENTATION
5996 #define GET_MNEMONIC_SPELL_CHECKER
5997 #include "AMDGPUGenAsmMatcher.inc"
5998 
5999 // This fuction should be defined after auto-generated include so that we have
6000 // MatchClassKind enum defined
6001 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6002                                                      unsigned Kind) {
6003   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6004   // But MatchInstructionImpl() expects to meet token and fails to validate
6005   // operand. This method checks if we are given immediate operand but expect to
6006   // get corresponding token.
6007   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6008   switch (Kind) {
6009   case MCK_addr64:
6010     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6011   case MCK_gds:
6012     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6013   case MCK_lds:
6014     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6015   case MCK_glc:
6016     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6017   case MCK_idxen:
6018     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6019   case MCK_offen:
6020     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6021   case MCK_SSrcB32:
6022     // When operands have expression values, they will return true for isToken,
6023     // because it is not possible to distinguish between a token and an
6024     // expression at parse time. MatchInstructionImpl() will always try to
6025     // match an operand as a token, when isToken returns true, and when the
6026     // name of the expression is not a valid token, the match will fail,
6027     // so we need to handle it here.
6028     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6029   case MCK_SSrcF32:
6030     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6031   case MCK_SoppBrTarget:
6032     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6033   case MCK_VReg32OrOff:
6034     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6035   case MCK_InterpSlot:
6036     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6037   case MCK_Attr:
6038     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6039   case MCK_AttrChan:
6040     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6041   default:
6042     return Match_InvalidOperand;
6043   }
6044 }
6045 
6046 //===----------------------------------------------------------------------===//
6047 // endpgm
6048 //===----------------------------------------------------------------------===//
6049 
6050 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6051   SMLoc S = Parser.getTok().getLoc();
6052   int64_t Imm = 0;
6053 
6054   if (!parseExpr(Imm)) {
6055     // The operand is optional, if not present default to 0
6056     Imm = 0;
6057   }
6058 
6059   if (!isUInt<16>(Imm)) {
6060     Error(S, "expected a 16-bit value");
6061     return MatchOperand_ParseFail;
6062   }
6063 
6064   Operands.push_back(
6065       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6066   return MatchOperand_Success;
6067 }
6068 
6069 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6070