1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "Utils/AMDGPUAsmUtils.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "Utils/AMDKernelCodeTUtils.h"
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInst.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCInstrInfo.h"
34 #include "llvm/MC/MCParser/MCAsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/Support/AMDGPUMetadata.h"
44 #include "llvm/Support/AMDHSAKernelDescriptor.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/SMLoc.h"
51 #include "llvm/Support/TargetParser.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
62 
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
66 
67 namespace {
68 
69 class AMDGPUAsmParser;
70 
// Coarse classification of a parsed register operand; IS_UNKNOWN before the
// register name has been classified. NOTE(review): IS_TTMP presumably refers
// to the trap-temporary SGPR bank and IS_SPECIAL to non-indexed registers --
// confirm against the register-parsing code.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
72 
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
76 
// A single parsed operand of an AMDGPU instruction. An operand is exactly one
// of four kinds (token, immediate, register, expression), stored in the union
// below. The is*() predicates and add*Operands() methods are the interface
// used by the auto-generated asm matcher to classify operands and append them
// to an MCInst.
class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser; // Parser that created this operand.

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  // Source-operand modifiers. Abs/Neg are floating-point modifiers and Sext
  // is an integer modifier; the two groups are mutually exclusive (see the
  // assert in getModifiersOperand).
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode the FP modifiers as the SISrcMods bitmask expected in the
    // instruction's modifier operand.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    // Encode the integer modifiers as the SISrcMods bitmask.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    // Encode whichever modifier group is active (or 0 if none).
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  // Identifies which named ("optional") immediate operand an Immediate is,
  // e.g. offset, glc, dmask. ImmTyNone marks a plain immediate value.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

private:
  // Token storage: a non-owning slice of the input buffer.
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Immediate storage: raw 64-bit value plus its kind and modifiers.
  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  // Register storage: register number plus modifiers.
  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Active member is selected by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret is a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  // True if the immediate can be encoded inline / as a literal of the given
  // type. Defined out of line (implementations not in this chunk).
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A "plain" register: register kind with no abs/neg/sext modifiers.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // Any VGPR register class, of any width.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  // A typed (named) immediate, i.e. anything other than a plain value.
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // One predicate per named immediate operand; several also range-check the
  // value per the instruction encoding's field width.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // Source-operand class predicates used by the generated matcher. Naming:
  //   SCSrc* = scalar register or inline constant (no modifiers),
  //   SSrc*  = SCSrc or a literal constant / expression,
  //   VCSrc* = VGPR-or-SGPR (VS class) or inline constant (no modifiers),
  //   VSrc*  = VCSrc or a literal constant / expression.
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  // Marked unreachable: this predicate is not expected to be called.
  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  // Marked unreachable: this predicate is not expected to be called.
  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // NOTE(review): the integer VSrc predicates delegate to the FP VCSrc
  // variants (isVCSrcF32/F64/F16) rather than the integer ones -- apparently
  // intentional since inline-constant checks differ only by type; confirm.
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  // Marked unreachable: this predicate is not expected to be called.
  bool isVSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isVSrcB16();
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  // Marked unreachable: this predicate is not expected to be called.
  bool isVSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isVSrcF16();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  // AMDGPU asm has no memory-operand syntax of its own.
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  // Out-of-line predicates (implementations not in this chunk).
  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // For an Expression that wraps a symbol reference, expose the symbol name
  // as the token text (see isToken).
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers are only meaningful on registers and plain (ImmTyNone)
  // immediates; asserted below.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  // Append this operand to Inst as operand N. The add*Operands family is
  // called by the generated matcher's convert routines.
  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the encoded modifier bits first, then the register or immediate
  // (with modifiers deliberately NOT re-applied to the immediate value).
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  // Register-only variant: modifier bits followed by the register.
  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  // Branch targets may be a resolved immediate or an unresolved expression
  // (fixed up later).
  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  // Pretty-print an ImmTy name for debug output (used by print()).
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyEndpgm:
      OS << "Endpgm";
      break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  // Factory helpers; each sets the active union member and the source range.
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // NOTE(review): HasExplicitEncodingSize is currently unused in the body.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};
787 
788 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
789   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
790   return OS;
791 }
792 
793 //===----------------------------------------------------------------------===//
794 // AsmParser
795 //===----------------------------------------------------------------------===//
796 
797 // Holds info related to the current kernel, e.g. count of SGPRs used.
798 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
799 // .amdgpu_hsa_kernel or at EOF.
800 class KernelScopeInfo {
801   int SgprIndexUnusedMin = -1;
802   int VgprIndexUnusedMin = -1;
803   MCContext *Ctx = nullptr;
804 
805   void usesSgprAt(int i) {
806     if (i >= SgprIndexUnusedMin) {
807       SgprIndexUnusedMin = ++i;
808       if (Ctx) {
809         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
810         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
811       }
812     }
813   }
814 
815   void usesVgprAt(int i) {
816     if (i >= VgprIndexUnusedMin) {
817       VgprIndexUnusedMin = ++i;
818       if (Ctx) {
819         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
820         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
821       }
822     }
823   }
824 
825 public:
826   KernelScopeInfo() = default;
827 
828   void initialize(MCContext &Context) {
829     Ctx = &Context;
830     usesSgprAt(SgprIndexUnusedMin = -1);
831     usesVgprAt(VgprIndexUnusedMin = -1);
832   }
833 
834   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
835     switch (RegKind) {
836       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
837       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
838       default: break;
839     }
840   }
841 };
842 
843 class AMDGPUAsmParser : public MCTargetAsmParser {
844   MCAsmParser &Parser;
845 
846   // Number of extra operands parsed after the first optional operand.
847   // This may be necessary to skip hardcoded mandatory operands.
848   static const unsigned MAX_OPR_LOOKAHEAD = 8;
849 
850   unsigned ForcedEncodingSize = 0;
851   bool ForcedDPP = false;
852   bool ForcedSDWA = false;
853   KernelScopeInfo KernelScope;
854 
855   /// @name Auto-generated Match Functions
856   /// {
857 
858 #define GET_ASSEMBLER_HEADER
859 #include "AMDGPUGenAsmMatcher.inc"
860 
861   /// }
862 
863 private:
864   bool ParseAsAbsoluteExpression(uint32_t &Ret);
865   bool OutOfRangeError(SMRange Range);
866   /// Calculate VGPR/SGPR blocks required for given target, reserved
867   /// registers, and user-specified NextFreeXGPR values.
868   ///
869   /// \param Features [in] Target features, used for bug corrections.
870   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
871   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
872   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
873   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
874   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
875   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
876   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
877   /// \param VGPRBlocks [out] Result VGPR block count.
878   /// \param SGPRBlocks [out] Result SGPR block count.
879   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
880                           bool FlatScrUsed, bool XNACKUsed,
881                           unsigned NextFreeVGPR, SMRange VGPRRange,
882                           unsigned NextFreeSGPR, SMRange SGPRRange,
883                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
884   bool ParseDirectiveAMDGCNTarget();
885   bool ParseDirectiveAMDHSAKernel();
886   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
887   bool ParseDirectiveHSACodeObjectVersion();
888   bool ParseDirectiveHSACodeObjectISA();
889   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
890   bool ParseDirectiveAMDKernelCodeT();
891   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
892   bool ParseDirectiveAMDGPUHsaKernel();
893 
894   bool ParseDirectiveISAVersion();
895   bool ParseDirectiveHSAMetadata();
896   bool ParseDirectivePALMetadataBegin();
897   bool ParseDirectivePALMetadata();
898 
899   /// Common code to parse out a block of text (typically YAML) between start and
900   /// end directives.
901   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
902                            const char *AssemblerDirectiveEnd,
903                            std::string &CollectString);
904 
905   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
906                              RegisterKind RegKind, unsigned Reg1,
907                              unsigned RegNum);
908   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
909                            unsigned& RegNum, unsigned& RegWidth,
910                            unsigned *DwordRegIndex);
911   bool isRegister();
912   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
913   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
914   void initializeGprCountSymbol(RegisterKind RegKind);
915   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
916                              unsigned RegWidth);
917   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
918                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
919   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
920                  bool IsGdsHardcoded);
921 
922 public:
923   enum AMDGPUMatchResultTy {
924     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
925   };
926 
927   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
928 
929   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
930                const MCInstrInfo &MII,
931                const MCTargetOptions &Options)
932       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
933     MCAsmParserExtension::Initialize(Parser);
934 
935     if (getFeatureBits().none()) {
936       // Set default features.
937       copySTI().ToggleFeature("southern-islands");
938     }
939 
940     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
941 
942     {
943       // TODO: make those pre-defined variables read-only.
944       // Currently there is none suitable machinery in the core llvm-mc for this.
945       // MCSymbol::isRedefinable is intended for another purpose, and
946       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
947       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
948       MCContext &Ctx = getContext();
949       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
950         MCSymbol *Sym =
951             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
952         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
953         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
954         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
955         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
956         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
957       } else {
958         MCSymbol *Sym =
959             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
960         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
961         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
962         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
963         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
964         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
965       }
966       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
967         initializeGprCountSymbol(IS_VGPR);
968         initializeGprCountSymbol(IS_SGPR);
969       } else
970         KernelScope.initialize(getContext());
971     }
972   }
973 
  // Subtarget feature predicates, forwarded to AMDGPUBaseInfo / feature bits.

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  // Generation predicates.
  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  // SGPR102/SGPR103 are not addressable on VI.
  bool hasSGPR102_SGPR103() const {
    return !isVI();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }
1017 
  // Accessor for the target streamer; the generic streamer always carries an
  // AMDGPUTargetStreamer for this target, hence the static_cast.
  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }
1036 
  // Encoding forcing state, set from mnemonic suffixes (_e32/_e64/_dpp/_sdwa).
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  // MCTargetAsmParser interface.
  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  // Prefixed-immediate parsing (e.g. "offset:16").
  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  // Register/immediate operand parsing, with optional SP3-style and
  // floating-point input modifiers (abs/neg).
  bool parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier = false);
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  // DS/EXP instruction conversion.
  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);
private:
  // Identifier-or-integer operand: Id holds the value, IsSymbolic records
  // whether it was written as a symbolic name.
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  // Post-match semantic validation of the encoded MCInst.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Lexer convenience wrappers used throughout the custom parsers.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();
1146 
public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  // Swizzle operand parsing: either a raw offset or one of the named macros
  // (QUAD_PERM, BITMASK_PERM, BROADCAST, SWAP, REVERSE).
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  // MUBUF/MTBUF conversion wrappers over cvtMubufImpl.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // Default operand values for optional immediates omitted in the source.
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  // VOP3 / VOP3P conversion.
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  // DPP parsing and conversion.
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  // SDWA parsing and conversion.
  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                uint64_t BasicInstType, bool skipVcc = false);

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1220 };
1221 
// Table entry describing an optional operand: its source-level name, the
// immediate kind it maps to, whether it is a bare flag bit (no ":value"
// suffix), and an optional value-conversion callback.
struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};
1228 
1229 } // end anonymous namespace
1230 
1231 // May be called with integer type with equivalent bitwidth.
1232 static const fltSemantics *getFltSemantics(unsigned Size) {
1233   switch (Size) {
1234   case 4:
1235     return &APFloat::IEEEsingle();
1236   case 8:
1237     return &APFloat::IEEEdouble();
1238   case 2:
1239     return &APFloat::IEEEhalf();
1240   default:
1241     llvm_unreachable("unsupported fp type");
1242   }
1243 }
1244 
// Convenience overload: derive the byte size from an MVT.
static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}
1248 
// Map an SI operand-type enumerator to the IEEE semantics used when encoding
// a floating-point literal for that operand.
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
1272 
1273 //===----------------------------------------------------------------------===//
1274 // Operand
1275 //===----------------------------------------------------------------------===//
1276 
// Returns true if FPLiteral can be converted to VT's semantics without
// overflow or underflow (precision loss alone is tolerated).
// Note: FPLiteral is converted in place as a side effect.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  // NOTE(review): rejection additionally requires 'Lost' to be set; this
  // presumes an over/underflowing conversion always reports inexact too —
  // confirm against APFloat::convert semantics.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
1294 
1295 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1296   return isUIntN(Size, Val) || isIntN(Size, Val);
1297 }
1298 
// Returns true if this operand can be encoded as an inline constant for an
// operand of the given type, following the SI inline-constant rules for
// 16/32/64-bit integer and FP literals.
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Narrower operand: the double-precision literal must first convert to
    // the operand's FP type without over/underflow.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  // An int literal wider than the operand cannot be inlined.
  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
1361 
// Returns true if this immediate may be encoded as a (non-inline) literal
// constant for an operand of the given type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // Narrower FP operand: accept if the double literal converts without
  // over/underflow.
  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}
1402 
// Returns true if this operand is a register belonging to the given class.
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}
1406 
1407 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1408   if (AsmParser->isVI())
1409     return isVReg32();
1410   else if (AsmParser->isGFX9())
1411     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1412   else
1413     return false;
1414 }
1415 
// Type-specific SDWA operand predicates, forwarding to isSDWAOperand.

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}
1431 
1432 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1433 {
1434   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1435   assert(Size == 2 || Size == 4 || Size == 8);
1436 
1437   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1438 
1439   if (Imm.Mods.Abs) {
1440     Val &= ~FpSignMask;
1441   }
1442   if (Imm.Mods.Neg) {
1443     Val ^= FpSignMask;
1444   }
1445 
1446   return Val;
1447 }
1448 
1449 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1450   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1451                              Inst.getNumOperands())) {
1452     addLiteralImmOperand(Inst, Imm.Val,
1453                          ApplyModifiers &
1454                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1455   } else {
1456     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1457     Inst.addOperand(MCOperand::createImm(Imm.Val));
1458   }
1459 }
1460 
// Encode Val as the literal/inline-constant operand of Inst, according to the
// destination operand's type (OperandType) and whether the source token was
// written as an FP or integer literal. ApplyModifiers folds abs/neg into the
// bits first (FP operands only).
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // FP tokens are held as 64-bit doubles; otherwise use the operand's size.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high 32 bits of a 64-bit FP literal are encoded.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Not inlinable: encode the low 32 bits as a literal.
    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    // Packed 16-bit operands accept only inline constants; isLiteralImm /
    // isInlinableImm should have filtered everything else.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
1591 
// Append this immediate as a KIMM (k-constant) operand of the given bit
// width: integer tokens are truncated, FP tokens are converted to the
// matching IEEE format and emitted as raw bits.
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
1608 
// Append this register operand, mapped through getMCReg for the subtarget.
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}
1612 
1613 static bool isInlineValue(unsigned Reg) {
1614   switch (Reg) {
1615   case AMDGPU::SRC_SHARED_BASE:
1616   case AMDGPU::SRC_SHARED_LIMIT:
1617   case AMDGPU::SRC_PRIVATE_BASE:
1618   case AMDGPU::SRC_PRIVATE_LIMIT:
1619   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1620     return true;
1621   default:
1622     return false;
1623   }
1624 }
1625 
// Returns true if this operand is one of the special inline-value registers.
bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}
1629 
1630 //===----------------------------------------------------------------------===//
1631 // AsmParser
1632 //===----------------------------------------------------------------------===//
1633 
1634 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1635   if (Is == IS_VGPR) {
1636     switch (RegWidth) {
1637       default: return -1;
1638       case 1: return AMDGPU::VGPR_32RegClassID;
1639       case 2: return AMDGPU::VReg_64RegClassID;
1640       case 3: return AMDGPU::VReg_96RegClassID;
1641       case 4: return AMDGPU::VReg_128RegClassID;
1642       case 8: return AMDGPU::VReg_256RegClassID;
1643       case 16: return AMDGPU::VReg_512RegClassID;
1644     }
1645   } else if (Is == IS_TTMP) {
1646     switch (RegWidth) {
1647       default: return -1;
1648       case 1: return AMDGPU::TTMP_32RegClassID;
1649       case 2: return AMDGPU::TTMP_64RegClassID;
1650       case 4: return AMDGPU::TTMP_128RegClassID;
1651       case 8: return AMDGPU::TTMP_256RegClassID;
1652       case 16: return AMDGPU::TTMP_512RegClassID;
1653     }
1654   } else if (Is == IS_SGPR) {
1655     switch (RegWidth) {
1656       default: return -1;
1657       case 1: return AMDGPU::SGPR_32RegClassID;
1658       case 2: return AMDGPU::SGPR_64RegClassID;
1659       case 4: return AMDGPU::SGPR_128RegClassID;
1660       case 8: return AMDGPU::SGPR_256RegClassID;
1661       case 16: return AMDGPU::SGPR_512RegClassID;
1662     }
1663   }
1664   return -1;
1665 }
1666 
// Map a special-register source name (including accepted aliases like
// "shared_base" / "src_shared_base") to its register number, or 0 if the
// name is not a special register.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Default(0);
}
1703 
// MCTargetAsmParser hook: parse one register operand, filling RegNo and its
// source range. Returns true on failure (MC convention).
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}
1714 
1715 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1716                                             RegisterKind RegKind, unsigned Reg1,
1717                                             unsigned RegNum) {
1718   switch (RegKind) {
1719   case IS_SPECIAL:
1720     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1721       Reg = AMDGPU::EXEC;
1722       RegWidth = 2;
1723       return true;
1724     }
1725     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1726       Reg = AMDGPU::FLAT_SCR;
1727       RegWidth = 2;
1728       return true;
1729     }
1730     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1731       Reg = AMDGPU::XNACK_MASK;
1732       RegWidth = 2;
1733       return true;
1734     }
1735     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1736       Reg = AMDGPU::VCC;
1737       RegWidth = 2;
1738       return true;
1739     }
1740     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1741       Reg = AMDGPU::TBA;
1742       RegWidth = 2;
1743       return true;
1744     }
1745     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1746       Reg = AMDGPU::TMA;
1747       RegWidth = 2;
1748       return true;
1749     }
1750     return false;
1751   case IS_VGPR:
1752   case IS_SGPR:
1753   case IS_TTMP:
1754     if (Reg1 != Reg + RegWidth) {
1755       return false;
1756     }
1757     RegWidth++;
1758     return true;
1759   default:
1760     llvm_unreachable("unexpected register kind");
1761   }
1762 }
1763 
1764 static const StringRef Registers[] = {
1765   { "v" },
1766   { "s" },
1767   { "ttmp" },
1768 };
1769 
1770 bool
1771 AMDGPUAsmParser::isRegister(const AsmToken &Token,
1772                             const AsmToken &NextToken) const {
1773 
1774   // A list of consecutive registers: [s0,s1,s2,s3]
1775   if (Token.is(AsmToken::LBrac))
1776     return true;
1777 
1778   if (!Token.is(AsmToken::Identifier))
1779     return false;
1780 
1781   // A single register like s0 or a range of registers like s[0:1]
1782 
1783   StringRef RegName = Token.getString();
1784 
1785   for (StringRef Reg : Registers) {
1786     if (RegName.startswith(Reg)) {
1787       if (Reg.size() < RegName.size()) {
1788         unsigned RegNum;
1789         // A single register with an index: rXX
1790         if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
1791           return true;
1792       } else {
1793         // A range of registers: r[XX:YY].
1794         if (NextToken.is(AsmToken::LBrac))
1795           return true;
1796       }
1797     }
1798   }
1799 
1800   return getSpecialRegForName(RegName);
1801 }
1802 
1803 bool
1804 AMDGPUAsmParser::isRegister()
1805 {
1806   return isRegister(getToken(), peekToken());
1807 }
1808 
// Parse any of the supported register syntaxes:
//   - a named special register (vcc, exec, m0, ...),
//   - a single indexed register (v0, s15, ttmp3),
//   - a register range (v[0:3], s[2:3], also v[5] with no ":YY"),
//   - a bracketed list of consecutive registers ([v0,v1,v2]).
// On success fills RegKind/Reg/RegNum/RegWidth (and *DwordRegIndex,
// when non-null, with the first dword index) and returns true.
// Returns false on any syntax or range error.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    // Named special registers take priority over the v/s/ttmp prefixes.
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      // Determine the register file from the name prefix; RegNumIndex is
      // the offset where the numeric index (if any) starts.
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        // Either "]" (single-element range) or ":" must follow RegLo.
        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          // v[XX] is shorthand for v[XX:XX].
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    // The first element fixes the kind and base; every element must be a
    // single (width-1) register of the same kind, strictly consecutive.
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  // Translate the parsed (kind, index, width) into an MC register.
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    // Reject misaligned tuple bases, then convert the dword index into an
    // index within the (aligned) register class.
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  // Finally make sure the register exists on the current subtarget.
  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}
1937 
1938 Optional<StringRef>
1939 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1940   switch (RegKind) {
1941   case IS_VGPR:
1942     return StringRef(".amdgcn.next_free_vgpr");
1943   case IS_SGPR:
1944     return StringRef(".amdgcn.next_free_sgpr");
1945   default:
1946     return None;
1947   }
1948 }
1949 
1950 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1951   auto SymbolName = getGprCountSymbolName(RegKind);
1952   assert(SymbolName && "initializing invalid register kind");
1953   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1954   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1955 }
1956 
1957 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1958                                             unsigned DwordRegIndex,
1959                                             unsigned RegWidth) {
1960   // Symbols are only defined for GCN targets
1961   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
1962     return true;
1963 
1964   auto SymbolName = getGprCountSymbolName(RegKind);
1965   if (!SymbolName)
1966     return true;
1967   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1968 
1969   int64_t NewMax = DwordRegIndex + RegWidth - 1;
1970   int64_t OldCount;
1971 
1972   if (!Sym->isVariable())
1973     return !Error(getParser().getTok().getLoc(),
1974                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
1975   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
1976     return !Error(
1977         getParser().getTok().getLoc(),
1978         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
1979 
1980   if (OldCount <= NewMax)
1981     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
1982 
1983   return true;
1984 }
1985 
1986 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1987   const auto &Tok = Parser.getTok();
1988   SMLoc StartLoc = Tok.getLoc();
1989   SMLoc EndLoc = Tok.getEndLoc();
1990   RegisterKind RegKind;
1991   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1992 
1993   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1994     //FIXME: improve error messages (bug 41303).
1995     Error(StartLoc, "not a valid operand.");
1996     return nullptr;
1997   }
1998   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1999     if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2000       return nullptr;
2001   } else
2002     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2003   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2004 }
2005 
2006 bool
2007 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier) {
2008   if (HasSP3AbsModifier) {
2009     // This is a workaround for handling expressions
2010     // as arguments of SP3 'abs' modifier, for example:
2011     //     |1.0|
2012     //     |-1|
2013     //     |1+x|
2014     // This syntax is not compatible with syntax of standard
2015     // MC expressions (due to the trailing '|').
2016 
2017     SMLoc EndLoc;
2018     const MCExpr *Expr;
2019     SMLoc StartLoc = getLoc();
2020 
2021     if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
2022       return true;
2023     }
2024 
2025     if (!Expr->evaluateAsAbsolute(Val))
2026       return Error(StartLoc, "expected absolute expression");
2027 
2028     return false;
2029   }
2030 
2031   return getParser().parseAbsoluteExpression(Val);
2032 }
2033 
// Parse an immediate operand: either a floating-point literal (with an
// optional leading '-') or an integer absolute expression. The parsed
// value is appended to Operands. HasSP3AbsModifier is forwarded to
// parseAbsoluteExpr to handle the SP3 |...| syntax.
OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
  // TODO: add syntactic sugar for 1/(2*PI)

  const auto& Tok = getToken();
  const auto& NextTok = peekToken();
  bool IsReal = Tok.is(AsmToken::Real);
  SMLoc S = Tok.getLoc();
  bool Negate = false;

  // A '-' directly followed by a real literal is treated as part of the
  // literal; consume it here and remember to flip the sign below.
  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
    lex();
    IsReal = true;
    Negate = true;
  }

  if (IsReal) {
    // Floating-point expressions are not supported.
    // Can only allow floating-point literals with an
    // optional sign.

    StringRef Num = getTokenStr();
    lex();

    // Parse the literal as IEEE double; the operand is stored as the
    // raw 64-bit pattern of that value.
    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
      return MatchOperand_ParseFail;
    }
    if (Negate)
      RealVal.changeSign();

    Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));

    return MatchOperand_Success;

    // FIXME: Should enable arbitrary expressions here
  } else if (Tok.is(AsmToken::Integer) ||
             (Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Integer))){

    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, HasSP3AbsModifier))
      return MatchOperand_ParseFail;

    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
2086 
2087 OperandMatchResultTy
2088 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2089   if (!isRegister())
2090     return MatchOperand_NoMatch;
2091 
2092   if (auto R = parseRegister()) {
2093     assert(R->isReg());
2094     Operands.push_back(std::move(R));
2095     return MatchOperand_Success;
2096   }
2097   return MatchOperand_ParseFail;
2098 }
2099 
2100 OperandMatchResultTy
2101 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2102   auto res = parseReg(Operands);
2103   return (res == MatchOperand_NoMatch)?
2104          parseImm(Operands, HasSP3AbsMod) :
2105          res;
2106 }
2107 
2108 // Check if the current token is an SP3 'neg' modifier.
2109 // Currently this modifier is allowed in the following context:
2110 //
2111 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2112 // 2. Before an 'abs' modifier: -abs(...)
2113 // 3. Before an SP3 'abs' modifier: -|...|
2114 //
2115 // In all other cases "-" is handled as a part
2116 // of an expression that follows the sign.
2117 //
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2129 //
2130 bool
2131 AMDGPUAsmParser::parseSP3NegModifier() {
2132 
2133   AsmToken NextToken[2];
2134   peekTokens(NextToken);
2135 
2136   if (isToken(AsmToken::Minus) &&
2137       (isRegister(NextToken[0], NextToken[1]) ||
2138        NextToken[0].is(AsmToken::Pipe) ||
2139        isId(NextToken[0], "abs"))) {
2140     lex();
2141     return true;
2142   }
2143 
2144   return false;
2145 }
2146 
// Parse a register (or, when AllowImm, an immediate) with optional
// floating-point input modifiers: 'neg(...)' or SP3 '-', and 'abs(...)'
// or SP3 '|...|'. The resulting Neg/Abs flags are attached to the
// operand that was just parsed.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  // Mixing the SP3 '-' with a 'neg(...)' modifier is rejected.
  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  // Likewise, 'abs(...)' cannot be combined with the SP3 '|...|' form.
  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    // SP3Abs is passed down so |expr| can be parsed despite the closing '|'.
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once a modifier has been consumed, failing to parse the operand
    // is a hard error rather than a no-match.
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  // Consume the closing delimiters of whichever modifiers were opened.
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
2209 
2210 OperandMatchResultTy
2211 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2212                                                bool AllowImm) {
2213   bool Sext = trySkipId("sext");
2214   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2215     return MatchOperand_ParseFail;
2216 
2217   OperandMatchResultTy Res;
2218   if (AllowImm) {
2219     Res = parseRegOrImm(Operands);
2220   } else {
2221     Res = parseReg(Operands);
2222   }
2223   if (Res != MatchOperand_Success) {
2224     return Sext? MatchOperand_ParseFail : Res;
2225   }
2226 
2227   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2228     return MatchOperand_ParseFail;
2229 
2230   AMDGPUOperand::Modifiers Mods;
2231   Mods.Sext = Sext;
2232 
2233   if (Mods.hasIntModifiers()) {
2234     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2235     Op.setModifiers(Mods);
2236   }
2237 
2238   return MatchOperand_Success;
2239 }
2240 
2241 OperandMatchResultTy
2242 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2243   return parseRegOrImmWithFPInputMods(Operands, false);
2244 }
2245 
2246 OperandMatchResultTy
2247 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2248   return parseRegOrImmWithIntInputMods(Operands, false);
2249 }
2250 
2251 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2252   auto Loc = getLoc();
2253   if (trySkipId("off")) {
2254     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2255                                                 AMDGPUOperand::ImmTyOff, false));
2256     return MatchOperand_Success;
2257   }
2258 
2259   if (!isRegister())
2260     return MatchOperand_NoMatch;
2261 
2262   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2263   if (Reg) {
2264     Operands.push_back(std::move(Reg));
2265     return MatchOperand_Success;
2266   }
2267 
2268   return MatchOperand_ParseFail;
2269 
2270 }
2271 
2272 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2273   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2274 
2275   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2276       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2277       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2278       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2279     return Match_InvalidOperand;
2280 
2281   if ((TSFlags & SIInstrFlags::VOP3) &&
2282       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2283       getForcedEncodingSize() != 64)
2284     return Match_PreferE32;
2285 
2286   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2287       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2288     // v_mac_f32/16 allow only dst_sel == DWORD;
2289     auto OpNum =
2290         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2291     const auto &Op = Inst.getOperand(OpNum);
2292     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2293       return Match_InvalidOperand;
2294     }
2295   }
2296 
2297   if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2298     // FIXME: Produces error without correct column reported.
2299     auto OpNum =
2300         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2301     const auto &Op = Inst.getOperand(OpNum);
2302     if (Op.getImm() != 0)
2303       return Match_InvalidOperand;
2304   }
2305 
2306   return Match_Success;
2307 }
2308 
2309 // What asm variants we should check
2310 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2311   if (getForcedEncodingSize() == 32) {
2312     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2313     return makeArrayRef(Variants);
2314   }
2315 
2316   if (isForcedVOP3()) {
2317     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2318     return makeArrayRef(Variants);
2319   }
2320 
2321   if (isForcedSDWA()) {
2322     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2323                                         AMDGPUAsmVariants::SDWA9};
2324     return makeArrayRef(Variants);
2325   }
2326 
2327   if (isForcedDPP()) {
2328     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2329     return makeArrayRef(Variants);
2330   }
2331 
2332   static const unsigned Variants[] = {
2333     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2334     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2335   };
2336 
2337   return makeArrayRef(Variants);
2338 }
2339 
2340 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2341   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2342   const unsigned Num = Desc.getNumImplicitUses();
2343   for (unsigned i = 0; i < Num; ++i) {
2344     unsigned Reg = Desc.ImplicitUses[i];
2345     switch (Reg) {
2346     case AMDGPU::FLAT_SCR:
2347     case AMDGPU::VCC:
2348     case AMDGPU::M0:
2349       return Reg;
2350     default:
2351       break;
2352     }
2353   }
2354   return AMDGPU::NoRegister;
2355 }
2356 
2357 // NB: This code is correct only when used to check constant
2358 // bus limitations because GFX7 support no f16 inline constants.
2359 // Note that there are no cases when a GFX7 opcode violates
2360 // constant bus limitations due to the use of an f16 constant.
2361 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2362                                        unsigned OpIdx) const {
2363   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2364 
2365   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2366     return false;
2367   }
2368 
2369   const MCOperand &MO = Inst.getOperand(OpIdx);
2370 
2371   int64_t Val = MO.getImm();
2372   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2373 
2374   switch (OpSize) { // expected operand size
2375   case 8:
2376     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2377   case 4:
2378     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2379   case 2: {
2380     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2381     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2382         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2383       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2384     } else {
2385       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2386     }
2387   }
2388   default:
2389     llvm_unreachable("invalid operand size");
2390   }
2391 }
2392 
2393 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2394   const MCOperand &MO = Inst.getOperand(OpIdx);
2395   if (MO.isImm()) {
2396     return !isInlineConstant(Inst, OpIdx);
2397   }
2398   return !MO.isReg() ||
2399          isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2400 }
2401 
// Verify that a VALU instruction reads at most one value through the
// constant bus (literals, SGPRs, and certain implicit special registers
// all compete for it). Returns true when the instruction is valid.
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;

  // Only VALU encodings are subject to the constant bus limit.
  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    // An implicitly-read special SGPR (vcc, m0, flat_scratch) also
    // occupies the constant bus.
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          // Repeated reads of the same SGPR count only once.
          if (Reg != SGPRUsed) {
            ++ConstantBusUseCount;
          }
          SGPRUsed = Reg;
        } else { // Expression or a literal
          ++ConstantBusUseCount;
        }
      }
    }
  }

  return ConstantBusUseCount <= 1;
}
2454 
2455 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2456   const unsigned Opcode = Inst.getOpcode();
2457   const MCInstrDesc &Desc = MII.get(Opcode);
2458 
2459   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2460   if (DstIdx == -1 ||
2461       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2462     return true;
2463   }
2464 
2465   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2466 
2467   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2468   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2469   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2470 
2471   assert(DstIdx != -1);
2472   const MCOperand &Dst = Inst.getOperand(DstIdx);
2473   assert(Dst.isReg());
2474   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2475 
2476   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2477 
2478   for (int SrcIdx : SrcIndices) {
2479     if (SrcIdx == -1) break;
2480     const MCOperand &Src = Inst.getOperand(SrcIdx);
2481     if (Src.isReg()) {
2482       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2483       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2484         return false;
2485       }
2486     }
2487   }
2488 
2489   return true;
2490 }
2491 
2492 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2493 
2494   const unsigned Opc = Inst.getOpcode();
2495   const MCInstrDesc &Desc = MII.get(Opc);
2496 
2497   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2498     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2499     assert(ClampIdx != -1);
2500     return Inst.getOperand(ClampIdx).getImm() == 0;
2501   }
2502 
2503   return true;
2504 }
2505 
2506 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2507 
2508   const unsigned Opc = Inst.getOpcode();
2509   const MCInstrDesc &Desc = MII.get(Opc);
2510 
2511   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2512     return true;
2513 
2514   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2515   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2516   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2517 
2518   assert(VDataIdx != -1);
2519   assert(DMaskIdx != -1);
2520   assert(TFEIdx != -1);
2521 
2522   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2523   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2524   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2525   if (DMask == 0)
2526     DMask = 1;
2527 
2528   unsigned DataSize =
2529     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2530   if (hasPackedD16()) {
2531     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2532     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2533       DataSize = (DataSize + 1) / 2;
2534   }
2535 
2536   return (VDataSize / 4) == DataSize + TFESize;
2537 }
2538 
2539 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2540 
2541   const unsigned Opc = Inst.getOpcode();
2542   const MCInstrDesc &Desc = MII.get(Opc);
2543 
2544   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2545     return true;
2546   if (!Desc.mayLoad() || !Desc.mayStore())
2547     return true; // Not atomic
2548 
2549   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2550   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2551 
2552   // This is an incomplete check because image_atomic_cmpswap
2553   // may only use 0x3 and 0xf while other atomic operations
2554   // may use 0x1 and 0x3. However these limitations are
2555   // verified when we check that dmask matches dst size.
2556   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2557 }
2558 
2559 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2560 
2561   const unsigned Opc = Inst.getOpcode();
2562   const MCInstrDesc &Desc = MII.get(Opc);
2563 
2564   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2565     return true;
2566 
2567   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2568   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2569 
2570   // GATHER4 instructions use dmask in a different fashion compared to
2571   // other MIMG instructions. The only useful DMASK values are
2572   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2573   // (red,red,red,red) etc.) The ISA document doesn't mention
2574   // this.
2575   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2576 }
2577 
2578 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2579 
2580   const unsigned Opc = Inst.getOpcode();
2581   const MCInstrDesc &Desc = MII.get(Opc);
2582 
2583   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2584     return true;
2585 
2586   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2587   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2588     if (isCI() || isSI())
2589       return false;
2590   }
2591 
2592   return true;
2593 }
2594 
// Returns true if Opcode is one of the *REV VOP opcodes (v_subrev,
// v_subbrev, v_lshlrev, v_lshrrev, v_ashrrev and their packed forms),
// i.e. instructions whose source operands are reversed relative to
// the corresponding non-REV instruction.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_si:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_si:
  case AMDGPU::V_SUBREV_F32_e64_vi:
  case AMDGPU::V_SUBREV_I32_e32:
  case AMDGPU::V_SUBREV_I32_e64:
  case AMDGPU::V_SUBREV_I32_e32_si:
  case AMDGPU::V_SUBREV_I32_e64_si:
  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_si:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_si:
  case AMDGPU::V_SUBBREV_U32_e64_vi:
  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:
  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_vi:
  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:
  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
  case AMDGPU::V_LSHLREV_B32_e32_si:
  case AMDGPU::V_LSHLREV_B32_e64_si:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_vi:
  case AMDGPU::V_LSHLREV_B64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_si:
  case AMDGPU::V_LSHRREV_B32_e64_si:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B64_vi:
  case AMDGPU::V_ASHRREV_I32_e64_si:
  case AMDGPU::V_ASHRREV_I32_e32_si:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_vi:
  case AMDGPU::V_ASHRREV_I64_vi:
  case AMDGPU::V_PK_LSHLREV_B16_vi:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
2661 
2662 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
2663 
2664   using namespace SIInstrFlags;
2665   const unsigned Opcode = Inst.getOpcode();
2666   const MCInstrDesc &Desc = MII.get(Opcode);
2667 
2668   // lds_direct register is defined so that it can be used
2669   // with 9-bit operands only. Ignore encodings which do not accept these.
2670   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
2671     return true;
2672 
2673   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2674   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2675   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2676 
2677   const int SrcIndices[] = { Src1Idx, Src2Idx };
2678 
2679   // lds_direct cannot be specified as either src1 or src2.
2680   for (int SrcIdx : SrcIndices) {
2681     if (SrcIdx == -1) break;
2682     const MCOperand &Src = Inst.getOperand(SrcIdx);
2683     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
2684       return false;
2685     }
2686   }
2687 
2688   if (Src0Idx == -1)
2689     return true;
2690 
2691   const MCOperand &Src = Inst.getOperand(Src0Idx);
2692   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
2693     return true;
2694 
2695   // lds_direct is specified as src0. Check additional limitations.
2696   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
2697 }
2698 
2699 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
2700   unsigned Opcode = Inst.getOpcode();
2701   const MCInstrDesc &Desc = MII.get(Opcode);
2702   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
2703     return true;
2704 
2705   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2706   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2707 
2708   const int OpIndices[] = { Src0Idx, Src1Idx };
2709 
2710   unsigned NumLiterals = 0;
2711   uint32_t LiteralValue;
2712 
2713   for (int OpIdx : OpIndices) {
2714     if (OpIdx == -1) break;
2715 
2716     const MCOperand &MO = Inst.getOperand(OpIdx);
2717     if (MO.isImm() &&
2718         // Exclude special imm operands (like that used by s_set_gpr_idx_on)
2719         AMDGPU::isSISrcOperand(Desc, OpIdx) &&
2720         !isInlineConstant(Inst, OpIdx)) {
2721       uint32_t Value = static_cast<uint32_t>(MO.getImm());
2722       if (NumLiterals == 0 || LiteralValue != Value) {
2723         LiteralValue = Value;
2724         ++NumLiterals;
2725       }
2726     }
2727   }
2728 
2729   return NumLiterals <= 1;
2730 }
2731 
2732 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2733                                           const SMLoc &IDLoc) {
2734   if (!validateLdsDirect(Inst)) {
2735     Error(IDLoc,
2736       "invalid use of lds_direct");
2737     return false;
2738   }
2739   if (!validateSOPLiteral(Inst)) {
2740     Error(IDLoc,
2741       "only one literal operand is allowed");
2742     return false;
2743   }
2744   if (!validateConstantBusLimitations(Inst)) {
2745     Error(IDLoc,
2746       "invalid operand (violates constant bus restrictions)");
2747     return false;
2748   }
2749   if (!validateEarlyClobberLimitations(Inst)) {
2750     Error(IDLoc,
2751       "destination must be different than all sources");
2752     return false;
2753   }
2754   if (!validateIntClampSupported(Inst)) {
2755     Error(IDLoc,
2756       "integer clamping is not supported on this GPU");
2757     return false;
2758   }
2759   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
2760   if (!validateMIMGD16(Inst)) {
2761     Error(IDLoc,
2762       "d16 modifier is not supported on this GPU");
2763     return false;
2764   }
2765   if (!validateMIMGDataSize(Inst)) {
2766     Error(IDLoc,
2767       "image data size does not match dmask and tfe");
2768     return false;
2769   }
2770   if (!validateMIMGAtomicDMask(Inst)) {
2771     Error(IDLoc,
2772       "invalid atomic image dmask");
2773     return false;
2774   }
2775   if (!validateMIMGGatherDMask(Inst)) {
2776     Error(IDLoc,
2777       "invalid image_gather dmask: only one bit must be set");
2778     return false;
2779   }
2780 
2781   return true;
2782 }
2783 
2784 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
2785                                             const FeatureBitset &FBS,
2786                                             unsigned VariantID = 0);
2787 
2788 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2789                                               OperandVector &Operands,
2790                                               MCStreamer &Out,
2791                                               uint64_t &ErrorInfo,
2792                                               bool MatchingInlineAsm) {
2793   MCInst Inst;
2794   unsigned Result = Match_Success;
2795   for (auto Variant : getMatchedVariants()) {
2796     uint64_t EI;
2797     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2798                                   Variant);
2799     // We order match statuses from least to most specific. We use most specific
2800     // status as resulting
2801     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2802     if ((R == Match_Success) ||
2803         (R == Match_PreferE32) ||
2804         (R == Match_MissingFeature && Result != Match_PreferE32) ||
2805         (R == Match_InvalidOperand && Result != Match_MissingFeature
2806                                    && Result != Match_PreferE32) ||
2807         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
2808                                    && Result != Match_MissingFeature
2809                                    && Result != Match_PreferE32)) {
2810       Result = R;
2811       ErrorInfo = EI;
2812     }
2813     if (R == Match_Success)
2814       break;
2815   }
2816 
2817   switch (Result) {
2818   default: break;
2819   case Match_Success:
2820     if (!validateInstruction(Inst, IDLoc)) {
2821       return true;
2822     }
2823     Inst.setLoc(IDLoc);
2824     Out.EmitInstruction(Inst, getSTI());
2825     return false;
2826 
2827   case Match_MissingFeature:
2828     return Error(IDLoc, "instruction not supported on this GPU");
2829 
2830   case Match_MnemonicFail: {
2831     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2832     std::string Suggestion = AMDGPUMnemonicSpellCheck(
2833         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2834     return Error(IDLoc, "invalid instruction" + Suggestion,
2835                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
2836   }
2837 
2838   case Match_InvalidOperand: {
2839     SMLoc ErrorLoc = IDLoc;
2840     if (ErrorInfo != ~0ULL) {
2841       if (ErrorInfo >= Operands.size()) {
2842         return Error(IDLoc, "too few operands for instruction");
2843       }
2844       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2845       if (ErrorLoc == SMLoc())
2846         ErrorLoc = IDLoc;
2847     }
2848     return Error(ErrorLoc, "invalid operand for instruction");
2849   }
2850 
2851   case Match_PreferE32:
2852     return Error(IDLoc, "internal error: instruction without _e64 suffix "
2853                         "should be encoded as e32");
2854   }
2855   llvm_unreachable("Implement any new match types added!");
2856 }
2857 
2858 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2859   int64_t Tmp = -1;
2860   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2861     return true;
2862   }
2863   if (getParser().parseAbsoluteExpression(Tmp)) {
2864     return true;
2865   }
2866   Ret = static_cast<uint32_t>(Tmp);
2867   return false;
2868 }
2869 
// Parses "<major>, <minor>" into the two out-parameters.
// Returns true (with a diagnostic) on malformed input.
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  // The two numbers must be separated by a comma.
  if (getLexer().isNot(AsmToken::Comma))
    return TokError("minor version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}
2884 
2885 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2886   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2887     return TokError("directive only supported for amdgcn architecture");
2888 
2889   std::string Target;
2890 
2891   SMLoc TargetStart = getTok().getLoc();
2892   if (getParser().parseEscapedString(Target))
2893     return true;
2894   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2895 
2896   std::string ExpectedTarget;
2897   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2898   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2899 
2900   if (Target != ExpectedTargetOS.str())
2901     return getParser().Error(TargetRange.Start, "target must match options",
2902                              TargetRange);
2903 
2904   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2905   return false;
2906 }
2907 
// Convenience wrapper: reports a generic "value out of range" diagnostic at
// Range and propagates the parser's failure result.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return getParser().Error(Range.Start, "value out of range", Range);
}
2911 
2912 bool AMDGPUAsmParser::calculateGPRBlocks(
2913     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
2914     bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
2915     unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
2916     unsigned &SGPRBlocks) {
2917   // TODO(scott.linder): These calculations are duplicated from
2918   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2919   IsaVersion Version = getIsaVersion(getSTI().getCPU());
2920 
2921   unsigned NumVGPRs = NextFreeVGPR;
2922   unsigned NumSGPRs = NextFreeSGPR;
2923   unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
2924 
2925   if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
2926       NumSGPRs > MaxAddressableNumSGPRs)
2927     return OutOfRangeError(SGPRRange);
2928 
2929   NumSGPRs +=
2930       IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
2931 
2932   if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
2933       NumSGPRs > MaxAddressableNumSGPRs)
2934     return OutOfRangeError(SGPRRange);
2935 
2936   if (Features.test(FeatureSGPRInitBug))
2937     NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
2938 
2939   VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
2940   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
2941 
2942   return false;
2943 }
2944 
// Parses a `.amdhsa_kernel <name> ... .end_amdhsa_kernel` block. Each nested
// `.amdhsa_*` directive sets one field of a kernel_descriptor_t, which is
// then emitted through the target streamer. Returns true on error
// (MCAsmParser convention).
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  // Start from the default descriptor; the directives below overwrite fields.
  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();

  // Records which directives have appeared, to reject duplicates and to
  // enforce the mandatory ones after the loop.
  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  bool ReserveXNACK = hasXNACK();

  while (true) {
    // Skip blank statements (comments lex to EndOfStatement).
    while (getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");

    StringRef ID = getTok().getIdentifier();
    SMRange IDRange = getTok().getLocRange();
    Lex();

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    SMLoc ValStart = getTok().getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getTok().getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    // All directive values are treated as unsigned; negatives are rejected.
    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-checks VALUE against ENTRY's bit width, then packs it into the ENTRY
// bit-field of FIELD. Expands to a statement sequence, so it is only used as
// the sole body of an if/else-if branch below.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      // Each enabled user-SGPR feature consumes the number of SGPRs added
      // to UserSGPRCount below.
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      UserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      UserSGPRCount += 1;
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
          ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      // Recorded here; range-checked and packed after the loop via
      // calculateGPRBlocks.
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      // flat_scratch only exists on gfx7+.
      if (IVersion.Major < 7)
        return getParser().Error(IDRange.Start, "directive requires gfx7+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      // xnack_mask only exists on gfx8+.
      if (IVersion.Major < 8)
        return getParser().Error(IDRange.Start, "directive requires gfx8+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveXNACK = Val;
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      // FP16_OVFL bit only exists on gfx9+.
      if (IVersion.Major < 9)
        return getParser().Error(IDRange.Start, "directive requires gfx9+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return getParser().Error(IDRange.Start,
                               "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  // The register-count directives are mandatory.
  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  // Convert raw register counts into the granulated block encodings used by
  // the descriptor, then range-check and pack them.
  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
                         SGPRRange, VGPRBlocks, SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, ReserveXNACK);
  return false;
}
3197 
3198 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3199   uint32_t Major;
3200   uint32_t Minor;
3201 
3202   if (ParseDirectiveMajorMinor(Major, Minor))
3203     return true;
3204 
3205   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3206   return false;
3207 }
3208 
3209 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3210   uint32_t Major;
3211   uint32_t Minor;
3212   uint32_t Stepping;
3213   StringRef VendorName;
3214   StringRef ArchName;
3215 
3216   // If this directive has no arguments, then use the ISA version for the
3217   // targeted GPU.
3218   if (getLexer().is(AsmToken::EndOfStatement)) {
3219     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3220     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3221                                                       ISA.Stepping,
3222                                                       "AMD", "AMDGPU");
3223     return false;
3224   }
3225 
3226   if (ParseDirectiveMajorMinor(Major, Minor))
3227     return true;
3228 
3229   if (getLexer().isNot(AsmToken::Comma))
3230     return TokError("stepping version number required, comma expected");
3231   Lex();
3232 
3233   if (ParseAsAbsoluteExpression(Stepping))
3234     return TokError("invalid stepping version");
3235 
3236   if (getLexer().isNot(AsmToken::Comma))
3237     return TokError("vendor name required, comma expected");
3238   Lex();
3239 
3240   if (getLexer().isNot(AsmToken::String))
3241     return TokError("invalid vendor name");
3242 
3243   VendorName = getLexer().getTok().getStringContents();
3244   Lex();
3245 
3246   if (getLexer().isNot(AsmToken::Comma))
3247     return TokError("arch name required, comma expected");
3248   Lex();
3249 
3250   if (getLexer().isNot(AsmToken::String))
3251     return TokError("invalid arch name");
3252 
3253   ArchName = getLexer().getTok().getStringContents();
3254   Lex();
3255 
3256   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3257                                                     VendorName, ArchName);
3258   return false;
3259 }
3260 
3261 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3262                                                amd_kernel_code_t &Header) {
3263   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3264   // assembly for backwards compatibility.
3265   if (ID == "max_scratch_backing_memory_byte_size") {
3266     Parser.eatToEndOfStatement();
3267     return false;
3268   }
3269 
3270   SmallString<40> ErrStr;
3271   raw_svector_ostream Err(ErrStr);
3272   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3273     return TokError(Err.str());
3274   }
3275   Lex();
3276   return false;
3277 }
3278 
3279 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3280   amd_kernel_code_t Header;
3281   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3282 
3283   while (true) {
3284     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
3285     // will set the current token to EndOfStatement.
3286     while(getLexer().is(AsmToken::EndOfStatement))
3287       Lex();
3288 
3289     if (getLexer().isNot(AsmToken::Identifier))
3290       return TokError("expected value identifier or .end_amd_kernel_code_t");
3291 
3292     StringRef ID = getLexer().getTok().getIdentifier();
3293     Lex();
3294 
3295     if (ID == ".end_amd_kernel_code_t")
3296       break;
3297 
3298     if (ParseAMDKernelCodeTValue(ID, Header))
3299       return true;
3300   }
3301 
3302   getTargetStreamer().EmitAMDKernelCodeT(Header);
3303 
3304   return false;
3305 }
3306 
3307 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3308   if (getLexer().isNot(AsmToken::Identifier))
3309     return TokError("expected symbol name");
3310 
3311   StringRef KernelName = Parser.getTok().getString();
3312 
3313   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3314                                            ELF::STT_AMDGPU_HSA_KERNEL);
3315   Lex();
3316   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3317     KernelScope.initialize(getContext());
3318   return false;
3319 }
3320 
3321 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3322   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3323     return Error(getParser().getTok().getLoc(),
3324                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
3325                  "architectures");
3326   }
3327 
3328   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3329 
3330   std::string ISAVersionStringFromSTI;
3331   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3332   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3333 
3334   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3335     return Error(getParser().getTok().getLoc(),
3336                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3337                  "arguments specified through the command line");
3338   }
3339 
3340   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3341   Lex();
3342 
3343   return false;
3344 }
3345 
3346 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3347   const char *AssemblerDirectiveBegin;
3348   const char *AssemblerDirectiveEnd;
3349   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3350       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3351           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3352                             HSAMD::V3::AssemblerDirectiveEnd)
3353           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3354                             HSAMD::AssemblerDirectiveEnd);
3355 
3356   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3357     return Error(getParser().getTok().getLoc(),
3358                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3359                  "not available on non-amdhsa OSes")).str());
3360   }
3361 
3362   std::string HSAMetadataString;
3363   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
3364                           HSAMetadataString))
3365     return true;
3366 
3367   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3368     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3369       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3370   } else {
3371     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3372       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3373   }
3374 
3375   return false;
3376 }
3377 
/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
///
/// On entry the begin directive has already been consumed by the caller; this
/// routine gathers raw text until \p AssemblerDirectiveEnd is seen at the
/// start of a statement. Returns true (and emits a TokError) if EOF is hit
/// first. \p AssemblerDirectiveBegin is unused here; it is kept for interface
/// symmetry with the callers.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Turn off whitespace skipping so the collected text keeps its original
  // layout (whitespace is significant for YAML payloads).
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!getLexer().is(AsmToken::Eof)) {
    // Copy leading whitespace through verbatim.
    while (getLexer().is(AsmToken::Space)) {
      CollectStream << getLexer().getTok().getString();
      Lex();
    }

    // Stop (and consume the token) when the end directive starts a statement.
    if (getLexer().is(AsmToken::Identifier)) {
      StringRef ID = getLexer().getTok().getIdentifier();
      if (ID == AssemblerDirectiveEnd) {
        Lex();
        FoundEnd = true;
        break;
      }
    }

    // Otherwise collect the rest of the statement, re-appending the statement
    // separator that lexing to end-of-statement drops.
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  // Restore normal lexing before returning on either path.
  getLexer().setSkipSpace(true);

  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}
3420 
3421 /// Parse the assembler directive for new MsgPack-format PAL metadata.
3422 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
3423   std::string String;
3424   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
3425                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
3426     return true;
3427 
3428   auto PALMetadata = getTargetStreamer().getPALMetadata();
3429   if (!PALMetadata->setFromString(String))
3430     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
3431   return false;
3432 }
3433 
/// Parse the assembler directive for old linear-format PAL metadata.
///
/// The directive carries a comma-separated list of key/value pairs; each pair
/// is recorded as a register write in the legacy PAL metadata. Only valid on
/// amdpal targets. Returns true on error.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  // Switch the metadata object into the old linear format.
  PALMetadata->setLegacy();
  for (;;) {
    // Each iteration consumes one "key, value" pair.
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    // A key without a following value is an odd-length list — reject it.
    if (getLexer().isNot(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    Lex();
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    // No trailing comma means the list is finished.
    if (getLexer().isNot(AsmToken::Comma))
      break;
    Lex();
  }
  return false;
}
3466 
3467 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3468   StringRef IDVal = DirectiveID.getString();
3469 
3470   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3471     if (IDVal == ".amdgcn_target")
3472       return ParseDirectiveAMDGCNTarget();
3473 
3474     if (IDVal == ".amdhsa_kernel")
3475       return ParseDirectiveAMDHSAKernel();
3476 
3477     // TODO: Restructure/combine with PAL metadata directive.
3478     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3479       return ParseDirectiveHSAMetadata();
3480   } else {
3481     if (IDVal == ".hsa_code_object_version")
3482       return ParseDirectiveHSACodeObjectVersion();
3483 
3484     if (IDVal == ".hsa_code_object_isa")
3485       return ParseDirectiveHSACodeObjectISA();
3486 
3487     if (IDVal == ".amd_kernel_code_t")
3488       return ParseDirectiveAMDKernelCodeT();
3489 
3490     if (IDVal == ".amdgpu_hsa_kernel")
3491       return ParseDirectiveAMDGPUHsaKernel();
3492 
3493     if (IDVal == ".amd_amdgpu_isa")
3494       return ParseDirectiveISAVersion();
3495 
3496     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3497       return ParseDirectiveHSAMetadata();
3498   }
3499 
3500   if (IDVal == PALMD::AssemblerDirectiveBegin)
3501     return ParseDirectivePALMetadataBegin();
3502 
3503   if (IDVal == PALMD::AssemblerDirective)
3504     return ParseDirectivePALMetadata();
3505 
3506   return true;
3507 }
3508 
// Returns true if \p RegNo (or any register aliasing it) exists on the
// current subtarget; used to reject registers the assembler recognizes
// syntactically but the selected GPU does not provide.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {

  // ttmp[12:15] (and any alias of it) is only addressable on GFX9.
  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9();
  }

  switch (RegNo) {
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    // Trap base/memory address registers were removed in GFX9.
    return !isGFX9();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    // xnack_mask requires the XNACK feature and a post-SI/CI target.
    return !isCI() && !isSI() && hasXNACK();
  default:
    break;
  }

  // Inline-value registers only exist on GFX9+ (not SI/CI/VI).
  if (isInlineValue(RegNo))
    return !isCI() && !isSI() && !isVI();

  if (isCI())
    return true;

  if (isSI()) {
    // No flat_scr
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return false;
  }

  return true;
}
3562 
// Parse a single instruction operand: first via the TableGen'd custom
// operand parsers, then as a register/immediate, then as a generic
// expression, and finally as a bare token (instruction flags like 'gds').
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there as an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      getLexer().is(AsmToken::EndOfStatement))
    return ResTy;

  ResTy = parseRegOrImm(Operands);

  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
    return ResTy;

  const auto &Tok = Parser.getTok();
  SMLoc S = Tok.getLoc();

  // Fall back to a general expression operand (symbols, arithmetic, ...).
  const MCExpr *Expr = nullptr;
  if (!Parser.parseExpression(Expr)) {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    return MatchOperand_Success;
  }

  // Possibly this is an instruction flag like 'gds'.
  if (Tok.getKind() == AsmToken::Identifier) {
    Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
    Parser.Lex();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
3601 
3602 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3603   // Clear any forced encodings from the previous instruction.
3604   setForcedEncodingSize(0);
3605   setForcedDPP(false);
3606   setForcedSDWA(false);
3607 
3608   if (Name.endswith("_e64")) {
3609     setForcedEncodingSize(64);
3610     return Name.substr(0, Name.size() - 4);
3611   } else if (Name.endswith("_e32")) {
3612     setForcedEncodingSize(32);
3613     return Name.substr(0, Name.size() - 4);
3614   } else if (Name.endswith("_dpp")) {
3615     setForcedDPP(true);
3616     return Name.substr(0, Name.size() - 4);
3617   } else if (Name.endswith("_sdwa")) {
3618     setForcedSDWA(true);
3619     return Name.substr(0, Name.size() - 5);
3620   }
3621   return Name;
3622 }
3623 
3624 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3625                                        StringRef Name,
3626                                        SMLoc NameLoc, OperandVector &Operands) {
3627   // Add the instruction mnemonic
3628   Name = parseMnemonicSuffix(Name);
3629   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3630 
3631   while (!getLexer().is(AsmToken::EndOfStatement)) {
3632     OperandMatchResultTy Res = parseOperand(Operands, Name);
3633 
3634     // Eat the comma or space if there is one.
3635     if (getLexer().is(AsmToken::Comma))
3636       Parser.Lex();
3637 
3638     switch (Res) {
3639       case MatchOperand_Success: break;
3640       case MatchOperand_ParseFail:
3641         Error(getLexer().getLoc(), "failed parsing operand.");
3642         while (!getLexer().is(AsmToken::EndOfStatement)) {
3643           Parser.Lex();
3644         }
3645         return true;
3646       case MatchOperand_NoMatch:
3647         Error(getLexer().getLoc(), "not a valid operand.");
3648         while (!getLexer().is(AsmToken::EndOfStatement)) {
3649           Parser.Lex();
3650         }
3651         return true;
3652     }
3653   }
3654 
3655   return false;
3656 }
3657 
3658 //===----------------------------------------------------------------------===//
3659 // Utility functions
3660 //===----------------------------------------------------------------------===//
3661 
3662 OperandMatchResultTy
3663 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
3664   switch(getLexer().getKind()) {
3665     default: return MatchOperand_NoMatch;
3666     case AsmToken::Identifier: {
3667       StringRef Name = Parser.getTok().getString();
3668       if (!Name.equals(Prefix)) {
3669         return MatchOperand_NoMatch;
3670       }
3671 
3672       Parser.Lex();
3673       if (getLexer().isNot(AsmToken::Colon))
3674         return MatchOperand_ParseFail;
3675 
3676       Parser.Lex();
3677 
3678       bool IsMinus = false;
3679       if (getLexer().getKind() == AsmToken::Minus) {
3680         Parser.Lex();
3681         IsMinus = true;
3682       }
3683 
3684       if (getLexer().isNot(AsmToken::Integer))
3685         return MatchOperand_ParseFail;
3686 
3687       if (getParser().parseAbsoluteExpression(Int))
3688         return MatchOperand_ParseFail;
3689 
3690       if (IsMinus)
3691         Int = -Int;
3692       break;
3693     }
3694   }
3695   return MatchOperand_Success;
3696 }
3697 
3698 OperandMatchResultTy
3699 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3700                                     AMDGPUOperand::ImmTy ImmTy,
3701                                     bool (*ConvertResult)(int64_t&)) {
3702   SMLoc S = Parser.getTok().getLoc();
3703   int64_t Value = 0;
3704 
3705   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3706   if (Res != MatchOperand_Success)
3707     return Res;
3708 
3709   if (ConvertResult && !ConvertResult(Value)) {
3710     return MatchOperand_ParseFail;
3711   }
3712 
3713   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3714   return MatchOperand_Success;
3715 }
3716 
// Parse "<Prefix>:[b,b,b,b]" where each b is 0 or 1 (up to four entries),
// packing the bits (element I into bit I) into a single immediate operand of
// type \p ImmTy. \p ConvertResult is accepted for signature symmetry with
// parseIntWithPrefix but is not used here.
OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
  const char *Prefix,
  OperandVector &Operands,
  AMDGPUOperand::ImmTy ImmTy,
  bool (*ConvertResult)(int64_t&)) {
  StringRef Name = Parser.getTok().getString();
  if (!Name.equals(Prefix))
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

  unsigned Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; I < 4; ++I) {
    if (I != 0) {
      // Either the list closes here, or a comma separates the next element.
      if (getLexer().is(AsmToken::RBrac))
        break;

      if (getLexer().isNot(AsmToken::Comma))
        return MatchOperand_ParseFail;
      Parser.Lex();
    }

    if (getLexer().isNot(AsmToken::Integer))
      return MatchOperand_ParseFail;

    int64_t Op;
    if (getParser().parseAbsoluteExpression(Op))
      return MatchOperand_ParseFail;

    // Only single-bit values are meaningful in the mask.
    if (Op != 0 && Op != 1)
      return MatchOperand_ParseFail;
    Val |= (Op << I);
  }

  // NOTE(review): this Lex() assumes the current token is the closing ']';
  // when all four elements are present the bracket is never checked — confirm
  // whether malformed input like "op_sel:[0,0,0,0 x" should be diagnosed.
  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}
3766 
// Parse a named boolean modifier: the bare token \p Name sets the bit to 1,
// and a "no"-prefixed variant clears it. If the statement has already ended,
// a default-0 immediate is appended (default-argument fill-in).
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit = 0;
  SMLoc S = Parser.getTok().getLoc();

  // We are at the end of the statement, and this is a default argument, so
  // use a default value.
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    switch(getLexer().getKind()) {
      case AsmToken::Identifier: {
        StringRef Tok = Parser.getTok().getString();
        if (Tok == Name) {
          // Subtarget-specific diagnostics for image-modifier aliases; note
          // the error is reported but parsing still continues.
          if (Tok == "r128" && isGFX9())
            Error(S, "r128 modifier is not supported on this GPU");
          if (Tok == "a16" && !isGFX9())
            Error(S, "a16 modifier is not supported on this GPU");
          Bit = 1;
          Parser.Lex();
        } else if (Tok.startswith("no") && Tok.endswith(Name)) {
          // NOTE(review): this matches any token that merely starts with "no"
          // and ends with Name (not strictly "no" + Name) — confirm intended.
          Bit = 0;
          Parser.Lex();
        } else {
          return MatchOperand_NoMatch;
        }
        break;
      }
      default:
        return MatchOperand_NoMatch;
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}
3802 
3803 static void addOptionalImmOperand(
3804   MCInst& Inst, const OperandVector& Operands,
3805   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3806   AMDGPUOperand::ImmTy ImmT,
3807   int64_t Default = 0) {
3808   auto i = OptionalIdx.find(ImmT);
3809   if (i != OptionalIdx.end()) {
3810     unsigned Idx = i->second;
3811     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3812   } else {
3813     Inst.addOperand(MCOperand::createImm(Default));
3814   }
3815 }
3816 
3817 OperandMatchResultTy
3818 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3819   if (getLexer().isNot(AsmToken::Identifier)) {
3820     return MatchOperand_NoMatch;
3821   }
3822   StringRef Tok = Parser.getTok().getString();
3823   if (Tok != Prefix) {
3824     return MatchOperand_NoMatch;
3825   }
3826 
3827   Parser.Lex();
3828   if (getLexer().isNot(AsmToken::Colon)) {
3829     return MatchOperand_ParseFail;
3830   }
3831 
3832   Parser.Lex();
3833   if (getLexer().isNot(AsmToken::Identifier)) {
3834     return MatchOperand_ParseFail;
3835   }
3836 
3837   Value = Parser.getTok().getString();
3838   return MatchOperand_Success;
3839 }
3840 
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
//
// Produces a single ImmTyFORMAT immediate with dfmt in bits [3:0] and nfmt in
// bits [6:4]; an omitted field defaults to 0.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Dfmt = 0, Nfmt = 0;
  // dfmt and nfmt can appear in either order, and each is optional.
  bool GotDfmt = false, GotNfmt = false;
  while (!GotDfmt || !GotNfmt) {
    if (!GotDfmt) {
      auto Res = parseIntWithPrefix("dfmt", Dfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        // dfmt is a 4-bit field.
        if (Dfmt >= 16) {
          Error(Parser.getTok().getLoc(), "out of range dfmt");
          return MatchOperand_ParseFail;
        }
        GotDfmt = true;
        Parser.Lex();
        continue;
      }
    }
    if (!GotNfmt) {
      auto Res = parseIntWithPrefix("nfmt", Nfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        // nfmt is a 3-bit field.
        if (Nfmt >= 8) {
          Error(Parser.getTok().getLoc(), "out of range nfmt");
          return MatchOperand_ParseFail;
        }
        GotNfmt = true;
        Parser.Lex();
        continue;
      }
    }
    // Neither keyword matched this iteration: stop scanning.
    break;
  }
  if (!GotDfmt && !GotNfmt)
    return MatchOperand_NoMatch;
  auto Format = Dfmt | Nfmt << 4;
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
  return MatchOperand_Success;
}
3887 
3888 //===----------------------------------------------------------------------===//
3889 // ds
3890 //===----------------------------------------------------------------------===//
3891 
3892 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3893                                     const OperandVector &Operands) {
3894   OptionalImmIndexMap OptionalIdx;
3895 
3896   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3897     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3898 
3899     // Add the register arguments
3900     if (Op.isReg()) {
3901       Op.addRegOperands(Inst, 1);
3902       continue;
3903     }
3904 
3905     // Handle optional arguments
3906     OptionalIdx[Op.getImmTy()] = i;
3907   }
3908 
3909   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3910   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3911   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3912 
3913   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3914 }
3915 
// Shared converter for DS instructions with a single offset: emits registers
// in order, then the offset (or swizzle pattern for ds_swizzle_b32), then —
// unless gds is hardcoded by the opcode or spelled as a token — the gds bit,
// and finally the implicit m0 use.
void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // A bare 'gds' token in the source forces the hardcoded-gds path.
    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // ds_swizzle_b32 encodes its pattern in the offset field, so the parsed
  // swizzle immediate stands in for the offset.
  AMDGPUOperand::ImmTy OffsetType =
    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
                                                      AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
3950 
// Converter for the EXP instruction: collects exactly four source operands
// (registers or 'off' placeholders), rewrites them for compressed exports,
// and computes the 'en' mask from which sources are actually enabled.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // OperandIdx[i] records where src i landed inside Inst so it can be
  // rewritten below for the compr case.
  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // 'off' sources become NoRegister placeholders (disabled lanes).
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // 'done' is encoded in the opcode variant, not as an operand.
    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  // For compressed exports the two packed sources move into slots 0/1 and the
  // upper slots are cleared.
  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Each live source enables one bit (two adjacent bits when compressed).
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
4011 
4012 //===----------------------------------------------------------------------===//
4013 // s_waitcnt
4014 //===----------------------------------------------------------------------===//
4015 
4016 static bool
4017 encodeCnt(
4018   const AMDGPU::IsaVersion ISA,
4019   int64_t &IntVal,
4020   int64_t CntVal,
4021   bool Saturate,
4022   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4023   unsigned (*decode)(const IsaVersion &Version, unsigned))
4024 {
4025   bool Failed = false;
4026 
4027   IntVal = encode(ISA, IntVal, CntVal);
4028   if (CntVal != decode(ISA, IntVal)) {
4029     if (Saturate) {
4030       IntVal = encode(ISA, IntVal, -1);
4031     } else {
4032       Failed = true;
4033     }
4034   }
4035   return Failed;
4036 }
4037 
// Parse one "<name>(<value>)" waitcnt component — vmcnt, expcnt or lgkmcnt,
// optionally with a "_sat" suffix to saturate oversized values — and fold it
// into the packed encoding in \p IntVal. Returns true on error.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  StringRef CntName = Parser.getTok().getString();
  int64_t CntVal;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LParen))
    return true;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Integer))
    return true;

  SMLoc ValLoc = Parser.getTok().getLoc();
  if (getParser().parseAbsoluteExpression(CntVal))
    return true;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  // Failed stays true for an unrecognized counter name.
  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return true;
  }

  if (getLexer().isNot(AsmToken::RParen)) {
    return true;
  }

  Parser.Lex();
  // Consume an '&' or ',' separator, but only when another counter construct
  // follows; a trailing separator is left for the caller to diagnose.
  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
    const AsmToken NextToken = getLexer().peekTok();
    if (NextToken.is(AsmToken::Identifier)) {
      Parser.Lex();
    }
  }

  return false;
}
4086 
4087 OperandMatchResultTy
4088 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4089   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4090   int64_t Waitcnt = getWaitcntBitMask(ISA);
4091   SMLoc S = Parser.getTok().getLoc();
4092 
4093   switch(getLexer().getKind()) {
4094     default: return MatchOperand_ParseFail;
4095     case AsmToken::Integer:
4096       // The operand can be an integer value.
4097       if (getParser().parseAbsoluteExpression(Waitcnt))
4098         return MatchOperand_ParseFail;
4099       break;
4100 
4101     case AsmToken::Identifier:
4102       do {
4103         if (parseCnt(Waitcnt))
4104           return MatchOperand_ParseFail;
4105       } while(getLexer().isNot(AsmToken::EndOfStatement));
4106       break;
4107   }
4108   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4109   return MatchOperand_Success;
4110 }
4111 
// Parse "hwreg(<id>[, <offset>, <width>])" where <id> is either a symbolic
// register name or an integer code. Offset/Width keep the caller-provided
// defaults when the optional pair is absent. Returns true on error; an
// unrecognized symbolic name is NOT an error here — HwReg.Id stays
// ID_UNKNOWN_ and the caller diagnoses it.
bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
                                          int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (Parser.getTok().getString() != "hwreg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    HwReg.IsSymbolic = true;
    HwReg.Id = ID_UNKNOWN_;
    const StringRef tok = Parser.getTok().getString();
    // Pre-GFX9 targets only accept the older portion of the symbolic table.
    int Last = ID_SYMBOLIC_LAST_;
    if (isSI() || isCI() || isVI())
      Last = ID_SYMBOLIC_FIRST_GFX9_;
    for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
      if (tok == IdSymbolic[i]) {
        HwReg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    HwReg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(HwReg.Id))
      return true;
  }

  // Short form: hwreg(<id>) with no offset/width.
  if (getLexer().is(AsmToken::RParen)) {
    Parser.Lex();
    return false;
  }

  // optional params
  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Offset))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Width))
    return true;

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();

  return false;
}
4176 
// Parse the s_getreg/s_setreg operand: either a raw 16-bit immediate or a
// hwreg(...) construct, packed into a single ImmTyHwreg immediate. Range
// errors are reported but parsing continues with the (possibly bogus) value
// so that a single operand does not cascade into further diagnostics.
OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
    default: return MatchOperand_NoMatch;
    case AsmToken::Integer:
      // The operand can be an integer value.
      // NOTE(review): a failed absolute-expression parse returns NoMatch here
      // rather than ParseFail as elsewhere — confirm this asymmetry is
      // intentional.
      if (getParser().parseAbsoluteExpression(Imm16Val))
        return MatchOperand_NoMatch;
      if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
        Error(S, "invalid immediate: only 16-bit values are legal");
        // Do not return error code, but create an imm operand anyway and proceed
        // to the next operand, if any. That avoids unneccessary error messages.
      }
      break;

    case AsmToken::Identifier: {
        OperandInfoTy HwReg(ID_UNKNOWN_);
        int64_t Offset = OFFSET_DEFAULT_;
        int64_t Width = WIDTH_M1_DEFAULT_ + 1;
        if (parseHwregConstruct(HwReg, Offset, Width))
          return MatchOperand_ParseFail;
        if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
          if (HwReg.IsSymbolic)
            Error(S, "invalid symbolic name of hardware register");
          else
            Error(S, "invalid code of hardware register: only 6-bit values are legal");
        }
        if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
          Error(S, "invalid bit offset: only 5-bit values are legal");
        if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
          Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
        // Pack id/offset/width-1 into the 16-bit simm16 layout.
        Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
      }
      break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}
4219 
// Any immediate qualifies as an s_waitcnt operand; range checking happens at
// parse time in parseSWaitCntOps.
bool AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
4223 
// An operand is a hwreg operand only if it was built with ImmTyHwreg.
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
4227 
// Parse "sendmsg(<msg>[, <op>[, <stream>]])" for s_sendmsg. <msg> and <op>
// accept symbolic names or integer codes; the stream id is only meaningful
// for GS messages. Returns true on error; unrecognized symbolic names are
// left as ID_UNKNOWN_ for the caller to diagnose.
bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
  using namespace llvm::AMDGPU::SendMsg;

  if (Parser.getTok().getString() != "sendmsg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    Msg.IsSymbolic = true;
    Msg.Id = ID_UNKNOWN_;
    const std::string tok = Parser.getTok().getString();
    for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
      switch(i) {
        default: continue; // Omit gaps.
        case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
      }
      if (tok == IdSymbolic[i]) {
        Msg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    Msg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Msg.Id))
      return true;
    // NOTE(review): if a second integer immediately follows, it overwrites
    // Msg.Id (falling back to ID_UNKNOWN_ on a parse failure) — confirm what
    // input this is meant to accept.
    if (getLexer().is(AsmToken::Integer))
      if (getParser().parseAbsoluteExpression(Msg.Id))
        Msg.Id = ID_UNKNOWN_;
  }
  if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
    return false;

  // Messages other than GS/GS_DONE/SYSMSG carry no operation field.
  if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
    if (getLexer().isNot(AsmToken::RParen))
      return true;
    Parser.Lex();
    return false;
  }

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
  Operation.Id = ID_UNKNOWN_;
  if (getLexer().is(AsmToken::Identifier)) {
    Operation.IsSymbolic = true;
    // Pick the symbolic-operation table matching the message family.
    const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
    const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
    const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
    const StringRef Tok = Parser.getTok().getString();
    for (int i = F; i < L; ++i) {
      if (Tok == S[i]) {
        Operation.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    Operation.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Operation.Id))
      return true;
  }

  if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
    // Stream id is optional.
    if (getLexer().is(AsmToken::RParen)) {
      Parser.Lex();
      return false;
    }

    if (getLexer().isNot(AsmToken::Comma))
      return true;
    Parser.Lex();

    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(StreamId))
      return true;
  }

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();
  return false;
}
4323 
4324 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4325   if (getLexer().getKind() != AsmToken::Identifier)
4326     return MatchOperand_NoMatch;
4327 
4328   StringRef Str = Parser.getTok().getString();
4329   int Slot = StringSwitch<int>(Str)
4330     .Case("p10", 0)
4331     .Case("p20", 1)
4332     .Case("p0", 2)
4333     .Default(-1);
4334 
4335   SMLoc S = Parser.getTok().getLoc();
4336   if (Slot == -1)
4337     return MatchOperand_ParseFail;
4338 
4339   Parser.Lex();
4340   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4341                                               AMDGPUOperand::ImmTyInterpSlot));
4342   return MatchOperand_Success;
4343 }
4344 
// Parse an interpolation attribute of the form "attr<N>.<chan>" where N is a
// decimal attribute index (0-63) and chan is one of x/y/z/w. Pushes two
// immediates: the attribute index and the channel.
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  // The last two characters select the channel (".x" .. ".w").
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  // Strip the channel suffix and the "attr" prefix, leaving the index digits.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  // Out-of-range index: report the error but still succeed so parsing of the
  // remaining operands can continue.
  if (Attr > 63) {
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
4384 
// Report an invalid exp target at the current token location.
void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}
4388 
// Decode an exp target name into its hardware encoding:
//   null          -> 9
//   mrtz          -> 8
//   mrt<0..7>     -> 0..7
//   pos<0..3>     -> 12..15
//   param<0..31>  -> 32..63
// Out-of-range numbers are diagnosed via errorExpTgt() but still return
// MatchOperand_Success so that parsing can continue; Val may be
// meaningless in that case.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.startswith("mrt")) {
    Str = Str.drop_front(3);
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.startswith("pos")) {
    Str = Str.drop_front(3);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 3)
      errorExpTgt();

    // pos targets are encoded starting at 12.
    Val += 12;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    // param targets are encoded starting at 32.
    Val += 32;
    return MatchOperand_Success;
  }

  // Accepts the explicit form emitted by the disassembler for unknown
  // encodings; always diagnosed as invalid.
  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
4447 
4448 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4449   uint8_t Val;
4450   StringRef Str = Parser.getTok().getString();
4451 
4452   auto Res = parseExpTgtImpl(Str, Val);
4453   if (Res != MatchOperand_Success)
4454     return Res;
4455 
4456   SMLoc S = Parser.getTok().getLoc();
4457   Parser.Lex();
4458 
4459   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4460                                               AMDGPUOperand::ImmTyExpTgt));
4461   return MatchOperand_Success;
4462 }
4463 
// Parse a sendmsg operand: either a raw 16-bit immediate or the symbolic
// sendmsg(MSG [, OP [, STREAM_ID]]) form handled by parseSendMsgConstruct.
// Validation errors are reported but an immediate operand is still
// produced so parsing can proceed without cascading diagnostics.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default:
    return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unnecessary error messages.
    }
    break;
  case AsmToken::Identifier: {
      OperandInfoTy Msg(ID_UNKNOWN_);
      OperandInfoTy Operation(OP_UNKNOWN_);
      int64_t StreamId = STREAM_ID_DEFAULT_;
      if (parseSendMsgConstruct(Msg, Operation, StreamId))
        return MatchOperand_ParseFail;
      // Single-pass do-while(false) scope: any validation failure breaks
      // out after reporting an error, and whatever has been encoded into
      // Imm16Val so far is used for the operand.
      do {
        // Validate and encode message ID.
        if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
                || Msg.Id == ID_SYSMSG)) {
          if (Msg.IsSymbolic)
            Error(S, "invalid/unsupported symbolic name of message");
          else
            Error(S, "invalid/unsupported code of message");
          break;
        }
        Imm16Val = (Msg.Id << ID_SHIFT_);
        // Validate and encode operation ID.
        if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
          if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid symbolic name of GS_OP");
            else
              Error(S, "invalid code of GS_OP: only 2-bit values are legal");
            break;
          }
          // NOP is only meaningful for GS_DONE.
          if (Operation.Id == OP_GS_NOP
              && Msg.Id != ID_GS_DONE) {
            Error(S, "invalid GS_OP: NOP is for GS_DONE only");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        if (Msg.Id == ID_SYSMSG) {
          if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
            if (Operation.IsSymbolic)
              Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
            else
              Error(S, "invalid/unsupported code of SYSMSG_OP");
            break;
          }
          Imm16Val |= (Operation.Id << OP_SHIFT_);
        }
        // Validate and encode stream ID.
        if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
          if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
            Error(S, "invalid stream id: only 2-bit values are legal");
            break;
          }
          Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
        }
      } while (false);
    }
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}
4542 
// A sendmsg operand is represented as an ImmTySendMsg immediate.
bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
4546 
4547 //===----------------------------------------------------------------------===//
4548 // parser helpers
4549 //===----------------------------------------------------------------------===//
4550 
4551 bool
4552 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
4553   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
4554 }
4555 
// True iff the current token is the identifier Id.
bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}
4560 
// True iff the current token has the given kind.
bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}
4565 
4566 bool
4567 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4568   if (isId(Id)) {
4569     lex();
4570     return true;
4571   }
4572   return false;
4573 }
4574 
4575 bool
4576 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4577   if (isToken(Kind)) {
4578     lex();
4579     return true;
4580   }
4581   return false;
4582 }
4583 
4584 bool
4585 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4586                            const StringRef ErrMsg) {
4587   if (!trySkipToken(Kind)) {
4588     Error(getLoc(), ErrMsg);
4589     return false;
4590   }
4591   return true;
4592 }
4593 
// Parse an absolute expression into Imm; returns true on success.
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}
4598 
4599 bool
4600 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4601   if (isToken(AsmToken::String)) {
4602     Val = getToken().getStringContents();
4603     lex();
4604     return true;
4605   } else {
4606     Error(getLoc(), ErrMsg);
4607     return false;
4608   }
4609 }
4610 
// The current (not yet consumed) token.
AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}
4615 
// The token after the current one, without consuming anything.
AsmToken
AMDGPUAsmParser::peekToken() {
  return getLexer().peekTok();
}
4620 
4621 void
4622 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
4623   auto TokCount = getLexer().peekTokens(Tokens);
4624 
4625   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
4626     Tokens[Idx] = AsmToken(AsmToken::Error, "");
4627 }
4628 
// Kind of the current token.
AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}
4633 
// Source location of the current token.
SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}
4638 
// Spelling of the current token.
StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}
4643 
// Consume the current token.
void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}
4648 
4649 //===----------------------------------------------------------------------===//
4650 // swizzle
4651 //===----------------------------------------------------------------------===//
4652 
4653 LLVM_READNONE
4654 static unsigned
4655 encodeBitmaskPerm(const unsigned AndMask,
4656                   const unsigned OrMask,
4657                   const unsigned XorMask) {
4658   using namespace llvm::AMDGPU::Swizzle;
4659 
4660   return BITMASK_PERM_ENC |
4661          (AndMask << BITMASK_AND_SHIFT) |
4662          (OrMask  << BITMASK_OR_SHIFT)  |
4663          (XorMask << BITMASK_XOR_SHIFT);
4664 }
4665 
4666 bool
4667 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
4668                                       const unsigned MinVal,
4669                                       const unsigned MaxVal,
4670                                       const StringRef ErrMsg) {
4671   for (unsigned i = 0; i < OpNum; ++i) {
4672     if (!skipToken(AsmToken::Comma, "expected a comma")){
4673       return false;
4674     }
4675     SMLoc ExprLoc = Parser.getTok().getLoc();
4676     if (!parseExpr(Op[i])) {
4677       return false;
4678     }
4679     if (Op[i] < MinVal || Op[i] > MaxVal) {
4680       Error(ExprLoc, ErrMsg);
4681       return false;
4682     }
4683   }
4684 
4685   return true;
4686 }
4687 
4688 bool
4689 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
4690   using namespace llvm::AMDGPU::Swizzle;
4691 
4692   int64_t Lane[LANE_NUM];
4693   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
4694                            "expected a 2-bit lane id")) {
4695     Imm = QUAD_PERM_ENC;
4696     for (unsigned I = 0; I < LANE_NUM; ++I) {
4697       Imm |= Lane[I] << (LANE_SHIFT * I);
4698     }
4699     return true;
4700   }
4701   return false;
4702 }
4703 
// Parse swizzle(BROADCAST, group_size, lane_id): every lane in a group
// of group_size lanes reads from the group's lane lane_id.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  // Group size must be a power of two in [2,32].
  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32,
                            "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }
  // Lane id must lie inside the group.
  if (parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]")) {
    // AND mask keeps the group-selecting high bits of the lane id;
    // OR mask forces the low bits to LaneIdx within each group.
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
4729 
// Parse swizzle(REVERSE, group_size): reverse lane order within each
// group of group_size lanes.
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  // Group size must be a power of two in [2,32].
  if (!parseSwizzleOperands(1, &GroupSize,
      2, 32, "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  // XOR with (GroupSize - 1) reverses the lanes within each group.
  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}
4749 
4750 bool
4751 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
4752   using namespace llvm::AMDGPU::Swizzle;
4753 
4754   SMLoc S = Parser.getTok().getLoc();
4755   int64_t GroupSize;
4756 
4757   if (!parseSwizzleOperands(1, &GroupSize,
4758       1, 16, "group size must be in the interval [1,16]")) {
4759     return false;
4760   }
4761   if (!isPowerOf2_64(GroupSize)) {
4762     Error(S, "group size must be a power of two");
4763     return false;
4764   }
4765 
4766   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
4767   return true;
4768 }
4769 
// Parse swizzle(BITMASK_PERM, "mask") where the mask is a 5-character
// string controlling each of the 5 lane-id bits (most significant bit
// first):
//   '0' - force the bit to 0
//   '1' - force the bit to 1
//   'p' - preserve the bit
//   'i' - invert the bit
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    // Mask characters are given MSB first.
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
4816 
4817 bool
4818 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
4819 
4820   SMLoc OffsetLoc = Parser.getTok().getLoc();
4821 
4822   if (!parseExpr(Imm)) {
4823     return false;
4824   }
4825   if (!isUInt<16>(Imm)) {
4826     Error(OffsetLoc, "expected a 16-bit offset");
4827     return false;
4828   }
4829   return true;
4830 }
4831 
// Parse the parenthesized body of a swizzle(...) macro and dispatch to
// the handler of the named mode (QUAD_PERM, BITMASK_PERM, BROADCAST,
// SWAP or REVERSE). trySkipId consumes the mode token on match, so the
// chain below both identifies and consumes the mode.
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = Parser.getTok().getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}
4860 
// Parse an optional "offset:" operand for ds_swizzle: either the
// symbolic "offset:swizzle(...)" macro or a raw 16-bit offset.
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    // The operand is pushed even on failure so that operand counts stay
    // consistent; the ParseFail result carries the error.
    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}
4886 
// A swizzle operand is represented as an ImmTySwizzle immediate.
bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
4891 
4892 //===----------------------------------------------------------------------===//
4893 // VGPR Index Mode
4894 //===----------------------------------------------------------------------===//
4895 
// Parse the body of a gpr_idx(...) macro: either an immediately closing
// parenthesis (meaning OFF) or a comma-separated list of VGPR index
// modes, each setting one bit of the returned value. Errors are reported
// in place and the modes accumulated so far are returned.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = Parser.getTok().getLoc();

    // Match (and consume) one of the symbolic mode names.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      break;
    }

    // Each mode may be specified at most once.
    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      break;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      break;
  }

  return Imm;
}
4939 
// Parse a VGPR index mode operand: either the symbolic "gpr_idx(...)"
// macro or a raw 4-bit immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  int64_t Imm = 0;
  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "gpr_idx" &&
      getLexer().peekTok().is(AsmToken::LParen)) {

    // Consume "gpr_idx" and "(".
    Parser.Lex();
    Parser.Lex();

    // If parse failed, trigger an error but do not return error code
    // to avoid excessive error messages.
    Imm = parseGPRIdxMacro();

  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_NoMatch;
    if (Imm < 0 || !isUInt<4>(Imm)) {
      Error(S, "invalid immediate: only 4-bit values are legal");
    }
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return MatchOperand_Success;
}
4969 
// A VGPR index mode operand is represented as an ImmTyGprIdxMode immediate.
bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}
4973 
4974 //===----------------------------------------------------------------------===//
4975 // sopp branch targets
4976 //===----------------------------------------------------------------------===//
4977 
4978 OperandMatchResultTy
4979 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
4980   SMLoc S = Parser.getTok().getLoc();
4981 
4982   switch (getLexer().getKind()) {
4983     default: return MatchOperand_ParseFail;
4984     case AsmToken::Integer: {
4985       int64_t Imm;
4986       if (getParser().parseAbsoluteExpression(Imm))
4987         return MatchOperand_ParseFail;
4988       Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
4989       return MatchOperand_Success;
4990     }
4991 
4992     case AsmToken::Identifier:
4993       Operands.push_back(AMDGPUOperand::CreateExpr(this,
4994           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
4995                                   Parser.getTok().getString()), getContext()), S));
4996       Parser.Lex();
4997       return MatchOperand_Success;
4998   }
4999 }
5000 
5001 //===----------------------------------------------------------------------===//
5002 // mubuf
5003 //===----------------------------------------------------------------------===//
5004 
// Default (omitted) glc modifier: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}
5008 
// Default (omitted) slc modifier: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
}
5012 
5013 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5014                                const OperandVector &Operands,
5015                                bool IsAtomic,
5016                                bool IsAtomicReturn,
5017                                bool IsLds) {
5018   bool IsLdsOpcode = IsLds;
5019   bool HasLdsModifier = false;
5020   OptionalImmIndexMap OptionalIdx;
5021   assert(IsAtomicReturn ? IsAtomic : true);
5022   unsigned FirstOperandIdx = 1;
5023 
5024   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5025     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5026 
5027     // Add the register arguments
5028     if (Op.isReg()) {
5029       Op.addRegOperands(Inst, 1);
5030       // Insert a tied src for atomic return dst.
5031       // This cannot be postponed as subsequent calls to
5032       // addImmOperands rely on correct number of MC operands.
5033       if (IsAtomicReturn && i == FirstOperandIdx)
5034         Op.addRegOperands(Inst, 1);
5035       continue;
5036     }
5037 
5038     // Handle the case where soffset is an immediate
5039     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5040       Op.addImmOperands(Inst, 1);
5041       continue;
5042     }
5043 
5044     HasLdsModifier = Op.isLDS();
5045 
5046     // Handle tokens like 'offen' which are sometimes hard-coded into the
5047     // asm string.  There are no MCInst operands for these.
5048     if (Op.isToken()) {
5049       continue;
5050     }
5051     assert(Op.isImm());
5052 
5053     // Handle optional arguments
5054     OptionalIdx[Op.getImmTy()] = i;
5055   }
5056 
5057   // This is a workaround for an llvm quirk which may result in an
5058   // incorrect instruction selection. Lds and non-lds versions of
5059   // MUBUF instructions are identical except that lds versions
5060   // have mandatory 'lds' modifier. However this modifier follows
5061   // optional modifiers and llvm asm matcher regards this 'lds'
5062   // modifier as an optional one. As a result, an lds version
5063   // of opcode may be selected even if it has no 'lds' modifier.
5064   if (IsLdsOpcode && !HasLdsModifier) {
5065     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5066     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5067       Inst.setOpcode(NoLdsOpcode);
5068       IsLdsOpcode = false;
5069     }
5070   }
5071 
5072   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5073   if (!IsAtomic) { // glc is hard-coded.
5074     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5075   }
5076   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5077 
5078   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5079     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5080   }
5081 }
5082 
5083 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5084   OptionalImmIndexMap OptionalIdx;
5085 
5086   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5087     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5088 
5089     // Add the register arguments
5090     if (Op.isReg()) {
5091       Op.addRegOperands(Inst, 1);
5092       continue;
5093     }
5094 
5095     // Handle the case where soffset is an immediate
5096     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5097       Op.addImmOperands(Inst, 1);
5098       continue;
5099     }
5100 
5101     // Handle tokens like 'offen' which are sometimes hard-coded into the
5102     // asm string.  There are no MCInst operands for these.
5103     if (Op.isToken()) {
5104       continue;
5105     }
5106     assert(Op.isImm());
5107 
5108     // Handle optional arguments
5109     OptionalIdx[Op.getImmTy()] = i;
5110   }
5111 
5112   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5113                         AMDGPUOperand::ImmTyOffset);
5114   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5115   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5116   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5117   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5118 }
5119 
5120 //===----------------------------------------------------------------------===//
5121 // mimg
5122 //===----------------------------------------------------------------------===//
5123 
// Convert parsed MIMG operands into MCInst operands. Defs come first
// (and, for atomics, the dst is duplicated as a tied src); remaining
// registers follow, and the optional immediate modifiers are appended
// in the fixed order required by the encoding.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unexpected operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
5163 
// MIMG atomic conversion: cvtMIMG with the tied dst/src copy enabled.
void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}
5167 
5168 //===----------------------------------------------------------------------===//
5169 // smrd
5170 //===----------------------------------------------------------------------===//
5171 
5172 bool AMDGPUOperand::isSMRDOffset8() const {
5173   return isImm() && isUInt<8>(getImm());
5174 }
5175 
5176 bool AMDGPUOperand::isSMRDOffset20() const {
5177   return isImm() && isUInt<20>(getImm());
5178 }
5179 
5180 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5181   // 32-bit literals are only supported on CI and we only want to use them
5182   // when the offset is > 8-bits.
5183   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5184 }
5185 
// Default (omitted) 8-bit SMRD offset: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5189 
// Default (omitted) 20-bit SMRD offset: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5193 
// Default (omitted) SMRD literal offset: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5197 
// Default (omitted) unsigned 12-bit offset: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5201 
// Default (omitted) signed 13-bit offset: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5205 
5206 //===----------------------------------------------------------------------===//
5207 // vop3
5208 //===----------------------------------------------------------------------===//
5209 
// Convert an omod multiplier (1, 2 or 4) to its encoded form (log2):
// 1 -> 0, 2 -> 1, 4 -> 2. Returns false (leaving Mul untouched) for any
// other value.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
5217 
// Convert an omod divisor (1 or 2) to its encoded form: 1 -> 0, 2 -> 3.
// Returns false (leaving Div untouched) for any other value.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
5231 
// Convert a dpp bound_ctrl value to its encoded form: 0 -> 1, -1 -> 0.
// Returns false (leaving BoundCtrl untouched) for any other value.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  switch (BoundCtrl) {
  case 0:
    BoundCtrl = 1;
    return true;
  case -1:
    BoundCtrl = 0;
    return true;
  default:
    return false;
  }
}
5245 
// Note: the order in this table matches the order of operands in AsmString.
// Each entry maps an operand name to its immediate type, whether it is a
// bare bit modifier (third field), and an optional value-conversion
// callback applied to the parsed value (fourth field).
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  // NOTE(review): "d16" appears twice in this table (see above); the
  // second entry looks redundant -- confirm it is intentional.
  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
};
5286 
// Parse one optional operand, then (when it was the first operand after
// the registers) keep parsing further optional operands to step over any
// hardcoded mandatory operands that follow; see the comment below.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
  unsigned size = Operands.size();
  assert(size > 0);

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {

    // We have parsed the first optional operand.
    // Parse as many operands as necessary to skip all mandatory operands.

    for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
      if (res != MatchOperand_Success ||
          getLexer().is(AsmToken::EndOfStatement)) break;
      if (getLexer().is(AsmToken::Comma)) Parser.Lex();
      res = parseOptionalOpr(Operands);
    }
  }

  return res;
}
5319 
5320 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
5321   OperandMatchResultTy res;
5322   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
5323     // try to parse any optional operand here
5324     if (Op.IsBit) {
5325       res = parseNamedBit(Op.Name, Operands, Op.Type);
5326     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
5327       res = parseOModOperand(Operands);
5328     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
5329                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
5330                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
5331       res = parseSDWASel(Operands, Op.Name, Op.Type);
5332     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
5333       res = parseSDWADstUnused(Operands);
5334     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
5335                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
5336                Op.Type == AMDGPUOperand::ImmTyNegLo ||
5337                Op.Type == AMDGPUOperand::ImmTyNegHi) {
5338       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
5339                                         Op.ConvertResult);
5340     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
5341       res = parseDfmtNfmt(Operands);
5342     } else {
5343       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
5344     }
5345     if (res != MatchOperand_NoMatch) {
5346       return res;
5347     }
5348   }
5349   return MatchOperand_NoMatch;
5350 }
5351 
5352 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
5353   StringRef Name = Parser.getTok().getString();
5354   if (Name == "mul") {
5355     return parseIntWithPrefix("mul", Operands,
5356                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
5357   }
5358 
5359   if (Name == "div") {
5360     return parseIntWithPrefix("div", Operands,
5361                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
5362   }
5363 
5364   return MatchOperand_NoMatch;
5365 }
5366 
// Convert a parsed VOP3 instruction whose op_sel also covers the destination.
// After the generic VOP3P conversion, if the op_sel bit just past the last
// source operand (the destination bit) is set, it is mirrored into
// src0_modifiers as DST_OP_SEL, since there is no separate dst modifier
// operand to carry it.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many named source operands (src0..src2) this opcode has;
  // the destination op_sel bit sits at index SrcNum.
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    // Destination bit set: fold it into src0_modifiers.
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
5390 
5391 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
5392       // 1. This operand is input modifiers
5393   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
5394       // 2. This is not last operand
5395       && Desc.NumOperands > (OpNum + 1)
5396       // 3. Next operand is register class
5397       && Desc.OpInfo[OpNum + 1].RegClass != -1
5398       // 4. Next register is not tied to any other operand
5399       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
5400 }
5401 
// Convert a parsed VOP3 interpolation instruction. Sources carrying input
// modifiers become {modifiers, value} operand pairs; interp slot/attr/chan
// operands are added as plain immediates; remaining modifier immediates are
// remembered by type and appended afterwards (high, clamp, omod) only when
// the opcode has the corresponding named operand.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; copy the defs (results) first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // A source with modifiers occupies two MCInst operands.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      // Optional modifier immediate: record where it was parsed.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
5440 
// Shared conversion for VOP3 instructions. Copies the defs, then the sources
// (as {modifiers, value} pairs when the opcode has src0_modifiers, plain
// otherwise), collecting optional modifier immediates into OptionalIdx.
// Afterwards clamp/omod are appended when present, and the tied src2 of
// MAC/FMAC opcodes is materialized from the destination.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; copy the defs (results) first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        // Modified source takes two MCInst operands: modifiers + value.
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        // Optional modifier immediate: record where it was parsed.
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_si ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
5500 
5501 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5502   OptionalImmIndexMap OptionalIdx;
5503   cvtVOP3(Inst, Operands, OptionalIdx);
5504 }
5505 
// Convert a parsed VOP3P (packed) instruction. After the generic VOP3
// conversion, the op_sel / op_sel_hi / neg_lo / neg_hi immediates are
// appended and then redistributed bit-by-bit into the per-source
// src*_modifiers operands as OP_SEL_0 / OP_SEL_1 / NEG / NEG_HI flags.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    // vdst_in receives the same register as the destination (operand 0).
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed opcodes default op_sel_hi to all-ones; others to zero.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Fold bit J of each packed-modifier mask into srcJ_modifiers.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
5588 
5589 //===----------------------------------------------------------------------===//
5590 // dpp
5591 //===----------------------------------------------------------------------===//
5592 
5593 bool AMDGPUOperand::isDPPCtrl() const {
5594   using namespace AMDGPU::DPP;
5595 
5596   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5597   if (result) {
5598     int64_t Imm = getImm();
5599     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5600            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5601            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5602            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5603            (Imm == DppCtrl::WAVE_SHL1) ||
5604            (Imm == DppCtrl::WAVE_ROL1) ||
5605            (Imm == DppCtrl::WAVE_SHR1) ||
5606            (Imm == DppCtrl::WAVE_ROR1) ||
5607            (Imm == DppCtrl::ROW_MIRROR) ||
5608            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5609            (Imm == DppCtrl::BCAST15) ||
5610            (Imm == DppCtrl::BCAST31);
5611   }
5612   return false;
5613 }
5614 
5615 bool AMDGPUOperand::isS16Imm() const {
5616   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5617 }
5618 
5619 bool AMDGPUOperand::isU16Imm() const {
5620   return isImm() && isUInt<16>(getImm());
5621 }
5622 
// Parse a dpp_ctrl operand. Accepts the bare keywords row_mirror and
// row_half_mirror, quad_perm:[a,b,c,d] with each lane selector in 0..3,
// and prefix:N forms (row_shl/row_shr/row_ror with 1 <= N <= 15, wave_*
// with N == 1, row_bcast with N in {15, 31}). On success pushes the value
// as an ImmTyDppCtrl immediate using the AMDGPU::DPP::DppCtrl encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      // First lane selector occupies the low two bits.
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      // The remaining three selectors are packed two bits apiece above it.
      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      // Map the prefix + value onto the DppCtrl encoding, rejecting values
      // outside each prefix's legal range.
      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
5723 
5724 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
5725   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
5726 }
5727 
5728 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
5729   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
5730 }
5731 
5732 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
5733   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
5734 }
5735 
5736 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
5737   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
5738 }
5739 
5740 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
5741   OptionalImmIndexMap OptionalIdx;
5742 
5743   unsigned I = 1;
5744   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5745   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5746     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5747   }
5748 
5749   for (unsigned E = Operands.size(); I != E; ++I) {
5750     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
5751                                             MCOI::TIED_TO);
5752     if (TiedTo != -1) {
5753       assert((unsigned)TiedTo < Inst.getNumOperands());
5754       // handle tied old or src2 for MAC instructions
5755       Inst.addOperand(Inst.getOperand(TiedTo));
5756     }
5757     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5758     // Add the register arguments
5759     if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
5760       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
5761       // Skip it.
5762       continue;
5763     } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5764       Op.addRegWithFPInputModsOperands(Inst, 2);
5765     } else if (Op.isDPPCtrl()) {
5766       Op.addImmOperands(Inst, 1);
5767     } else if (Op.isImm()) {
5768       // Handle optional arguments
5769       OptionalIdx[Op.getImmTy()] = I;
5770     } else {
5771       llvm_unreachable("Invalid operand type");
5772     }
5773   }
5774 
5775   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
5776   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
5777   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
5778 }
5779 
5780 //===----------------------------------------------------------------------===//
5781 // sdwa
5782 //===----------------------------------------------------------------------===//
5783 
5784 OperandMatchResultTy
5785 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
5786                               AMDGPUOperand::ImmTy Type) {
5787   using namespace llvm::AMDGPU::SDWA;
5788 
5789   SMLoc S = Parser.getTok().getLoc();
5790   StringRef Value;
5791   OperandMatchResultTy res;
5792 
5793   res = parseStringWithPrefix(Prefix, Value);
5794   if (res != MatchOperand_Success) {
5795     return res;
5796   }
5797 
5798   int64_t Int;
5799   Int = StringSwitch<int64_t>(Value)
5800         .Case("BYTE_0", SdwaSel::BYTE_0)
5801         .Case("BYTE_1", SdwaSel::BYTE_1)
5802         .Case("BYTE_2", SdwaSel::BYTE_2)
5803         .Case("BYTE_3", SdwaSel::BYTE_3)
5804         .Case("WORD_0", SdwaSel::WORD_0)
5805         .Case("WORD_1", SdwaSel::WORD_1)
5806         .Case("DWORD", SdwaSel::DWORD)
5807         .Default(0xffffffff);
5808   Parser.Lex(); // eat last token
5809 
5810   if (Int == 0xffffffff) {
5811     return MatchOperand_ParseFail;
5812   }
5813 
5814   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
5815   return MatchOperand_Success;
5816 }
5817 
5818 OperandMatchResultTy
5819 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
5820   using namespace llvm::AMDGPU::SDWA;
5821 
5822   SMLoc S = Parser.getTok().getLoc();
5823   StringRef Value;
5824   OperandMatchResultTy res;
5825 
5826   res = parseStringWithPrefix("dst_unused", Value);
5827   if (res != MatchOperand_Success) {
5828     return res;
5829   }
5830 
5831   int64_t Int;
5832   Int = StringSwitch<int64_t>(Value)
5833         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
5834         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
5835         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
5836         .Default(0xffffffff);
5837   Parser.Lex(); // eat last token
5838 
5839   if (Int == 0xffffffff) {
5840     return MatchOperand_ParseFail;
5841   }
5842 
5843   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
5844   return MatchOperand_Success;
5845 }
5846 
// Convert an SDWA-encoded VOP1 instruction via the common cvtSDWA path.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
5850 
// Convert an SDWA-encoded VOP2 instruction via the common cvtSDWA path.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
5854 
// Convert an SDWA-encoded VOP2b instruction; skipVcc=true so the textual
// "vcc" operand is dropped (see the VCC-skip logic in cvtSDWA).
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}
5858 
// Convert an SDWA-encoded VOPC instruction; the "vcc" operand is skipped
// only on VI (see the VCC-skip logic in cvtSDWA).
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
5862 
// Common conversion for SDWA-encoded VOP1/VOP2/VOPC instructions.
// skipVcc requests dropping a textual "vcc" operand that some forms carry
// (it is implicit in the encoding); optional immediates are collected by
// type and appended with per-encoding defaults; the tied src2 of
// v_mac_{f16,f32} is materialized from the destination at the end.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  // Operands[0] is the mnemonic token; copy the defs (results) first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Modified source takes two MCInst operands: modifiers + value.
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments.
    // Append the optional immediates each basic encoding expects, using the
    // default when the operand was not written in the source.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
5950 
/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  // Register this asm parser for both AMDGPU target variants.
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
5956 
5957 #define GET_REGISTER_MATCHER
5958 #define GET_MATCHER_IMPLEMENTATION
5959 #define GET_MNEMONIC_SPELL_CHECKER
5960 #include "AMDGPUGenAsmMatcher.inc"
5961 
// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined
5964 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
5965                                                      unsigned Kind) {
5966   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
5967   // But MatchInstructionImpl() expects to meet token and fails to validate
5968   // operand. This method checks if we are given immediate operand but expect to
5969   // get corresponding token.
5970   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
5971   switch (Kind) {
5972   case MCK_addr64:
5973     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
5974   case MCK_gds:
5975     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
5976   case MCK_lds:
5977     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
5978   case MCK_glc:
5979     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
5980   case MCK_idxen:
5981     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
5982   case MCK_offen:
5983     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
5984   case MCK_SSrcB32:
5985     // When operands have expression values, they will return true for isToken,
5986     // because it is not possible to distinguish between a token and an
5987     // expression at parse time. MatchInstructionImpl() will always try to
5988     // match an operand as a token, when isToken returns true, and when the
5989     // name of the expression is not a valid token, the match will fail,
5990     // so we need to handle it here.
5991     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
5992   case MCK_SSrcF32:
5993     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
5994   case MCK_SoppBrTarget:
5995     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
5996   case MCK_VReg32OrOff:
5997     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
5998   case MCK_InterpSlot:
5999     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6000   case MCK_Attr:
6001     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6002   case MCK_AttrChan:
6003     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6004   default:
6005     return Match_InvalidOperand;
6006   }
6007 }
6008 
6009 //===----------------------------------------------------------------------===//
6010 // endpgm
6011 //===----------------------------------------------------------------------===//
6012 
6013 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6014   SMLoc S = Parser.getTok().getLoc();
6015   int64_t Imm = 0;
6016 
6017   if (!parseExpr(Imm)) {
6018     // The operand is optional, if not present default to 0
6019     Imm = 0;
6020   }
6021 
6022   if (!isUInt<16>(Imm)) {
6023     Error(S, "expected a 16-bit value");
6024     return MatchOperand_ParseFail;
6025   }
6026 
6027   Operands.push_back(
6028       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6029   return MatchOperand_Success;
6030 }
6031 
6032 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
6033